初始化项目,由ModelHub XC社区提供模型
Model: W-61/llama3-hh-helpful-qt045-b0p3-20260429-085449 Source: Original Platform
This commit is contained in:
36
.gitattributes
vendored
Normal file
36
.gitattributes
vendored
Normal file
@@ -0,0 +1,36 @@
|
||||
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||
*.model filter=lfs diff=lfs merge=lfs -text
|
||||
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
||||
62
README.md
Normal file
62
README.md
Normal file
@@ -0,0 +1,62 @@
|
||||
---
|
||||
library_name: transformers
|
||||
base_model: W-61/llama-3-8b-base-sft-hh-helpful-4xh200
|
||||
tags:
|
||||
- alignment-handbook
|
||||
- new-dpo
|
||||
- generated_from_trainer
|
||||
datasets:
|
||||
- Anthropic/hh-rlhf
|
||||
model-index:
|
||||
- name: llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449
|
||||
results: []
|
||||
---
|
||||
|
||||
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
||||
should probably proofread and complete it, then remove this comment. -->
|
||||
|
||||
# llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449
|
||||
|
||||
This model is a fine-tuned version of [W-61/llama-3-8b-base-sft-hh-helpful-4xh200](https://huggingface.co/W-61/llama-3-8b-base-sft-hh-helpful-4xh200) on the Anthropic/hh-rlhf dataset.
|
||||
|
||||
## Model description
|
||||
|
||||
More information needed
|
||||
|
||||
## Intended uses & limitations
|
||||
|
||||
More information needed
|
||||
|
||||
## Training and evaluation data
|
||||
|
||||
More information needed
|
||||
|
||||
## Training procedure
|
||||
|
||||
### Training hyperparameters
|
||||
|
||||
The following hyperparameters were used during training:
|
||||
- learning_rate: 5e-07
|
||||
- train_batch_size: 8
|
||||
- eval_batch_size: 8
|
||||
- seed: 42
|
||||
- distributed_type: multi-GPU
|
||||
- num_devices: 4
|
||||
- gradient_accumulation_steps: 2
|
||||
- total_train_batch_size: 64
|
||||
- total_eval_batch_size: 32
|
||||
- optimizer: AdamW (OptimizerNames.ADAMW_TORCH) with betas=(0.9, 0.999) and epsilon=1e-08; no additional optimizer arguments
|
||||
- lr_scheduler_type: cosine
|
||||
- lr_scheduler_warmup_ratio: 0.1
|
||||
- num_epochs: 1
|
||||
|
||||
### Training results
|
||||
|
||||
|
||||
|
||||
### Framework versions
|
||||
|
||||
- Transformers 4.51.0
|
||||
- Pytorch 2.3.1+cu121
|
||||
- Datasets 2.21.0
|
||||
- Tokenizers 0.21.4
|
||||
9
all_results.json
Normal file
9
all_results.json
Normal file
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"epoch": 1.0,
|
||||
"total_flos": 0.0,
|
||||
"train_loss": 1.095842420442164,
|
||||
"train_runtime": 1736.9553,
|
||||
"train_samples": 43598,
|
||||
"train_samples_per_second": 25.1,
|
||||
"train_steps_per_second": 0.392
|
||||
}
|
||||
29
config.json
Normal file
29
config.json
Normal file
@@ -0,0 +1,29 @@
|
||||
{
|
||||
"architectures": [
|
||||
"LlamaForCausalLM"
|
||||
],
|
||||
"attention_bias": false,
|
||||
"attention_dropout": 0.0,
|
||||
"bos_token_id": 128000,
|
||||
"eos_token_id": 128001,
|
||||
"head_dim": 128,
|
||||
"hidden_act": "silu",
|
||||
"hidden_size": 4096,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 14336,
|
||||
"max_position_embeddings": 8192,
|
||||
"mlp_bias": false,
|
||||
"model_type": "llama",
|
||||
"num_attention_heads": 32,
|
||||
"num_hidden_layers": 32,
|
||||
"num_key_value_heads": 8,
|
||||
"pretraining_tp": 1,
|
||||
"rms_norm_eps": 1e-05,
|
||||
"rope_scaling": null,
|
||||
"rope_theta": 500000.0,
|
||||
"tie_word_embeddings": false,
|
||||
"torch_dtype": "float32",
|
||||
"transformers_version": "4.51.0",
|
||||
"use_cache": true,
|
||||
"vocab_size": 128256
|
||||
}
|
||||
9
generation_config.json
Normal file
9
generation_config.json
Normal file
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"bos_token_id": 128000,
|
||||
"do_sample": true,
|
||||
"eos_token_id": 128001,
|
||||
"max_length": 4096,
|
||||
"temperature": 0.6,
|
||||
"top_p": 0.9,
|
||||
"transformers_version": "4.51.0"
|
||||
}
|
||||
681
margin_logs/margins.jsonl
Normal file
681
margin_logs/margins.jsonl
Normal file
@@ -0,0 +1,681 @@
|
||||
{"epoch": 0.0, "step": 1, "batch_size": 64, "mean": -0.02287048101425171, "std": 0.42023447155952454, "min": -1.4034271240234375, "p10": -0.46674575805664065, "median": 0.04234886169433594, "p90": 0.4323463439941407, "max": 0.89263916015625, "pos_frac": 0.53125, "sample": [-0.06523895263671875, 0.436798095703125, 0.27811431884765625, -0.9194221496582031, 0.018890380859375, 0.20587158203125, 0.18878173828125, -0.3968696594238281, 0.26206207275390625, 0.2470550537109375, -0.040912628173828125, 0.4394989013671875, -0.44133758544921875, -0.39148712158203125, 0.2764854431152344, 0.89263916015625, -0.42584991455078125, -0.46125030517578125, -0.8638992309570312, -0.3508758544921875, 0.371368408203125, 0.887847900390625, -0.382904052734375, 0.36145782470703125, -0.4890003204345703, 0.052455902099609375, -0.036136627197265625, 0.23079299926757812, 0.2469482421875, 0.1643218994140625, -0.07129669189453125, 0.2790794372558594, 0.3637123107910156, -0.8916168212890625, 0.03298759460449219, -0.2790107727050781, -0.17860984802246094, 0.23892593383789062, 0.05171012878417969, -0.2564239501953125, -0.14655303955078125, 0.27777862548828125, 0.0810394287109375, -1.4034271240234375, -0.28739166259765625, -0.1489429473876953, 0.44918060302734375, 0.1693286895751953, 0.10933303833007812, -0.14766693115234375, -0.40944671630859375, -0.18532562255859375, 0.6261310577392578, -0.20856857299804688, 0.602569580078125, 0.05538177490234375, 0.1505279541015625, 0.1313800811767578, -0.006317138671875, 0.42195892333984375, -0.29936981201171875, -0.4691009521484375, 0.16705322265625, -0.5789260864257812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000001.npy"}
|
||||
{"epoch": 0.0014684287812041115, "step": 2, "batch_size": 64, "mean": -0.06572240591049194, "std": 0.3523969054222107, "min": -0.9291305541992188, "p10": -0.46334152221679686, "median": -0.05502510070800781, "p90": 0.3672500610351563, "max": 1.0444793701171875, "pos_frac": 0.4375, "sample": [-0.2829437255859375, 0.3027191162109375, -0.19867706298828125, -0.3062286376953125, 0.10318756103515625, 0.20131683349609375, -0.34906005859375, 0.2802886962890625, 0.1914520263671875, -0.31072998046875, 0.08922195434570312, 0.10284614562988281, -0.03655242919921875, -0.0604095458984375, -0.06208038330078125, 0.32562255859375, -0.37982177734375, 0.2746162414550781, -0.049640655517578125, 0.3752174377441406, -0.103973388671875, 0.0699462890625, 0.36417388916015625, -0.033428192138671875, 0.37265777587890625, -0.3787078857421875, -0.6610565185546875, 0.4720420837402344, 0.47701263427734375, -0.27928924560546875, -0.44719696044921875, -0.0965118408203125, -0.7628555297851562, 0.046764373779296875, 0.06670379638671875, -0.9291305541992188, -0.7122802734375, -0.16554832458496094, 0.1485595703125, -0.07539939880371094, 0.2588920593261719, 0.039890289306640625, 0.201690673828125, 0.0623016357421875, 1.0444793701171875, -0.37696075439453125, -0.02794647216796875, -0.223297119140625, -0.35730743408203125, -0.1309051513671875, -0.3106689453125, -0.11409187316894531, -0.1669769287109375, 0.131317138671875, -0.2361297607421875, 0.4093780517578125, -0.6485977172851562, 0.36856842041015625, -0.1951904296875, -0.4702606201171875, -0.7624168395996094, 0.008928298950195312, -0.31630706787109375, 0.022550582885742188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000002.npy"}
|
||||
{"epoch": 0.002936857562408223, "step": 3, "batch_size": 64, "mean": 0.04345354437828064, "std": 0.3514525890350342, "min": -0.9645233154296875, "p10": -0.36395721435546874, "median": 0.018914222717285156, "p90": 0.5056064605712892, "max": 0.94873046875, "pos_frac": 0.5625, "sample": [-0.10715484619140625, -0.08351516723632812, 0.5889053344726562, 0.1807098388671875, 0.011278152465820312, -0.2550392150878906, 0.1979045867919922, 0.23459625244140625, 0.009063720703125, 0.0009822845458984375, 0.03350830078125, -0.06809806823730469, 0.5584564208984375, -0.3628997802734375, -0.08093833923339844, 0.5604400634765625, -0.250335693359375, -0.3695068359375, 0.14037704467773438, -0.1947460174560547, -0.43294525146484375, 0.3804931640625, 0.47283172607421875, 0.41748046875, -0.5289688110351562, -0.06763267517089844, 0.20258331298828125, 0.09070014953613281, 0.21934127807617188, -0.0230255126953125, 0.33235931396484375, 0.519134521484375, 0.15324020385742188, 0.94873046875, 0.04927253723144531, -0.24961090087890625, -0.27283477783203125, 0.07157325744628906, 0.13065338134765625, 0.2204875946044922, 0.0110015869140625, 0.02655029296875, -0.0211181640625, 0.29084014892578125, 0.36540985107421875, -0.01886749267578125, 0.18338775634765625, 0.4740409851074219, -0.9645233154296875, -0.3450508117675781, 0.3157062530517578, -0.2563018798828125, 0.2875175476074219, -0.04862213134765625, 0.08238410949707031, 0.771209716796875, -0.18851470947265625, -0.4689178466796875, -0.279205322265625, 0.6390380859375, -0.18415069580078125, -0.364410400390625, -0.207855224609375, -0.6963729858398438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000003.npy"}
|
||||
{"epoch": 0.004405286343612335, "step": 4, "batch_size": 64, "mean": -0.04964029788970947, "std": 0.3945470452308655, "min": -0.9305191040039062, "p10": -0.5812477111816406, "median": -0.03455543518066406, "p90": 0.47739105224609407, "max": 0.8212814331054688, "pos_frac": 0.484375, "sample": [0.061351776123046875, -0.23756790161132812, 0.05895423889160156, -0.0815277099609375, 0.16788482666015625, 0.157867431640625, 0.6239471435546875, -0.520538330078125, -0.5852718353271484, -0.10882568359375, 0.010541915893554688, 0.2277679443359375, -0.03057861328125, 0.157440185546875, -0.305206298828125, -0.09641456604003906, -0.5810928344726562, -0.12966156005859375, -0.5813140869140625, 0.15638351440429688, -0.6052703857421875, 0.1003570556640625, -0.337432861328125, -0.0485687255859375, 0.02715301513671875, 0.04534912109375, 0.0626983642578125, 0.2958869934082031, -0.12537193298339844, -0.490386962890625, 0.018581390380859375, 0.026273727416992188, -0.8528518676757812, 0.036243438720703125, -0.23577308654785156, 0.398895263671875, -0.9305191040039062, 0.76495361328125, -0.15886688232421875, 0.5110321044921875, 0.04460906982421875, -0.11618995666503906, 0.6486167907714844, -0.077972412109375, -0.3894920349121094, 0.2182159423828125, -0.919036865234375, 0.78155517578125, -0.2024059295654297, -0.6825180053710938, -0.038532257080078125, 0.154815673828125, 0.15358734130859375, -0.3866119384765625, 0.04154205322265625, 0.3796272277832031, 0.8212814331054688, -0.2986297607421875, 0.7134170532226562, 0.12494659423828125, -0.3333892822265625, -0.2069530487060547, -0.4066925048828125, -0.067291259765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000004.npy"}
|
||||
{"epoch": 0.005873715124816446, "step": 5, "batch_size": 64, "mean": -0.0020435750484466553, "std": 0.3818022608757019, "min": -1.0519561767578125, "p10": -0.4678718566894531, "median": -0.013476371765136719, "p90": 0.4520347595214844, "max": 1.4437942504882812, "pos_frac": 0.5, "sample": [-0.5406723022460938, 0.5097732543945312, -0.0779876708984375, 0.28507232666015625, -0.19936370849609375, -0.3983001708984375, 0.21741104125976562, -0.053516387939453125, -0.1872577667236328, -0.6293258666992188, 0.03513336181640625, 0.151123046875, 0.010675430297851562, 0.5047073364257812, -0.07842254638671875, -0.2365264892578125, 0.07879638671875, -0.2579765319824219, 0.07564544677734375, -0.140960693359375, -0.04438018798828125, 0.40729522705078125, 0.3504905700683594, -0.04123687744140625, 0.042324066162109375, -0.22156524658203125, 0.3556060791015625, -0.49768829345703125, -0.25897216796875, -0.1252288818359375, 0.04027557373046875, 0.07487106323242188, 0.439788818359375, -0.10175704956054688, 0.05572700500488281, 0.1066436767578125, 0.6526260375976562, -0.037628173828125, 0.45728302001953125, 0.232147216796875, 0.2482452392578125, 0.30036163330078125, 0.5230636596679688, -1.0519561767578125, 1.4437942504882812, -0.20098876953125, -0.36346435546875, 0.17290496826171875, -0.2476654052734375, -0.6851654052734375, 0.3643608093261719, -0.24559783935546875, -0.30460357666015625, -0.2303619384765625, 0.09027099609375, 0.11786651611328125, -0.240325927734375, -0.549072265625, -0.21100616455078125, 0.13555908203125, -0.5061874389648438, 0.202789306640625, -0.3787994384765625, 0.5305404663085938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000005.npy"}
|
||||
{"epoch": 0.007342143906020558, "step": 6, "batch_size": 64, "mean": 0.0013370811939239502, "std": 0.3965916335582733, "min": -0.884521484375, "p10": -0.5403009414672851, "median": 0.041778564453125, "p90": 0.43555145263671885, "max": 0.876556396484375, "pos_frac": 0.53125, "sample": [0.2621002197265625, 0.14189720153808594, -0.061717987060546875, -0.3359222412109375, 0.08560943603515625, 0.778656005859375, -0.5179119110107422, -0.210540771484375, 0.288482666015625, -0.2541351318359375, 0.2830543518066406, 0.17195510864257812, -0.354583740234375, 0.37310218811035156, -0.3335304260253906, -0.5501861572265625, -0.7698211669921875, -0.1726207733154297, 0.3455390930175781, -0.02059173583984375, -0.0502166748046875, -0.02367401123046875, 0.758575439453125, 0.00298309326171875, 0.07989501953125, 0.064056396484375, 0.40869903564453125, 0.32279205322265625, 0.08235931396484375, 0.3210716247558594, -0.168914794921875, 0.09842109680175781, 0.6276321411132812, 0.31247711181640625, 0.44705963134765625, -0.332733154296875, -0.884521484375, -0.5848236083984375, 0.18653106689453125, -0.76116943359375, -0.475982666015625, -0.145843505859375, 0.276641845703125, 0.6478900909423828, 0.23475265502929688, 0.06482315063476562, 0.2370166778564453, 0.357208251953125, 0.6431732177734375, -0.3962554931640625, -0.4061279296875, 0.06789398193359375, -0.549896240234375, -0.1432647705078125, 0.876556396484375, -0.2908058166503906, -0.034820556640625, -0.6832275390625, -0.296661376953125, 0.13542938232421875, 0.019500732421875, -0.23724746704101562, 0.35137939453125, -0.221893310546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000006.npy"}
|
||||
{"epoch": 0.00881057268722467, "step": 7, "batch_size": 64, "mean": 0.008150070905685425, "std": 0.3914256691932678, "min": -0.8945465087890625, "p10": -0.5682880401611328, "median": 0.015428543090820312, "p90": 0.5658348083496099, "max": 0.7113742828369141, "pos_frac": 0.515625, "sample": [-0.20085525512695312, -0.5913543701171875, 0.07504653930664062, -0.08150863647460938, -0.8945465087890625, -0.04071807861328125, -0.4536552429199219, 0.362091064453125, -0.2231597900390625, 0.6511383056640625, 0.64111328125, -0.22243499755859375, 0.03455352783203125, 0.23462677001953125, -0.1164398193359375, 0.3066558837890625, 0.09305191040039062, -0.24335289001464844, -0.3175926208496094, -0.8755645751953125, 0.23351669311523438, 0.0633544921875, 0.297119140625, 0.6741256713867188, 0.00726318359375, -0.08202362060546875, -0.07105636596679688, -0.16195297241210938, -0.5248985290527344, 0.11455917358398438, 0.023593902587890625, 0.6302337646484375, -0.13311386108398438, 0.152801513671875, 0.3394317626953125, 0.44362640380859375, -0.14771270751953125, 0.25092506408691406, -0.752410888671875, 0.0811920166015625, -0.00994873046875, -0.467681884765625, 0.33849334716796875, -0.586883544921875, 0.412628173828125, 0.21759033203125, 0.4282798767089844, -0.6349372863769531, 0.258087158203125, -0.42649078369140625, 0.3928375244140625, -0.14083099365234375, -0.5999908447265625, 0.6182098388671875, -0.04772186279296875, 0.08692169189453125, -0.1614837646484375, -0.055530548095703125, 0.7113742828369141, 0.224334716796875, 0.6289138793945312, 0.2887115478515625, -0.402435302734375, -0.1265106201171875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000007.npy"}
|
||||
{"epoch": 0.010279001468428781, "step": 8, "batch_size": 64, "mean": -0.06675609946250916, "std": 0.47908222675323486, "min": -1.8746337890625, "p10": -0.4663200378417968, "median": -0.03059864044189453, "p90": 0.46268768310546887, "max": 1.20831298828125, "pos_frac": 0.4375, "sample": [0.496002197265625, 0.57891845703125, -0.39013671875, -0.22651100158691406, -0.00994110107421875, -0.05598640441894531, 0.0227508544921875, 0.20383453369140625, 0.811737060546875, 0.3819694519042969, -0.03282928466796875, 0.4108619689941406, 0.358612060546875, 0.027980804443359375, 0.5872116088867188, -0.49897003173828125, 0.14293670654296875, -0.22927474975585938, 0.4324493408203125, -0.684356689453125, 0.0155487060546875, -1.37628173828125, -0.11887359619140625, -0.1641998291015625, -0.028367996215820312, -0.1390533447265625, -0.365631103515625, -0.24326133728027344, -0.17742919921875, -0.9927978515625, 0.180084228515625, -0.28577423095703125, -0.16224288940429688, 0.47564697265625, 0.07024383544921875, -0.8657684326171875, -0.15587615966796875, 0.170562744140625, -0.12148284912109375, 0.1013946533203125, 0.5554084777832031, -0.312652587890625, -0.17198944091796875, 0.303375244140625, -0.024442672729492188, -1.8746337890625, -1.02923583984375, 0.05600738525390625, 0.05843925476074219, 0.05811309814453125, -0.3547935485839844, -0.02099609375, -0.3747406005859375, -0.2860527038574219, 1.20831298828125, -0.24434661865234375, -0.344573974609375, 0.02495574951171875, 0.24810791015625, -0.1152801513671875, -0.08338165283203125, 0.39325904846191406, -0.12433242797851562, 0.0393829345703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000008.npy"}
|
||||
{"epoch": 0.011747430249632892, "step": 9, "batch_size": 64, "mean": 0.04486680030822754, "std": 0.4689461886882782, "min": -0.9081192016601562, "p10": -0.4981060028076172, "median": -0.015382766723632812, "p90": 0.7157386779785161, "max": 1.370941162109375, "pos_frac": 0.453125, "sample": [-0.04701995849609375, -0.08383941650390625, 0.5558547973632812, 0.0529632568359375, 0.1340179443359375, -0.31787109375, -0.123291015625, -0.010379791259765625, 0.23369598388671875, 0.573211669921875, -0.9081192016601562, -0.135833740234375, -0.3350372314453125, -0.38775634765625, -0.498626708984375, -0.542694091796875, 0.5618057250976562, -0.2587928771972656, -0.2041473388671875, -0.46242523193359375, -0.4968910217285156, 0.10725975036621094, 0.25360107421875, 0.39278411865234375, 0.428375244140625, 0.7698440551757812, 0.5016555786132812, -0.17193603515625, -0.10378074645996094, 0.7988090515136719, -0.26000213623046875, -0.3418560028076172, -0.1324310302734375, 0.34798431396484375, -0.1807384490966797, -0.7216110229492188, -0.08886528015136719, -0.010862350463867188, 0.15771484375, 0.8534622192382812, -0.28131866455078125, -0.102783203125, -0.64422607421875, 0.2999744415283203, 0.8234481811523438, -0.4159126281738281, -0.109832763671875, -0.282135009765625, -0.01845550537109375, 0.13504600524902344, 0.1037139892578125, 0.4398345947265625, 0.5894927978515625, 0.7965068817138672, 0.3904743194580078, 0.2211761474609375, 0.513824462890625, 0.1080780029296875, -0.012310028076171875, 1.370941162109375, -0.8741912841796875, 0.9115676879882812, -0.6856536865234375, -0.30401611328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000009.npy"}
|
||||
{"epoch": 0.013215859030837005, "step": 10, "batch_size": 64, "mean": 0.010898619890213013, "std": 0.3509206771850586, "min": -0.92779541015625, "p10": -0.43809661865234373, "median": 0.034397125244140625, "p90": 0.3877540588378907, "max": 0.947113037109375, "pos_frac": 0.578125, "sample": [-0.1223907470703125, -0.15765380859375, -0.11557197570800781, 0.0459747314453125, -0.6266326904296875, 0.24504852294921875, 0.08158302307128906, 0.1091461181640625, 0.0791015625, -0.75146484375, -0.24169158935546875, 0.2315521240234375, 0.08174896240234375, 0.4204864501953125, -0.9104690551757812, 0.272796630859375, -0.02364349365234375, 0.00060272216796875, 0.18499183654785156, 0.04277801513671875, 0.19731903076171875, -0.16713714599609375, 0.3962249755859375, 0.08259391784667969, -0.14957809448242188, -0.1997833251953125, -0.1796722412109375, -0.4497833251953125, 0.07325935363769531, 0.4877586364746094, -0.001190185546875, 0.13477325439453125, 0.0260162353515625, 0.291748046875, -0.15260696411132812, -0.3619956970214844, -0.92779541015625, 0.746246337890625, 0.29174041748046875, 0.016330718994140625, -0.09088897705078125, -0.41082763671875, -0.13222122192382812, -0.561553955078125, 0.1157989501953125, 0.49604034423828125, 0.30355072021484375, 0.36798858642578125, -0.5321273803710938, -0.1103973388671875, 0.29192352294921875, -0.011606216430664062, 0.0076446533203125, 0.3494148254394531, -0.17657470703125, -0.07057380676269531, 0.5417098999023438, 0.1631317138671875, 0.006866455078125, 0.947113037109375, 0.2855377197265625, -0.38059043884277344, 0.1522502899169922, 0.1451416015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000010.npy"}
|
||||
{"epoch": 0.014684287812041116, "step": 11, "batch_size": 64, "mean": -0.0021438300609588623, "std": 0.42058265209198, "min": -1.602783203125, "p10": -0.47995758056640625, "median": 0.02982330322265625, "p90": 0.4522550582885745, "max": 1.2443389892578125, "pos_frac": 0.53125, "sample": [-0.5373077392578125, 0.8910675048828125, 0.48175048828125, 0.2575492858886719, -0.00738525390625, 0.05219268798828125, 0.21181488037109375, 0.16829681396484375, -0.12884140014648438, 0.01953125, 0.2110748291015625, 0.11391830444335938, -0.295013427734375, 0.37500762939453125, 0.1318359375, 0.0591583251953125, 0.2777099609375, -0.08592605590820312, 0.531402587890625, 0.3409767150878906, 0.3834857940673828, 0.19478607177734375, -0.43502235412597656, -0.039947509765625, -0.342010498046875, 0.32387542724609375, 0.09609222412109375, -0.58294677734375, 0.026214599609375, 0.3353004455566406, 0.3179779052734375, -0.746307373046875, 0.3101844787597656, -0.184783935546875, -0.1096649169921875, 0.03814697265625, 0.48172760009765625, 0.5416793823242188, -0.4799041748046875, -0.1508159637451172, -0.24485397338867188, -0.11817169189453125, -0.16374969482421875, -0.716796875, 0.20481109619140625, 0.15570831298828125, 0.6775741577148438, -0.06166267395019531, -1.602783203125, 0.08678436279296875, -0.13307952880859375, -0.47998046875, 0.0334320068359375, -0.6517715454101562, 0.11272048950195312, -0.2563133239746094, -0.12106704711914062, -0.21692657470703125, -0.11600494384765625, 1.2443389892578125, -0.3728179931640625, 0.06646919250488281, -0.09986114501953125, -0.4100837707519531], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000011.npy"}
|
||||
{"epoch": 0.016152716593245228, "step": 12, "batch_size": 64, "mean": 0.0008526891469955444, "std": 0.34676823019981384, "min": -0.8912353515625, "p10": -0.45575485229492185, "median": 0.024866104125976562, "p90": 0.39233551025390634, "max": 0.801666259765625, "pos_frac": 0.53125, "sample": [0.801666259765625, -0.052764892578125, 0.30525970458984375, -0.6031646728515625, -0.1665496826171875, -0.01507568359375, 0.1254425048828125, 0.25146484375, -0.10025787353515625, 0.6085395812988281, 0.1500091552734375, -0.5539703369140625, -0.398529052734375, 0.15702056884765625, -0.081756591796875, 0.5852432250976562, -0.009552001953125, 0.205902099609375, -0.07745170593261719, -0.0780029296875, -0.8912353515625, -0.3318023681640625, -0.4866943359375, -0.6842575073242188, -0.295196533203125, 0.15395736694335938, 0.2031707763671875, 0.4018707275390625, 0.0743255615234375, -0.47104644775390625, 0.2057952880859375, 0.2466278076171875, 0.06932449340820312, 0.2429676055908203, 0.370086669921875, 0.18226242065429688, 0.20254898071289062, -0.24981689453125, -0.29582977294921875, -0.1760730743408203, -0.367950439453125, 0.17668533325195312, -0.4052753448486328, 0.2955818176269531, 0.11685943603515625, 0.04346466064453125, 0.7606964111328125, 0.042652130126953125, 0.20645999908447266, 0.0650787353515625, -0.03275299072265625, 0.7742538452148438, 0.13767242431640625, 0.41765594482421875, -0.207977294921875, -0.49383544921875, 0.007080078125, -0.2987823486328125, -0.06473350524902344, -0.14650726318359375, 0.105255126953125, -0.18754196166992188, -0.420074462890625, 0.0061492919921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000012.npy"}
|
||||
{"epoch": 0.01762114537444934, "step": 13, "batch_size": 64, "mean": 0.061279088258743286, "std": 0.4573952555656433, "min": -1.561431884765625, "p10": -0.37520751953125, "median": 0.016699790954589844, "p90": 0.46484146118164077, "max": 1.6549301147460938, "pos_frac": 0.53125, "sample": [0.1220855712890625, -0.10704994201660156, 0.25603485107421875, -0.3028717041015625, 0.229827880859375, 0.4806938171386719, 0.3888702392578125, -0.04447364807128906, 0.7571029663085938, 0.19808578491210938, 0.5288619995117188, 0.9396209716796875, -0.39174652099609375, -0.33661651611328125, -0.2950172424316406, 1.6549301147460938, -0.4322662353515625, -0.171875, -0.09416389465332031, 0.339508056640625, -0.1836109161376953, 0.3298492431640625, -0.4017486572265625, -0.027362823486328125, 0.07223320007324219, -0.1067657470703125, 0.024929046630859375, -0.1934223175048828, -0.3205680847167969, -0.3932342529296875, -0.20317840576171875, 0.202972412109375, 0.15372467041015625, 0.10444259643554688, -1.561431884765625, -0.23455047607421875, -0.2807769775390625, 0.4817047119140625, -0.2641315460205078, 0.103912353515625, 0.4278526306152344, 0.15764617919921875, -0.13421630859375, -0.23464202880859375, 0.36466217041015625, 0.008470535278320312, -0.02608489990234375, 0.24178504943847656, 0.3563423156738281, 0.00714874267578125, -0.528106689453125, 0.2627716064453125, 0.093170166015625, -0.0226287841796875, -0.29701995849609375, 0.2069091796875, 0.251708984375, 0.37561798095703125, 1.50335693359375, -0.0155792236328125, -0.2869110107421875, 0.3036346435546875, 0.4151153564453125, -0.5316696166992188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000013.npy"}
|
||||
{"epoch": 0.01908957415565345, "step": 14, "batch_size": 64, "mean": 0.021842211484909058, "std": 0.34489062428474426, "min": -0.734710693359375, "p10": -0.43193359374999996, "median": 0.04969596862792969, "p90": 0.4058456420898439, "max": 0.7517547607421875, "pos_frac": 0.53125, "sample": [-0.44061279296875, -0.02402496337890625, 0.13491249084472656, -0.2089691162109375, 0.7517547607421875, -0.2097930908203125, -0.734710693359375, -0.41130828857421875, 0.7032012939453125, 0.27880859375, 0.36696624755859375, 0.06336212158203125, 0.6293182373046875, 0.248687744140625, -0.4853248596191406, -0.08844757080078125, -0.08235931396484375, -0.23180389404296875, 0.051288604736328125, -0.011896133422851562, 0.3152008056640625, -0.06915283203125, 0.42250823974609375, -0.058929443359375, -0.323577880859375, -0.1138458251953125, -0.6356201171875, 0.19847488403320312, 0.2648468017578125, 0.5945663452148438, 0.08269882202148438, -0.1856365203857422, 0.2945709228515625, 0.11574935913085938, 0.21610450744628906, -0.02217864990234375, 0.22323989868164062, -0.35321044921875, -0.10290718078613281, 0.04810333251953125, 0.350555419921875, -0.41168212890625, -0.4467010498046875, 0.48981475830078125, 0.2503929138183594, 0.33637237548828125, 0.3461132049560547, -0.6488571166992188, -0.254302978515625, -0.32155609130859375, -0.7107086181640625, 0.17986297607421875, 0.2840118408203125, 0.16788482666015625, -0.224578857421875, 0.5309600830078125, 0.28179931640625, 0.0439910888671875, -0.35358428955078125, 0.28192138671875, -0.1885814666748047, 0.11767387390136719, 0.12062835693359375, -0.03358268737792969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000014.npy"}
|
||||
{"epoch": 0.020558002936857563, "step": 15, "batch_size": 64, "mean": -0.017623186111450195, "std": 0.4012446403503418, "min": -1.5726242065429688, "p10": -0.5454650878906249, "median": 0.01195526123046875, "p90": 0.39934329986572287, "max": 0.7709579467773438, "pos_frac": 0.515625, "sample": [-0.05806922912597656, -0.20361328125, 0.20895004272460938, 0.2935657501220703, 0.278656005859375, 0.1763439178466797, -0.13550949096679688, -0.028835296630859375, -0.059326171875, 0.72149658203125, -0.2027740478515625, -0.252471923828125, 0.11908721923828125, -0.5892181396484375, 0.41908836364746094, 0.324737548828125, 0.33556365966796875, -0.27362823486328125, 0.10589790344238281, 0.06233978271484375, 0.119903564453125, -1.5726242065429688, -0.33148956298828125, 0.6001358032226562, -0.6218128204345703, -0.10301971435546875, 0.23191070556640625, 0.14885711669921875, -0.29769134521484375, 0.7108688354492188, -0.019683837890625, -0.9639434814453125, -0.6679000854492188, -0.05470466613769531, 0.039093017578125, 0.0514678955078125, -0.4433746337890625, 0.7709579467773438, 0.0060882568359375, -0.22031021118164062, -0.0323638916015625, 0.090850830078125, 0.1665515899658203, 0.21370506286621094, -0.02274322509765625, -0.08484268188476562, 0.2681446075439453, -0.24361419677734375, 0.5279617309570312, -0.14565277099609375, 0.1660003662109375, 0.017822265625, 0.5286865234375, 0.11658477783203125, 0.1647186279296875, 0.353271484375, -0.254791259765625, -0.1146240234375, 0.16510772705078125, -0.6669769287109375, -0.40872955322265625, -0.7394676208496094, -0.020977020263671875, 0.202484130859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000015.npy"}
|
||||
{"epoch": 0.022026431718061675, "step": 16, "batch_size": 64, "mean": 0.016786575317382812, "std": 0.38721632957458496, "min": -0.863250732421875, "p10": -0.46660041809082026, "median": 0.014174461364746094, "p90": 0.3950428009033204, "max": 1.3713531494140625, "pos_frac": 0.53125, "sample": [0.1791515350341797, 1.3713531494140625, -0.2656402587890625, 0.05759429931640625, 0.3819236755371094, 0.9162368774414062, -0.193695068359375, -0.03982353210449219, 0.128875732421875, 0.18773651123046875, -0.5764312744140625, 0.01496124267578125, 0.18662643432617188, -0.03804779052734375, 0.013387680053710938, -0.598175048828125, -0.3739051818847656, -0.4165191650390625, -0.30437469482421875, 0.32678985595703125, 0.26335906982421875, 0.2675895690917969, 0.4814796447753906, 0.36592864990234375, 0.2686119079589844, -0.3044929504394531, -0.49190521240234375, -0.4841499328613281, -0.07135200500488281, -0.26750946044921875, 0.22983551025390625, 0.2878303527832031, 0.000148773193359375, -0.20579147338867188, 0.20528411865234375, 0.12607574462890625, 0.46538543701171875, 0.2749481201171875, -0.23508453369140625, -0.029541015625, 0.369781494140625, 0.1952362060546875, 0.12644004821777344, -0.02191162109375, 0.378692626953125, -0.14931106567382812, -0.2695732116699219, -0.20809555053710938, 0.16027069091796875, 0.143402099609375, -0.42565155029296875, -0.688873291015625, -0.33123779296875, -0.3338356018066406, -0.09099960327148438, -0.5596084594726562, 0.29944801330566406, 0.400665283203125, 0.5903892517089844, 0.5263824462890625, 0.2093505859375, -0.22084808349609375, -0.2671966552734375, -0.863250732421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000016.npy"}
|
||||
{"epoch": 0.023494860499265784, "step": 17, "batch_size": 64, "mean": 0.09929636120796204, "std": 0.3600854277610779, "min": -0.827545166015625, "p10": -0.43471813201904297, "median": 0.12248420715332031, "p90": 0.5412647247314454, "max": 0.823699951171875, "pos_frac": 0.703125, "sample": [-0.72216796875, -0.015764236450195312, 0.5465049743652344, 0.04975128173828125, -0.7012481689453125, -0.15966796875, 0.25244140625, -0.827545166015625, 0.3618316650390625, -0.5089950561523438, 0.6592178344726562, 0.3532867431640625, 0.140472412109375, 0.76605224609375, 0.2473602294921875, 0.122955322265625, 0.12201309204101562, 0.2874794006347656, 0.16664886474609375, 0.49005889892578125, 0.3737831115722656, 0.2732582092285156, 0.10744094848632812, -0.01690673828125, 0.00827789306640625, 0.15050506591796875, 0.2850456237792969, -0.11480712890625, 0.7548065185546875, -0.171661376953125, -0.03678131103515625, 0.5290374755859375, 0.10514068603515625, 0.30893707275390625, -0.1547393798828125, 0.7432022094726562, -0.24654769897460938, -0.4406147003173828, 0.19457244873046875, 0.05718994140625, 0.2081298828125, 0.18880462646484375, 0.19055557250976562, 0.037639617919921875, -0.6755828857421875, 0.0610809326171875, -0.04666900634765625, -0.4948883056640625, 0.00666046142578125, 0.06600189208984375, 0.097686767578125, 0.13376235961914062, 0.07684326171875, 0.305877685546875, 0.260223388671875, -0.42095947265625, 0.29229736328125, 0.5550880432128906, 0.823699951171875, -0.22843170166015625, 0.19370079040527344, -0.1668548583984375, 0.49776458740234375, 0.05271148681640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000017.npy"}
|
||||
{"epoch": 0.024963289280469897, "step": 18, "batch_size": 64, "mean": 0.05686900019645691, "std": 0.3101457953453064, "min": -0.80499267578125, "p10": -0.29288291931152344, "median": 0.049887657165527344, "p90": 0.4292278289794922, "max": 0.684967041015625, "pos_frac": 0.5625, "sample": [0.04390716552734375, -0.46121978759765625, 0.6697158813476562, -0.13033294677734375, -0.1910247802734375, -0.041126251220703125, -0.22491455078125, -0.019622802734375, -0.1902923583984375, 0.4796028137207031, -0.80499267578125, 0.09406280517578125, 0.431304931640625, 0.3408660888671875, -0.1706390380859375, 0.533355712890625, -0.2976036071777344, 0.07506179809570312, -0.2165679931640625, 0.24311065673828125, -0.052318572998046875, -0.3029022216796875, 0.2725677490234375, -0.08175849914550781, 0.4243812561035156, 0.25745391845703125, 0.3671722412109375, 0.09857940673828125, -0.009614944458007812, 0.06471824645996094, -0.07248687744140625, 0.21966934204101562, -0.28186798095703125, 0.188629150390625, 0.4890289306640625, -0.032299041748046875, 0.4019927978515625, 0.3745536804199219, -0.02312469482421875, 0.05461883544921875, 0.45305824279785156, 0.09679412841796875, -0.45407867431640625, -0.06961822509765625, 0.0535888671875, 0.684967041015625, 0.18536758422851562, 0.33693504333496094, -0.072998046875, 0.11085891723632812, 0.3647499084472656, -0.11977195739746094, -0.7802734375, -0.25081634521484375, 0.35318756103515625, 0.21102142333984375, 0.3770599365234375, 0.015565872192382812, -0.5023651123046875, 0.2822151184082031, 0.04618644714355469, -0.19122695922851562, 0.04555511474609375, -0.05599021911621094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000018.npy"}
|
||||
{"epoch": 0.02643171806167401, "step": 19, "batch_size": 64, "mean": 0.11293420195579529, "std": 0.39604896306991577, "min": -1.2044830322265625, "p10": -0.3118961334228515, "median": 0.12566566467285156, "p90": 0.6064468383789064, "max": 0.9469451904296875, "pos_frac": 0.671875, "sample": [0.2720794677734375, 0.2032794952392578, -0.06836700439453125, 0.0017547607421875, -0.26239776611328125, -0.084014892578125, 0.1305999755859375, -0.4168205261230469, 0.14213180541992188, 0.20325469970703125, 0.17994308471679688, -0.2732276916503906, 0.20635604858398438, 0.429931640625, 0.3072357177734375, -1.2044830322265625, 0.34710693359375, 0.30889892578125, -0.2469024658203125, -0.095611572265625, 0.517852783203125, -0.0109405517578125, 0.2151641845703125, 0.75067138671875, 0.18287086486816406, 0.1921367645263672, -0.5120468139648438, 0.19799041748046875, 0.1470794677734375, -0.01998138427734375, 0.39429473876953125, 0.009111404418945312, 0.010652542114257812, 0.12073135375976562, 0.9238510131835938, -0.32846832275390625, -0.4732513427734375, 0.0370025634765625, 0.1173248291015625, 0.493804931640625, 0.28314208984375, 0.70184326171875, -0.0031890869140625, -0.1812744140625, 0.03410530090332031, 0.9469451904296875, 0.3265571594238281, 0.630126953125, 0.05078125, 0.4975128173828125, 0.912811279296875, -0.08620834350585938, 0.13402557373046875, -0.09822463989257812, 0.81195068359375, 0.07309150695800781, -0.2617340087890625, -0.3322410583496094, 0.0462799072265625, -1.026336669921875, 0.5511932373046875, -0.020610809326171875, 0.15721893310546875, 0.03342437744140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000019.npy"}
|
||||
{"epoch": 0.027900146842878122, "step": 20, "batch_size": 64, "mean": 0.0998302698135376, "std": 0.3317307233810425, "min": -1.0104293823242188, "p10": -0.28984909057617186, "median": 0.0730733871459961, "p90": 0.4701065063476563, "max": 0.9316864013671875, "pos_frac": 0.671875, "sample": [0.057582855224609375, 0.23141098022460938, -0.006488800048828125, 0.18634033203125, 0.01474761962890625, 0.3693084716796875, 0.29482269287109375, -0.13469696044921875, 0.11853790283203125, 0.43012237548828125, -0.5548477172851562, 0.1746978759765625, 0.22563934326171875, 0.2854461669921875, 0.21934127807617188, 0.01229095458984375, 0.07136154174804688, 0.2881050109863281, -0.3321876525878906, -0.106292724609375, 0.4467315673828125, -0.08416748046875, 0.066162109375, -0.0679779052734375, -0.5048370361328125, 0.030992507934570312, 0.4786529541015625, 0.2269439697265625, -0.122711181640625, 0.07478523254394531, -0.10586357116699219, -0.2783775329589844, -0.3799705505371094, 0.5604457855224609, 0.35283660888671875, -0.5430145263671875, 0.5656585693359375, -1.0104293823242188, -0.2947654724121094, -0.1632843017578125, 0.24573516845703125, 0.0325775146484375, 0.03688812255859375, -0.027629852294921875, -0.030521392822265625, 0.14986419677734375, 0.5467681884765625, 0.9080352783203125, 0.6250076293945312, -0.12308502197265625, 0.0514373779296875, 0.37094879150390625, 0.30149078369140625, 0.03759765625, 0.9316864013671875, -0.2056293487548828, 0.450164794921875, 0.2695941925048828, -0.07779502868652344, 0.07985305786132812, 0.21150588989257812, 0.10394287109375, 0.3458442687988281, 0.06180381774902344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000020.npy"}
|
||||
{"epoch": 0.02936857562408223, "step": 21, "batch_size": 64, "mean": 0.10462629795074463, "std": 0.37146177887916565, "min": -0.9378890991210938, "p10": -0.2201000213623047, "median": 0.06570816040039062, "p90": 0.666172027587891, "max": 1.0224609375, "pos_frac": 0.59375, "sample": [0.546722412109375, 0.1352672576904297, 0.58477783203125, 0.84478759765625, 0.7371978759765625, -0.08111000061035156, -0.03908538818359375, 0.053863525390625, 0.697723388671875, 0.1389007568359375, 0.257904052734375, 0.3295440673828125, 0.25969696044921875, 0.20929336547851562, 0.3227500915527344, 0.5925521850585938, 0.1277618408203125, -0.10798263549804688, 0.023080825805664062, 0.12097930908203125, -0.231658935546875, 0.277435302734375, 0.218597412109375, -0.08354568481445312, -0.10222434997558594, 1.0224609375, -0.4655914306640625, -0.12164878845214844, -0.13383865356445312, 0.0146026611328125, -0.9378890991210938, 0.19686126708984375, -0.15532302856445312, 0.22036361694335938, 0.18634796142578125, -0.17905807495117188, -0.00746917724609375, 0.5629405975341797, 0.7080230712890625, -0.1606597900390625, 0.04274749755859375, 0.07755279541015625, -0.2193603515625, 0.023344039916992188, 0.10590362548828125, -0.22041702270507812, -0.19925308227539062, -0.06640625, 0.04005241394042969, -0.8892745971679688, -0.0294647216796875, 0.3585662841796875, 0.8631935119628906, 0.1910858154296875, -0.06293487548828125, -0.2796440124511719, -0.0287628173828125, -0.1072998046875, 0.7784347534179688, 0.10295295715332031, 0.09047698974609375, -0.5554428100585938, 0.15814208984375, -0.06146049499511719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000021.npy"}
|
||||
{"epoch": 0.030837004405286344, "step": 22, "batch_size": 64, "mean": 0.22439703345298767, "std": 0.37970617413520813, "min": -0.6068878173828125, "p10": -0.2606744766235351, "median": 0.22269058227539062, "p90": 0.7412811279296876, "max": 1.0443038940429688, "pos_frac": 0.734375, "sample": [0.23342132568359375, 0.40447998046875, 0.3388786315917969, 0.1977100372314453, 0.219329833984375, -0.394561767578125, 0.17913818359375, 0.17531776428222656, 0.0039005279541015625, 0.491668701171875, 1.0342864990234375, 0.36395263671875, -0.3720703125, 0.7935714721679688, -0.14838600158691406, -0.13037872314453125, 0.33269500732421875, 0.2882404327392578, -0.160003662109375, -0.13531494140625, 0.2823944091796875, 0.803497314453125, 0.133453369140625, -0.0624847412109375, 0.08047866821289062, 0.19295310974121094, 1.0443038940429688, -0.5489463806152344, 0.1584014892578125, 0.1956024169921875, 0.38654136657714844, 0.339141845703125, 0.385223388671875, -0.08498382568359375, 0.4972057342529297, 0.22605133056640625, 0.15508651733398438, -0.35044097900390625, -0.12910842895507812, 0.721649169921875, 0.28094482421875, 0.45227813720703125, 0.2640247344970703, 0.026012420654296875, -0.1954059600830078, 0.700531005859375, 0.20636749267578125, -0.6068878173828125, 0.7005767822265625, 0.23359298706054688, 0.020486831665039062, 0.953399658203125, -0.2886466979980469, -0.05322074890136719, -0.024169921875, 0.645538330078125, 0.4238739013671875, 0.1481170654296875, 0.5058975219726562, 0.308349609375, 0.3148193359375, 0.9700851440429688, 0.74969482421875, -0.5167446136474609], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000022.npy"}
|
||||
{"epoch": 0.032305433186490456, "step": 23, "batch_size": 64, "mean": 0.1467902660369873, "std": 0.46301591396331787, "min": -1.548126220703125, "p10": -0.3024099349975585, "median": 0.12230587005615234, "p90": 0.6603431701660156, "max": 1.5241928100585938, "pos_frac": 0.734375, "sample": [0.003879547119140625, 0.03089141845703125, 0.5865936279296875, 0.06989669799804688, 0.3020133972167969, 0.5431556701660156, 0.13614654541015625, 0.0372772216796875, -0.4854774475097656, -0.243743896484375, 0.480255126953125, 1.5241928100585938, 0.947113037109375, 0.00716400146484375, 0.36661529541015625, 0.1964111328125, -0.2470989227294922, 0.3145484924316406, 0.2441558837890625, -0.17110824584960938, 0.6646347045898438, -0.347686767578125, 0.678466796875, -0.08047866821289062, -0.6335887908935547, 0.234649658203125, -0.7099685668945312, -0.9809074401855469, 0.5538406372070312, -0.15313720703125, 0.0182037353515625, 0.525787353515625, 0.37078094482421875, -0.09206199645996094, 0.10846519470214844, 0.05461883544921875, 0.2638702392578125, 0.26892852783203125, 0.0679168701171875, 0.500396728515625, 0.08583450317382812, 0.05757331848144531, 0.045093536376953125, 0.5511245727539062, 0.14159774780273438, 0.9228057861328125, -0.12976455688476562, -1.548126220703125, 0.1843719482421875, -0.136627197265625, 0.4026813507080078, 0.044315338134765625, 0.0230560302734375, -0.3261146545410156, 0.1442127227783203, 0.25913238525390625, 0.25376129150390625, 0.44989013671875, -0.1340503692626953, 0.65032958984375, 0.0323486328125, -0.1537933349609375, 0.776824951171875, 0.8424873352050781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000023.npy"}
|
||||
{"epoch": 0.033773861967694566, "step": 24, "batch_size": 64, "mean": 0.22320020198822021, "std": 0.402773380279541, "min": -0.6936569213867188, "p10": -0.2961654663085937, "median": 0.19123077392578125, "p90": 0.7158939361572266, "max": 1.315521240234375, "pos_frac": 0.734375, "sample": [0.4739990234375, 0.5150337219238281, -0.07292747497558594, -0.041263580322265625, 0.127471923828125, 0.008403778076171875, 0.899505615234375, -0.3472900390625, 0.467681884765625, 0.2538719177246094, 0.200042724609375, -0.4091911315917969, 0.09105873107910156, 0.7195549011230469, 0.09796524047851562, -0.0875091552734375, 0.030376434326171875, 0.07292556762695312, 0.7073516845703125, -0.264068603515625, 0.4104766845703125, -0.6936569213867188, -0.23108863830566406, 0.12831878662109375, 0.2064971923828125, 0.698883056640625, 0.15404319763183594, 0.21343994140625, 0.5084075927734375, 0.5788421630859375, 1.315521240234375, 0.854248046875, -0.08236312866210938, -0.4307708740234375, 0.07297515869140625, -0.55816650390625, -0.16912841796875, 0.127197265625, 0.06626129150390625, -0.0202484130859375, 0.9213104248046875, 0.2735252380371094, 0.1824188232421875, 0.3262481689453125, 0.34758758544921875, 1.17071533203125, 0.6554412841796875, 0.01650238037109375, 0.90380859375, 0.21118545532226562, 0.6292495727539062, -0.053363800048828125, 0.30200958251953125, 0.25659751892089844, 0.2701568603515625, -0.060272216796875, 0.4400615692138672, 0.36255645751953125, 0.43004608154296875, -0.36792755126953125, 0.1646728515625, -0.3099212646484375, 0.0395660400390625, 0.5799560546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000024.npy"}
|
||||
{"epoch": 0.03524229074889868, "step": 25, "batch_size": 64, "mean": 0.22116953134536743, "std": 0.4490810036659241, "min": -0.61004638671875, "p10": -0.2357868194580078, "median": 0.1379718780517578, "p90": 0.7662322998046877, "max": 2.06427001953125, "pos_frac": 0.671875, "sample": [0.21707534790039062, 0.3070793151855469, 0.805419921875, -0.23722076416015625, 0.14038467407226562, -0.15633010864257812, -0.20439910888671875, 2.06427001953125, -0.212615966796875, 0.06940078735351562, -0.0272674560546875, 0.29193115234375, -0.004238128662109375, 0.2109832763671875, 0.9476852416992188, -0.5475368499755859, -0.24004554748535156, 0.6098098754882812, 0.10115814208984375, 1.0054473876953125, -0.2262115478515625, 0.1264495849609375, 0.1828899383544922, 0.09453582763671875, 0.107177734375, 0.2188720703125, 0.5089874267578125, 0.6143646240234375, -0.2612152099609375, 0.009927749633789062, 0.25208282470703125, 0.6233291625976562, 0.053714752197265625, 0.4053497314453125, -0.22042083740234375, -0.23235321044921875, 0.05461692810058594, 0.8665771484375, -0.1255340576171875, 0.34125518798828125, -0.33049583435058594, 0.6986465454101562, 0.28397369384765625, 0.2572212219238281, -0.23244094848632812, -0.18737030029296875, 0.06612396240234375, 0.373046875, 0.5647125244140625, 0.4203071594238281, 1.0803680419921875, 0.7772369384765625, 0.13555908203125, -0.61004638671875, 0.623748779296875, 0.11607933044433594, -0.036041259765625, 0.7367095947265625, 0.7405548095703125, -0.0752716064453125, 0.37126731872558594, 0.3094329833984375, -0.40122222900390625, -0.0626373291015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000025.npy"}
|
||||
{"epoch": 0.03671071953010279, "step": 26, "batch_size": 64, "mean": 0.40158015489578247, "std": 0.5813688635826111, "min": -1.1492462158203125, "p10": -0.23418464660644528, "median": 0.2835073471069336, "p90": 1.3271484375000004, "max": 1.807586669921875, "pos_frac": 0.734375, "sample": [0.26837158203125, 0.07583808898925781, 0.18416213989257812, 0.5594310760498047, 1.108123779296875, 0.26165008544921875, -0.1209259033203125, 0.8783721923828125, -0.294891357421875, -0.2542304992675781, 1.2155303955078125, 0.07894134521484375, 1.807586669921875, 0.38097381591796875, -0.023162841796875, 1.72015380859375, 0.9464645385742188, 0.17942047119140625, 0.581939697265625, -0.24375534057617188, 0.1301746368408203, -0.1689453125, 0.4237213134765625, 0.7709121704101562, -0.019287109375, 1.3927154541015625, 0.1481781005859375, -1.1492462158203125, 0.2986431121826172, 0.7373046875, -0.10262298583984375, -0.00041961669921875, 0.3846263885498047, 0.3659553527832031, -0.09452247619628906, 0.3298492431640625, 1.562591552734375, 0.26409912109375, 0.4042930603027344, 1.3749847412109375, 0.42971038818359375, 0.2078094482421875, 1.734130859375, 0.5643768310546875, 0.35453033447265625, 1.4014434814453125, -0.3791465759277344, 0.1725006103515625, 0.1091766357421875, -0.0050983428955078125, -0.020763397216796875, 0.11617279052734375, -0.2761974334716797, 0.5325393676757812, -0.21185302734375, 0.46038818359375, 0.9353866577148438, 1.000152587890625, 1.0299835205078125, 0.06497573852539062, 0.16949081420898438, -0.3465385437011719, 0.5629196166992188, 0.7320404052734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000026.npy"}
|
||||
{"epoch": 0.0381791483113069, "step": 27, "batch_size": 64, "mean": 0.5132263898849487, "std": 0.5736640691757202, "min": -0.8043670654296875, "p10": -0.08888587951660153, "median": 0.40970325469970703, "p90": 1.170953369140625, "max": 2.3391799926757812, "pos_frac": 0.84375, "sample": [0.10695648193359375, 0.9900588989257812, 0.7182083129882812, 0.6809616088867188, 0.6716270446777344, 1.1599884033203125, 0.16571044921875, -0.0552520751953125, 1.4199905395507812, -0.2798442840576172, 0.83612060546875, 0.32613372802734375, 0.325897216796875, 0.2969036102294922, 0.402740478515625, 0.243865966796875, 2.3391799926757812, 0.38199615478515625, 0.4209403991699219, 0.17266845703125, -0.18584442138671875, -0.0567169189453125, 0.35794830322265625, -0.311614990234375, 0.35890960693359375, 0.41666603088378906, 0.03230857849121094, 0.03765869140625, 0.8281822204589844, 2.1421127319335938, 0.3919830322265625, 0.015802383422851562, -0.10267257690429688, 2.071136474609375, 0.558441162109375, 1.2005538940429688, 1.0322265625, 0.587188720703125, 0.45034027099609375, 0.3512077331542969, -0.4025726318359375, 0.8390274047851562, 0.200225830078125, 0.6928939819335938, 0.20025634765625, 0.65185546875, -0.8043670654296875, 0.4685039520263672, 1.1647262573242188, 0.9558868408203125, 1.327880859375, 0.2576484680175781, 0.2883453369140625, 0.690093994140625, 0.67999267578125, 0.49684906005859375, 1.1736221313476562, -0.13791465759277344, 0.5488929748535156, 0.246917724609375, 0.47397613525390625, 0.9851303100585938, -0.042266845703125, 0.3902130126953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000027.npy"}
|
||||
{"epoch": 0.039647577092511016, "step": 28, "batch_size": 64, "mean": 0.32913774251937866, "std": 0.6369958519935608, "min": -2.04827880859375, "p10": -0.310635757446289, "median": 0.2778129577636719, "p90": 1.1896972656250002, "max": 1.8759918212890625, "pos_frac": 0.671875, "sample": [0.5061454772949219, 0.2642974853515625, -0.012115478515625, 0.5277786254882812, 1.0087451934814453, -0.1164703369140625, -0.2759246826171875, 0.16132736206054688, -0.3745765686035156, 0.2512054443359375, -0.13594818115234375, 0.8706512451171875, 1.2159423828125, -0.10254669189453125, 0.6830978393554688, -0.3255119323730469, 0.2859649658203125, 0.7792816162109375, -0.0644073486328125, 0.26966094970703125, 0.3609733581542969, -0.46894073486328125, 0.03980255126953125, 1.2099761962890625, -0.01279449462890625, 0.7466354370117188, 0.7201080322265625, 0.87298583984375, 1.2788772583007812, 0.5294952392578125, 0.39263153076171875, 0.524566650390625, -0.10486602783203125, 0.119110107421875, 0.7442359924316406, 0.3870735168457031, 0.7718906402587891, 1.2410125732421875, 0.11983108520507812, -0.11624908447265625, 0.12928390502929688, 0.45693397521972656, -0.020122528076171875, 0.873504638671875, 1.5539703369140625, 1.250091552734375, 0.7324600219726562, 0.29053688049316406, 0.9674148559570312, 0.007904052734375, 1.0177268981933594, -0.921630859375, -0.4544486999511719, -0.00827789306640625, 0.032825469970703125, 0.41602325439453125, 0.12654876708984375, 1.1423797607421875, 1.8759918212890625, -0.1390838623046875, -0.10292816162109375, -2.04827880859375, -0.7722625732421875, -0.1147003173828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000028.npy"}
|
||||
{"epoch": 0.041116005873715125, "step": 29, "batch_size": 64, "mean": 0.5947141647338867, "std": 0.5538198947906494, "min": -0.3796348571777344, "p10": -0.06402015686035153, "median": 0.5235109329223633, "p90": 1.4383010864257815, "max": 1.9280853271484375, "pos_frac": 0.84375, "sample": [1.1150321960449219, 0.0172576904296875, 0.4709930419921875, 0.5981292724609375, -0.0771331787109375, 0.6295299530029297, 0.186767578125, 1.1628799438476562, 0.5758399963378906, 0.224273681640625, 0.47406768798828125, 0.06081581115722656, 0.9008026123046875, 0.6577301025390625, -0.10687255859375, 1.10638427734375, 1.5722808837890625, -0.3796348571777344, -0.04295158386230469, 1.5940017700195312, 0.0515899658203125, 0.661224365234375, 0.4817771911621094, 0.5319347381591797, 1.1547088623046875, 0.7646636962890625, -0.03336334228515625, 0.2674827575683594, 1.211822509765625, 0.406768798828125, -0.0236663818359375, 1.1988296508789062, 1.9280853271484375, 0.01522064208984375, 0.7146148681640625, 0.14791488647460938, 0.56634521484375, -0.228668212890625, 1.3050537109375, 1.45489501953125, 0.769622802734375, 1.3995819091796875, 0.5002593994140625, 0.6626663208007812, 0.40545082092285156, -0.07304954528808594, 1.1971588134765625, -0.085845947265625, 0.6910057067871094, 0.016588211059570312, 0.3462028503417969, 0.23114013671875, 1.0866622924804688, 0.2610931396484375, 0.7463111877441406, 1.669281005859375, -0.1955718994140625, 0.43012237548828125, 0.4223365783691406, 0.67315673828125, 1.58050537109375, 1.493743896484375, 0.00077056884765625, 0.5150871276855469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000029.npy"}
|
||||
{"epoch": 0.042584434654919234, "step": 30, "batch_size": 64, "mean": 0.6149011850357056, "std": 0.5518370866775513, "min": -0.50909423828125, "p10": -0.017597007751464833, "median": 0.6128025054931641, "p90": 1.268630981445313, "max": 2.1337890625, "pos_frac": 0.875, "sample": [0.7926712036132812, 0.5622406005859375, -0.13998031616210938, 1.1211929321289062, 1.038787841796875, -0.47267913818359375, 0.08787155151367188, 0.568572998046875, -0.0049571990966796875, 0.690765380859375, 0.589508056640625, 0.8612594604492188, 0.3692169189453125, 1.0328636169433594, 0.084075927734375, 1.5294189453125, 0.5402374267578125, 0.56732177734375, -0.023014068603515625, 1.619781494140625, 0.814056396484375, 0.9200286865234375, 2.1337890625, 1.0419464111328125, 1.0943832397460938, 0.3710765838623047, 0.2862548828125, 0.3070220947265625, 0.05962562561035156, 0.10809326171875, 0.53076171875, -0.0350494384765625, 0.7018814086914062, -0.1667957305908203, 0.07328987121582031, 1.0692214965820312, 1.158935546875, 0.8731689453125, 1.4751739501953125, 0.7425613403320312, 0.08707237243652344, 0.9431610107421875, 0.6669960021972656, 0.5367813110351562, -0.50909423828125, 1.315643310546875, 0.9628753662109375, 0.13909149169921875, 1.1071281433105469, 0.7194671630859375, 1.3461685180664062, 0.9292831420898438, 0.45667457580566406, -0.4483623504638672, 0.25360107421875, 0.8258514404296875, 0.6975860595703125, 0.13879776000976562, 0.7302932739257812, 0.1351490020751953, 0.10467529296875, 2.0146484375, 0.6161270141601562, 0.6094779968261719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000030.npy"}
|
||||
{"epoch": 0.04405286343612335, "step": 31, "batch_size": 64, "mean": 0.4925113320350647, "std": 0.7040557861328125, "min": -1.121368408203125, "p10": -0.24380531311035153, "median": 0.352630615234375, "p90": 1.419279479980469, "max": 3.0770263671875, "pos_frac": 0.78125, "sample": [-0.08755683898925781, 1.6299400329589844, 0.14349365234375, 0.19375991821289062, 1.70068359375, 0.38356781005859375, -0.3941383361816406, -0.206146240234375, 0.19841766357421875, -0.1540985107421875, 3.0770263671875, 0.1778564453125, 0.259521484375, 0.2762584686279297, 0.024951934814453125, 0.12266349792480469, 0.8970947265625, 0.3632831573486328, 1.5557098388671875, 1.00860595703125, 0.09022140502929688, -0.2599449157714844, 0.22733306884765625, -0.5262832641601562, 0.35422515869140625, 0.3695793151855469, 1.1988258361816406, 0.3629798889160156, 0.5652198791503906, 0.3303031921386719, 1.06414794921875, 0.6981964111328125, 1.455841064453125, -0.325408935546875, -0.017589569091796875, 0.2162628173828125, 0.17481231689453125, 0.8938064575195312, -0.0204010009765625, 1.06695556640625, 1.0282135009765625, 1.2874298095703125, -0.3101348876953125, 0.414825439453125, 0.461395263671875, -1.121368408203125, -0.2919464111328125, 0.7305221557617188, 0.015995025634765625, -0.12321090698242188, 0.35103607177734375, 1.2177581787109375, 1.5823211669921875, -0.115692138671875, 0.0234222412109375, 0.32241058349609375, 0.5273513793945312, 0.202972412109375, 1.2421302795410156, 0.4684181213378906, 1.3339691162109375, 2.2417526245117188, 0.578094482421875, 0.3630828857421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000031.npy"}
|
||||
{"epoch": 0.04552129221732746, "step": 32, "batch_size": 64, "mean": 0.6792212724685669, "std": 0.877557635307312, "min": -1.164154052734375, "p10": -0.17584266662597653, "median": 0.45807743072509766, "p90": 1.9187961578369146, "max": 3.9172821044921875, "pos_frac": 0.84375, "sample": [0.2296905517578125, 0.307342529296875, 0.47939300537109375, 0.9735488891601562, 0.5958671569824219, 0.2294464111328125, 0.108978271484375, 0.8360137939453125, 0.5835247039794922, 1.2364273071289062, 1.1043701171875, 0.3771858215332031, -0.26372528076171875, 0.4613361358642578, -1.164154052734375, 0.7550544738769531, -0.8285484313964844, 0.3837318420410156, -0.25974273681640625, 2.076171875, 0.6710433959960938, 1.8055343627929688, 1.966766357421875, 0.3139495849609375, 1.114654541015625, 0.4114227294921875, 0.085662841796875, 0.03652191162109375, -0.122802734375, 1.152008056640625, 0.3178558349609375, 0.2995624542236328, 0.1996631622314453, 2.75750732421875, 1.8068656921386719, 0.17986679077148438, 0.4132537841796875, 0.32537841796875, -0.19601058959960938, 0.5958023071289062, 1.76318359375, 1.0230998992919922, -0.1287841796875, 0.4548187255859375, 3.9172821044921875, 0.8180770874023438, 0.48433685302734375, 2.4698867797851562, 2.1191864013671875, -0.40593719482421875, -0.015886306762695312, 0.116363525390625, 2.43682861328125, 0.8096847534179688, -0.3775634765625, 0.2006664276123047, 1.109039306640625, 1.446502685546875, 0.24913787841796875, 0.749908447265625, 0.25478363037109375, 0.4938201904296875, 1.017913818359375, 0.10739517211914062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000032.npy"}
|
||||
{"epoch": 0.04698972099853157, "step": 33, "batch_size": 64, "mean": 0.6040312051773071, "std": 0.631873607635498, "min": -0.6550674438476562, "p10": -0.18494796752929685, "median": 0.5633697509765625, "p90": 1.482192611694336, "max": 2.240386962890625, "pos_frac": 0.84375, "sample": [-0.16852569580078125, 0.3718223571777344, 0.5716533660888672, 0.5697784423828125, 2.240386962890625, 0.0491180419921875, -0.191986083984375, 0.72576904296875, 0.3926048278808594, 0.4575824737548828, 0.9939193725585938, 0.9843215942382812, 1.5687255859375, 1.1679534912109375, 0.58148193359375, 0.39234161376953125, 0.6475677490234375, -0.3611602783203125, 0.7010040283203125, -0.353546142578125, -0.12920379638671875, 1.479583740234375, 1.4833106994628906, 1.7376022338867188, 0.3271217346191406, 0.21031951904296875, 1.4783706665039062, 0.2850189208984375, 0.0069122314453125, 2.0924072265625, 0.0276031494140625, 0.27457427978515625, 1.7858161926269531, -0.1527862548828125, 0.01816558837890625, 0.7325439453125, 0.5294342041015625, 0.3733711242675781, 0.7808761596679688, 0.4319267272949219, -0.4797172546386719, 1.5446701049804688, 0.10248565673828125, 0.4569664001464844, 0.5369110107421875, 1.0376129150390625, 0.1985015869140625, 1.423095703125, 0.8095035552978516, -0.6550674438476562, 0.5061492919921875, -0.21682167053222656, 1.3922309875488281, 0.6453018188476562, 0.67071533203125, 0.7316551208496094, 1.31219482421875, -0.32996368408203125, 0.6071624755859375, 0.5569610595703125, 0.5889625549316406, 0.4457550048828125, 0.8989524841308594, 0.7599983215332031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000033.npy"}
|
||||
{"epoch": 0.048458149779735685, "step": 34, "batch_size": 64, "mean": 0.8045632839202881, "std": 0.9114744663238525, "min": -0.44609832763671875, "p10": -0.11390285491943357, "median": 0.7149772644042969, "p90": 2.0186904907226566, "max": 3.5767822265625, "pos_frac": 0.828125, "sample": [2.0665130615234375, -0.4026908874511719, 1.166748046875, -0.19202804565429688, 2.61199951171875, 0.5196056365966797, 1.112274169921875, 1.2245941162109375, 0.094573974609375, 0.162017822265625, 0.02552032470703125, -0.07274436950683594, 0.841094970703125, 0.5996856689453125, 1.251129150390625, 3.1798553466796875, 0.16124916076660156, -0.1221466064453125, 1.8323822021484375, 0.0601806640625, 1.1736984252929688, -0.046726226806640625, 1.0916728973388672, 1.8738822937011719, 1.26092529296875, 1.4129257202148438, 1.0841598510742188, -0.44609832763671875, -0.04595947265625, 1.0825462341308594, 0.00736236572265625, 0.942169189453125, 1.460601806640625, 0.7122039794921875, 2.31024169921875, 0.7559242248535156, 0.7177505493164062, 2.898590087890625, 0.1437225341796875, -0.4295234680175781, 0.12296295166015625, 0.3782539367675781, 1.1457996368408203, 1.4564666748046875, 0.6991424560546875, 1.2125816345214844, 0.2834300994873047, -0.19642257690429688, 1.2548370361328125, 0.0059967041015625, 0.03930091857910156, -0.441070556640625, 0.8147506713867188, 0.34149169921875, 0.88934326171875, -0.09466743469238281, 1.9071044921875, 0.16660690307617188, 0.0777435302734375, 0.809967041015625, 2.3755340576171875, 3.5767822265625, 0.3882312774658203, 0.19800186157226562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000034.npy"}
|
||||
{"epoch": 0.049926578560939794, "step": 35, "batch_size": 64, "mean": 1.2318875789642334, "std": 1.166860580444336, "min": -0.40288543701171875, "p10": 0.17706909179687502, "median": 0.8730354309082031, "p90": 2.526919555664063, "max": 6.984893798828125, "pos_frac": 0.9375, "sample": [1.393341064453125, 0.8610191345214844, 3.527435302734375, 1.8893394470214844, 0.3377685546875, 0.8242721557617188, 1.325469970703125, -0.40288543701171875, 0.0856170654296875, 1.05963134765625, 1.8327484130859375, 0.16969680786132812, 0.9691238403320312, 0.7475166320800781, 0.44173431396484375, 0.766082763671875, 2.1292572021484375, 3.7020263671875, 0.45067596435546875, 0.24059104919433594, 0.6979618072509766, 1.8591995239257812, 2.7803497314453125, 0.23663330078125, 0.28598785400390625, 0.8842697143554688, 0.2741661071777344, 1.0394821166992188, 0.39359283447265625, 0.8618011474609375, 6.984893798828125, 1.5983200073242188, 0.4998779296875, 1.49798583984375, 0.6525592803955078, 3.3240966796875, 0.19427108764648438, 1.53900146484375, 2.2891693115234375, 2.370697021484375, 0.753204345703125, 2.5938720703125, 1.39678955078125, 1.4980545043945312, 0.8133659362792969, -0.024892807006835938, 1.6983184814453125, 2.2559967041015625, 1.29437255859375, 0.5192680358886719, 0.7052383422851562, 0.10711669921875, 0.7245655059814453, 1.83209228515625, 1.1169509887695312, 1.1543865203857422, 0.7160491943359375, -0.23319625854492188, 1.8105545043945312, -0.13494110107421875, 0.7295303344726562, 2.6264190673828125, 1.9766845703125, 0.29622650146484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000035.npy"}
|
||||
{"epoch": 0.0513950073421439, "step": 36, "batch_size": 64, "mean": 1.1320552825927734, "std": 1.2440820932388306, "min": -2.3649826049804688, "p10": -0.1616352081298828, "median": 0.9098854064941406, "p90": 2.8355712890625004, "max": 4.244781494140625, "pos_frac": 0.84375, "sample": [1.012542724609375, -0.10187339782714844, 1.1936893463134766, 1.129425048828125, 1.2757949829101562, 0.5376453399658203, 0.9230422973632812, -0.7307891845703125, 0.05372047424316406, 3.77166748046875, 2.0768508911132812, 0.2398395538330078, -0.1875762939453125, 0.41735076904296875, 0.29390525817871094, 1.3709945678710938, -0.14603805541992188, 1.72283935546875, 1.380523681640625, 0.610076904296875, -0.0508575439453125, -0.38620758056640625, -0.1683197021484375, 0.2493610382080078, 2.86138916015625, 3.863170623779297, 1.15277099609375, -0.217559814453125, 3.5848388671875, 0.7128524780273438, 0.7873477935791016, 1.3217544555664062, 0.2239990234375, 2.0942611694335938, 0.010753631591796875, 0.07952499389648438, 3.194366455078125, 0.7137603759765625, 2.143096923828125, 0.41086578369140625, 2.456268310546875, 1.1379241943359375, 2.5555267333984375, 0.8164710998535156, 2.77532958984375, 1.3341064453125, 0.021820068359375, 4.244781494140625, 1.0206966400146484, -2.3649826049804688, 1.4710044860839844, 0.7163314819335938, 2.7306671142578125, 0.6087150573730469, 2.5670852661132812, 1.2133255004882812, -0.2903900146484375, 0.5756072998046875, 2.8803558349609375, 0.896728515625, 1.6515960693359375, 2.694366455078125, 0.7706222534179688, 0.5427818298339844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000036.npy"}
|
||||
{"epoch": 0.05286343612334802, "step": 37, "batch_size": 64, "mean": 0.9501620531082153, "std": 1.3475593328475952, "min": -2.3604736328125, "p10": -0.38649749755859375, "median": 0.5825166702270508, "p90": 2.631443023681641, "max": 6.00604248046875, "pos_frac": 0.75, "sample": [1.7803421020507812, 0.1666126251220703, 1.2348556518554688, 1.3459854125976562, 0.06350326538085938, -0.24201202392578125, -0.47327423095703125, 0.39640045166015625, 1.459747314453125, 0.27457237243652344, 0.571563720703125, 3.428863525390625, 0.5081672668457031, -0.721710205078125, -0.39306640625, 0.31517791748046875, 0.7195644378662109, 0.10567665100097656, 1.9476757049560547, 6.00604248046875, 0.9227371215820312, 4.06781005859375, 1.177978515625, 1.0313911437988281, 0.9603652954101562, 1.3326416015625, 0.5750045776367188, 0.3632850646972656, 1.3086090087890625, 1.494873046875, 0.5900287628173828, -0.3711700439453125, 0.4417762756347656, -1.0695571899414062, 0.75286865234375, -0.08498764038085938, 0.377593994140625, 2.052978515625, 1.5491256713867188, 3.0401611328125, -0.31938743591308594, 2.53875732421875, 0.2938385009765625, 1.76629638671875, -0.0414276123046875, 0.5025253295898438, -0.10210037231445312, -0.5984039306640625, 0.63824462890625, 1.8736457824707031, 2.4405288696289062, -0.020355224609375, 2.6711654663085938, 2.3302993774414062, 0.28510284423828125, 2.7378005981445312, 1.8312873840332031, -2.3604736328125, -0.036792755126953125, -0.5692138671875, 2.3036766052246094, 3.2936553955078125, -0.0384063720703125, 0.38190460205078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000037.npy"}
|
||||
{"epoch": 0.05433186490455213, "step": 38, "batch_size": 64, "mean": 1.3563368320465088, "std": 1.7058887481689453, "min": -1.2567977905273438, "p10": -0.20231876373291013, "median": 0.9430198669433594, "p90": 3.180750274658205, "max": 7.969146728515625, "pos_frac": 0.796875, "sample": [0.35718536376953125, 7.969146728515625, 4.14862060546875, 1.8074226379394531, -0.16968917846679688, -0.004253387451171875, 0.9245529174804688, -0.4014739990234375, 1.4210128784179688, 2.30621337890625, 1.5365982055664062, 0.96148681640625, 1.0836944580078125, 2.6604843139648438, 1.4103622436523438, -0.21630287170410156, -0.0821990966796875, 0.0489959716796875, -0.11724853515625, 2.4453201293945312, 0.6276130676269531, 1.6512908935546875, 2.7146759033203125, 0.24423789978027344, 2.715057373046875, 0.26593589782714844, 2.42608642578125, 2.6100616455078125, 1.6014060974121094, -0.2786216735839844, 0.5016136169433594, 3.3803329467773438, -1.2567977905273438, 1.292327880859375, 0.6267185211181641, 0.5487060546875, 0.8697509765625, 6.1746826171875, 0.7405548095703125, -0.03708648681640625, 3.8610153198242188, 5.322479248046875, 1.6212081909179688, 0.2769317626953125, 0.3282585144042969, -0.6648712158203125, 0.39415740966796875, 0.2005462646484375, 0.8535919189453125, 1.0558929443359375, -0.8385772705078125, 0.2808685302734375, -0.08730316162109375, 1.6823310852050781, 1.0840473175048828, 1.90216064453125, 1.790740966796875, 5.464012145996094, 1.943603515625, 0.38134002685546875, 2.223339080810547, -0.363433837890625, 0.525054931640625, 2.059680938720703], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000038.npy"}
|
||||
{"epoch": 0.055800293685756244, "step": 39, "batch_size": 64, "mean": 1.661270260810852, "std": 1.4308812618255615, "min": -2.043365478515625, "p10": 0.2959400177001954, "median": 1.3179140090942383, "p90": 3.6941799163818367, "max": 5.5604400634765625, "pos_frac": 0.921875, "sample": [2.28466796875, 2.1719207763671875, 5.153289794921875, 2.0862045288085938, 1.5940933227539062, -0.582794189453125, 3.2378692626953125, 2.4223594665527344, 1.2639408111572266, 2.6162776947021484, 0.62249755859375, 3.3892135620117188, 1.3951873779296875, 1.033651351928711, 4.059379577636719, 1.0279464721679688, 0.3799285888671875, 0.812103271484375, 0.262908935546875, 1.258453369140625, -0.1173248291015625, 1.6656417846679688, 2.4299545288085938, 0.8299026489257812, 0.8090534210205078, 2.1975021362304688, -0.1520843505859375, 1.6841983795166016, 4.789306640625, 0.3730125427246094, 0.738311767578125, 0.5928821563720703, 2.9193954467773438, 2.1005096435546875, -2.043365478515625, 2.1294898986816406, 0.5549774169921875, 1.37188720703125, 3.7797164916992188, 5.5604400634765625, 1.1661853790283203, 1.8416290283203125, 4.111000061035156, 0.9280319213867188, 1.2473030090332031, 0.7549896240234375, 0.0735321044921875, 0.8406753540039062, 1.7746658325195312, 0.5743770599365234, 0.8733367919921875, 3.4945945739746094, 2.5603866577148438, 2.5959320068359375, 0.6188449859619141, 4.272987365722656, 3.0732269287109375, 0.9506931304931641, 1.7112789154052734, 0.5737876892089844, 0.7633323669433594, -0.012180328369140625, 0.5238437652587891, 2.306337356567383], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000039.npy"}
|
||||
{"epoch": 0.05726872246696035, "step": 40, "batch_size": 64, "mean": 1.5557358264923096, "std": 1.7941007614135742, "min": -0.9403076171875, "p10": -0.16397781372070305, "median": 1.1564788818359375, "p90": 3.891345596313477, "max": 8.24774169921875, "pos_frac": 0.859375, "sample": [-0.601104736328125, 0.15721893310546875, 0.6253204345703125, 1.4404792785644531, 1.1649284362792969, 0.8715133666992188, 0.6986541748046875, 1.0828704833984375, 0.019121170043945312, 2.002256393432617, 7.3303985595703125, 3.8150634765625, 0.8016777038574219, 3.9561424255371094, 1.2317047119140625, -0.4428443908691406, 2.0833892822265625, 1.565582275390625, 0.09300041198730469, -0.9403076171875, 3.0159149169921875, -0.19671249389648438, 0.646942138671875, 0.1615447998046875, 2.3714752197265625, 1.551605224609375, 0.14413070678710938, 0.9357833862304688, 0.4636192321777344, -0.3660888671875, 3.7685317993164062, -0.07369613647460938, 3.367645263671875, -0.44769287109375, 0.6054286956787109, 4.268035888671875, 0.06793594360351562, 3.9240379333496094, 0.504547119140625, 1.0905494689941406, 4.752647399902344, 0.9206924438476562, 1.3084259033203125, 8.24774169921875, -0.4919548034667969, 1.5835800170898438, 0.4697418212890625, 2.0676116943359375, 2.6245155334472656, 2.2839813232421875, 1.148712158203125, 2.1335983276367188, 3.3432998657226562, 1.7840957641601562, 1.16424560546875, 1.4571456909179688, 5.0824737548828125, 0.01499176025390625, 1.4302597045898438, -0.08759689331054688, 0.03656768798828125, 2.2022171020507812, 2.5958938598632812, 0.7355995178222656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000040.npy"}
|
||||
{"epoch": 0.05873715124816446, "step": 41, "batch_size": 64, "mean": 1.8329112529754639, "std": 2.006847620010376, "min": -2.9318389892578125, "p10": -0.09852333068847643, "median": 1.4682912826538086, "p90": 4.234089660644532, "max": 8.529579162597656, "pos_frac": 0.890625, "sample": [0.9262008666992188, 0.8810882568359375, 4.0255889892578125, 1.15167236328125, 1.1148300170898438, 2.2075424194335938, 3.7003173828125, 2.1318588256835938, -0.650909423828125, 3.5461807250976562, 5.9547119140625, 0.6876068115234375, 1.9691162109375, 8.529579162597656, 0.25676918029785156, 0.3228302001953125, 0.9218368530273438, 1.145111083984375, -0.150604248046875, -2.09442138671875, 1.600067138671875, 3.595561981201172, 4.460334777832031, 0.8341941833496094, 7.0517578125, 1.7307510375976562, 4.0932159423828125, 4.294464111328125, 0.8145027160644531, 0.4136314392089844, 2.2312164306640625, 0.545318603515625, 6.084808349609375, 0.7645359039306641, 1.3651256561279297, 3.22955322265625, 1.9459609985351562, 2.24432373046875, -0.27001953125, 2.3298797607421875, 3.7579498291015625, 1.8153133392333984, -2.9318389892578125, 3.2721405029296875, 0.340606689453125, 0.72271728515625, 2.3917160034179688, 2.0280723571777344, 2.170642852783203, 0.19733810424804688, 1.259246826171875, 0.8467788696289062, 1.6796112060546875, 1.5714569091796875, 0.6752872467041016, 1.0916194915771484, -0.20726776123046875, 3.707866668701172, 0.48320770263671875, 0.14780235290527344, 0.022998809814453125, -0.33703041076660156, 1.7411270141601562, 4.9228668212890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000041.npy"}
|
||||
{"epoch": 0.06020558002936858, "step": 42, "batch_size": 64, "mean": 2.4430971145629883, "std": 2.269852638244629, "min": -1.5936965942382812, "p10": 0.18851280212402352, "median": 2.023622512817383, "p90": 5.565797424316407, "max": 10.49078369140625, "pos_frac": 0.921875, "sample": [1.6454544067382812, 0.28168487548828125, 2.2425384521484375, 9.124908447265625, 10.49078369140625, 2.329345703125, 0.15639495849609375, 5.585540771484375, 2.2704544067382812, 2.8065414428710938, 1.9865951538085938, 6.225669860839844, 2.7176151275634766, 6.267242431640625, 0.6770420074462891, 2.0773239135742188, 2.9420013427734375, -0.04108428955078125, 2.68206787109375, 2.9412384033203125, 1.8058700561523438, 1.4852008819580078, 3.4903182983398438, 0.42372894287109375, 1.2425079345703125, 2.6503143310546875, 0.0562896728515625, 2.232147216796875, 1.8798637390136719, -0.42188262939453125, 2.060649871826172, 5.3470611572265625, 0.4117889404296875, 6.060089111328125, 2.2157058715820312, 1.269317626953125, 0.6838493347167969, 1.3069534301757812, 1.8498687744140625, -0.62652587890625, 3.9682769775390625, -0.453521728515625, 1.4656982421875, 5.5197296142578125, 1.6385841369628906, 4.257347106933594, 0.7619361877441406, 0.2634544372558594, 1.2707023620605469, 1.5637741088867188, 0.6918487548828125, 5.96429443359375, -1.5936965942382812, 0.7961406707763672, 2.6426773071289062, 3.5308303833007812, 0.2882728576660156, 1.6505279541015625, 4.829032897949219, 2.5759124755859375, 3.854400634765625, 5.009391784667969, 0.6690692901611328, 4.361053466796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000042.npy"}
|
||||
{"epoch": 0.06167400881057269, "step": 43, "batch_size": 64, "mean": 2.442809581756592, "std": 1.9496551752090454, "min": -1.4351043701171875, "p10": 0.26261138916015625, "median": 2.2545957565307617, "p90": 5.022203826904297, "max": 10.052215576171875, "pos_frac": 0.953125, "sample": [0.2620964050292969, 0.5307502746582031, -0.07110214233398438, 3.4310989379882812, 3.6869430541992188, 0.99639892578125, 3.8132858276367188, 2.6451568603515625, 2.6368579864501953, 6.619773864746094, 0.17012786865234375, 2.149648666381836, 0.9883899688720703, 1.6595954895019531, 3.3522415161132812, 1.1884346008300781, 3.429168701171875, 5.036918640136719, 1.852447509765625, 0.38503074645996094, 5.931884765625, 2.4200439453125, 5.0406036376953125, 1.2578964233398438, 2.9281234741210938, 2.493377685546875, 0.2638130187988281, 2.2650928497314453, 0.10572433471679688, 4.9878692626953125, 1.5696563720703125, 2.5766754150390625, 3.9751815795898438, 2.60211181640625, 2.674896240234375, 3.495441436767578, 1.069915771484375, 2.244098663330078, 2.0417003631591797, 0.6188583374023438, 1.5271682739257812, 2.1547470092773438, 6.7868804931640625, 3.7316131591796875, 0.8685302734375, -1.4351043701171875, 1.224996566772461, 2.5656890869140625, 1.8525543212890625, 3.843902587890625, 3.073314666748047, 3.5510787963867188, 2.1605186462402344, 0.0019321441650390625, 2.0753021240234375, 10.052215576171875, 3.286956787109375, 1.8565521240234375, -0.3141822814941406, 0.372528076171875, 3.0262603759765625, 5.210735321044922, 3.271820068359375, 0.267578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000043.npy"}
|
||||
{"epoch": 0.0631424375917768, "step": 44, "batch_size": 64, "mean": 2.79276704788208, "std": 2.1667473316192627, "min": -0.8519744873046875, "p10": 0.5859107971191406, "median": 2.3977203369140625, "p90": 5.579143524169924, "max": 10.09588623046875, "pos_frac": 0.953125, "sample": [3.9048614501953125, 2.5586929321289062, 1.5786056518554688, 0.6625423431396484, 6.211345672607422, 2.419677734375, 3.01202392578125, 1.5550765991210938, 2.3518600463867188, 5.229576110839844, 6.401313781738281, 0.4815101623535156, 4.779060363769531, 4.9478302001953125, 4.939849853515625, 3.2212066650390625, 0.9130439758300781, 2.375762939453125, 2.053281784057617, 5.7289581298828125, 0.5825176239013672, 2.8672637939453125, 2.6705875396728516, 7.9504547119140625, 2.486175537109375, 3.1745986938476562, 0.7161483764648438, 1.330780029296875, 5.001617431640625, 1.7666893005371094, 2.936065673828125, -0.028429031372070312, 1.7243595123291016, 1.3251304626464844, 1.974740982055664, 3.1199569702148438, 0.7998123168945312, 1.9022369384765625, 4.379852294921875, 2.341766357421875, 1.3996124267578125, 1.860076904296875, 3.902130126953125, 3.7816848754882812, 2.0951385498046875, 2.7619705200195312, 2.647491455078125, 3.02325439453125, 0.5938282012939453, 0.4058799743652344, 6.7511138916015625, 4.43280029296875, 1.67724609375, 3.1543731689453125, 1.011739730834961, 10.09588623046875, 0.7560081481933594, -0.8519744873046875, 3.7899017333984375, 1.69183349609375, -0.7720203399658203, 0.20571517944335938, 1.501251220703125, 8.473731994628906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000044.npy"}
|
||||
{"epoch": 0.06461086637298091, "step": 45, "batch_size": 64, "mean": 2.1753129959106445, "std": 2.469676971435547, "min": -1.6701431274414062, "p10": -0.21452560424804684, "median": 1.4464025497436523, "p90": 5.598724365234376, "max": 11.6182861328125, "pos_frac": 0.84375, "sample": [3.3045120239257812, 0.9381866455078125, 0.34355926513671875, 3.1077613830566406, 0.2890281677246094, 1.4173564910888672, 3.2668495178222656, 6.3368682861328125, 5.444915771484375, 2.4598388671875, 0.9235763549804688, 7.416648864746094, 3.5307750701904297, 0.3104286193847656, 0.007289886474609375, 1.4754486083984375, -0.24982452392578125, 1.8218498229980469, 0.042755126953125, 4.9788970947265625, -0.19359588623046875, 0.3784523010253906, 2.5172348022460938, 0.7924461364746094, 0.7106494903564453, 5.121337890625, 5.664642333984375, 4.1016998291015625, 0.09058952331542969, 0.6059055328369141, 5.077239990234375, 2.67333984375, 5.812164306640625, -0.14371109008789062, -0.5189971923828125, 1.368377685546875, 5.7960052490234375, 3.6108551025390625, -0.2234954833984375, 1.3991851806640625, 1.6059932708740234, 0.30661773681640625, -1.6701431274414062, -1.0106277465820312, 1.379302978515625, 11.6182861328125, 1.2691802978515625, 7.169853210449219, 3.1139602661132812, -0.7489700317382812, 2.6755619049072266, 1.502960205078125, 2.0133628845214844, 0.24318885803222656, 1.3504505157470703, 5.2136993408203125, 0.8504638671875, 2.5717010498046875, 2.083181381225586, 1.5474014282226562, -0.10901069641113281, 4.389549255371094, 0.7398681640625, -0.6928482055664062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000045.npy"}
|
||||
{"epoch": 0.06607929515418502, "step": 46, "batch_size": 64, "mean": 2.8499715328216553, "std": 3.068298816680908, "min": -2.4073867797851562, "p10": -0.0946937561035156, "median": 2.2493247985839844, "p90": 6.662253189086915, "max": 14.347259521484375, "pos_frac": 0.859375, "sample": [0.46659088134765625, -0.079254150390625, 5.189666748046875, 7.146759033203125, 3.92388916015625, -2.4073867797851562, 2.2213363647460938, 1.60968017578125, 6.70745849609375, 3.5691967010498047, 6.556774139404297, 1.7341346740722656, 0.24813079833984375, 2.3890380859375, 4.103515625, 0.16102218627929688, -0.3400688171386719, 0.6284027099609375, 4.5629730224609375, 10.94451904296875, -0.43232154846191406, 0.0337982177734375, -0.20252227783203125, 3.4799652099609375, 0.11851882934570312, 4.3006134033203125, 3.1732635498046875, 1.2138137817382812, 3.337963104248047, 2.63226318359375, 4.112579345703125, 0.6205615997314453, -2.2833251953125, 0.7545547485351562, 3.4360504150390625, 5.366893768310547, 5.497707366943359, -0.10131072998046875, 8.180580139160156, -0.0145111083984375, 0.21286964416503906, 4.0003509521484375, 5.449974060058594, 0.47910308837890625, 14.347259521484375, 0.3905792236328125, 6.1465301513671875, 5.931434631347656, 0.7168655395507812, 2.48065185546875, 1.5321197509765625, 8.055633544921875, 3.038541793823242, 0.6772499084472656, 1.8967475891113281, 7.608253479003906, 1.5148239135742188, 2.277313232421875, 3.6109466552734375, 5.390716552734375, 2.0089263916015625, 1.0011310577392578, 1.3736438751220703, -0.30500030517578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000046.npy"}
|
||||
{"epoch": 0.06754772393538913, "step": 47, "batch_size": 64, "mean": 2.840768337249756, "std": 2.613548994064331, "min": -2.39349365234375, "p10": -0.11950988769531223, "median": 2.103339195251465, "p90": 6.352000427246094, "max": 9.89508056640625, "pos_frac": 0.890625, "sample": [1.8192901611328125, 3.2873382568359375, 0.67010498046875, 1.2982654571533203, 5.090778350830078, 1.158670425415039, 0.5711212158203125, 1.706207275390625, 6.72772216796875, 2.8251590728759766, 0.21440887451171875, 5.719280242919922, 6.2896270751953125, 1.2484207153320312, 3.6047286987304688, 2.1323318481445312, -0.23333740234375, 1.9200820922851562, 1.9044189453125, 5.512481689453125, 6.146308898925781, 1.5094375610351562, 0.7093124389648438, 3.282459259033203, 3.38958740234375, 1.360586166381836, -0.7545928955078125, 6.3598785400390625, 7.338287353515625, 2.665416717529297, -0.4126300811767578, 3.327117919921875, 6.499042510986328, 3.561431884765625, -2.39349365234375, 0.146087646484375, 0.33386993408203125, 1.1248512268066406, 1.4536666870117188, 9.89508056640625, 6.089508056640625, 2.313413619995117, 1.2343769073486328, 2.91131591796875, 8.7781982421875, 6.3336181640625, -0.2670440673828125, 3.2668724060058594, 9.780853271484375, -0.5697250366210938, 1.5237655639648438, 1.790771484375, -1.0720672607421875, 1.4345283508300781, 2.7680816650390625, 1.3749351501464844, 5.156166076660156, 1.9610824584960938, 3.840911865234375, 2.018054962158203, 4.465179443359375, 3.409271240234375, 2.183961868286133, 2.0743465423583984], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000047.npy"}
|
||||
{"epoch": 0.06901615271659324, "step": 48, "batch_size": 64, "mean": 2.877974510192871, "std": 2.9358506202697754, "min": -6.9232177734375, "p10": -0.038709449768066354, "median": 2.2776565551757812, "p90": 6.89225311279297, "max": 11.475784301757812, "pos_frac": 0.890625, "sample": [3.65875244140625, 0.9060592651367188, 7.0247344970703125, 8.037147521972656, 6.2869110107421875, 3.1755447387695312, -1.073486328125, 2.2208404541015625, 4.703033447265625, 3.7669219970703125, 1.819549560546875, 6.5831298828125, 1.83135986328125, -0.386871337890625, 0.00933837890625, 1.8355941772460938, 5.870330810546875, -0.05930137634277344, 1.221588134765625, -0.3409309387207031, 1.0049705505371094, 8.375740051269531, -0.44937896728515625, 1.9721603393554688, 5.637306213378906, 1.5529556274414062, 3.5509185791015625, 3.3101367950439453, 0.15979766845703125, 3.0058841705322266, 1.6121635437011719, 2.0368785858154297, 2.7106857299804688, 2.157258987426758, 8.436599731445312, 4.172441482543945, 4.9091796875, 9.086235046386719, 3.238372802734375, 1.0983657836914062, 1.3006172180175781, 1.2100391387939453, 1.6550064086914062, 2.7113494873046875, 1.1465606689453125, 3.699502944946289, 2.694284439086914, 1.841827392578125, -1.661376953125, 8.3824462890625, 2.7319164276123047, 2.930929183959961, 2.33447265625, 11.475784301757812, 3.9599456787109375, -6.9232177734375, 5.934528350830078, 2.034900665283203, 0.7655754089355469, 0.5584716796875, 4.787628173828125, 1.4807548522949219, 2.4851417541503906, 1.9843673706054688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000048.npy"}
|
||||
{"epoch": 0.07048458149779736, "step": 49, "batch_size": 64, "mean": 3.5184361934661865, "std": 3.6862456798553467, "min": -3.5025405883789062, "p10": -0.08754997253417954, "median": 2.9876184463500977, "p90": 7.70450439453125, "max": 14.711380004882812, "pos_frac": 0.890625, "sample": [3.6302032470703125, 0.5436477661132812, 0.4235668182373047, 3.4044876098632812, 3.019479751586914, 5.706729888916016, 0.31093597412109375, 2.311725616455078, -1.00579833984375, 0.8381500244140625, 0.06939506530761719, 2.1940536499023438, 2.9557571411132812, 7.756080627441406, 6.64105224609375, 2.3030624389648438, 4.201168060302734, 1.807546615600586, 3.5951309204101562, 0.73046875, 1.7814922332763672, -3.148923873901367, 3.3990936279296875, -0.14714622497558594, 14.057464599609375, 10.052444458007812, 1.049041748046875, 5.904804229736328, 4.169921875, 0.6349048614501953, 7.584159851074219, 14.711380004882812, 4.6027679443359375, 0.6024169921875, -0.675567626953125, -3.5025405883789062, 3.385589599609375, 3.532041549682617, 4.468963623046875, 3.8557586669921875, 2.680356979370117, 0.05150794982910156, -0.57159423828125, 4.868694305419922, 5.211448669433594, 14.18927001953125, 2.365325927734375, 2.0281143188476562, 7.850090026855469, 5.980010986328125, 6.480079650878906, 2.897960662841797, 4.590782165527344, 3.97662353515625, 11.988616943359375, 0.8440532684326172, -0.1999664306640625, 2.7769012451171875, 1.7648506164550781, 4.6578826904296875, 5.1352081298828125, 1.7454872131347656, 4.939907073974609, 1.1733856201171875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000049.npy"}
|
||||
{"epoch": 0.07195301027900147, "step": 50, "batch_size": 64, "mean": 3.8532233238220215, "std": 3.6484310626983643, "min": -2.952472686767578, "p10": -0.5097091674804687, "median": 3.1749134063720703, "p90": 8.713336181640626, "max": 14.080612182617188, "pos_frac": 0.859375, "sample": [6.015045166015625, -0.30696868896484375, -1.638397216796875, 9.750770568847656, 1.2790374755859375, 3.0621414184570312, 7.4101715087890625, 1.9560279846191406, 4.035160064697266, 1.7683038711547852, -0.4518280029296875, 6.5385894775390625, 3.361663818359375, 14.0477294921875, 0.698333740234375, 2.183074951171875, 6.8311004638671875, 3.020967483520508, 8.899032592773438, 3.878204345703125, 0.9607810974121094, 5.0369110107421875, 9.009696960449219, 2.3343048095703125, 2.131488800048828, 4.568454742431641, 0.2797355651855469, -0.6319313049316406, 3.2876853942871094, 8.280044555664062, 6.435859680175781, 5.352210998535156, 9.22445297241211, 14.080612182617188, -2.952472686767578, 0.8235569000244141, 1.538543701171875, 2.0575332641601562, 0.23209381103515625, 4.112571716308594, 2.556243896484375, 1.7233257293701172, 6.143455505371094, 4.458793640136719, 7.086414337158203, 6.585746765136719, -0.534515380859375, 1.9978141784667969, 5.6413116455078125, -0.6705551147460938, 6.3460235595703125, 2.6407318115234375, 4.405689239501953, 5.542961120605469, 2.5075759887695312, 5.5762786865234375, 6.916351318359375, 2.1731719970703125, 11.439788818359375, -0.7605361938476562, 6.6921844482421875, 1.6062545776367188, -2.0253143310546875, 0.0568084716796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000050.npy"}
|
||||
{"epoch": 0.07342143906020558, "step": 51, "batch_size": 64, "mean": 3.7630162239074707, "std": 4.703429698944092, "min": -5.166229248046875, "p10": -0.41556549072265614, "median": 2.526630401611328, "p90": 9.440888977050783, "max": 18.767913818359375, "pos_frac": 0.828125, "sample": [9.215057373046875, 4.0721588134765625, 0.8317527770996094, -0.5561485290527344, 0.5831527709960938, 0.25457763671875, 8.388870239257812, -0.7456722259521484, 2.1972885131835938, -0.2749176025390625, 10.6676025390625, 7.185367584228516, 2.0888805389404297, 4.586181640625, 2.1671714782714844, 4.273223876953125, 3.6186141967773438, -0.9068984985351562, 9.537673950195312, 5.864185333251953, 4.116769790649414, -0.451202392578125, -0.03157615661621094, 3.8654251098632812, 1.6356964111328125, 7.602939605712891, 0.7411518096923828, 5.819000244140625, 0.9196319580078125, 4.769340515136719, 18.767913818359375, 3.1243667602539062, 0.5917701721191406, 5.9184722900390625, 3.0823631286621094, 14.094009399414062, 1.0983238220214844, 14.516128540039062, 5.601112365722656, 1.9652576446533203, 2.1726150512695312, 0.8728599548339844, 3.8297576904296875, 8.844024658203125, 1.3051948547363281, 0.11056900024414062, 5.011528015136719, -4.1429595947265625, -0.31855010986328125, 17.345687866210938, 12.862747192382812, 6.658042907714844, 7.170555114746094, -0.3324127197265625, 1.6259326934814453, 5.575187683105469, 0.07121658325195312, 0.26104736328125, 2.63189697265625, -5.166229248046875, 2.5373916625976562, -1.484771728515625, 2.515869140625, 0.08082199096679688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000051.npy"}
|
||||
{"epoch": 0.07488986784140969, "step": 52, "batch_size": 64, "mean": 5.8035993576049805, "std": 4.671248912811279, "min": -2.263275146484375, "p10": 0.8415302276611332, "median": 4.752958297729492, "p90": 11.897981262207031, "max": 19.9918212890625, "pos_frac": 0.9375, "sample": [1.7556610107421875, 11.919052124023438, 4.215843200683594, 11.757377624511719, 7.0641937255859375, 9.878311157226562, 3.346660614013672, 6.8171844482421875, 9.1195068359375, 13.230865478515625, 0.0894927978515625, 12.631404876708984, 5.3062896728515625, 5.473968505859375, 8.705045700073242, 8.152509689331055, 3.1050186157226562, 4.842931747436523, 16.39698028564453, 9.47406005859375, 3.239412307739258, 1.2596893310546875, 11.714401245117188, 2.956930160522461, 0.05569267272949219, 7.7087249755859375, 2.5213394165039062, 2.273649215698242, 8.478780746459961, 19.9918212890625, 9.068328857421875, 1.8976116180419922, 9.565040588378906, 4.763103485107422, 3.97637939453125, 13.368209838867188, 3.863800048828125, -0.42706298828125, 4.333076477050781, -2.263275146484375, -2.2609786987304688, 2.5997238159179688, 6.184345245361328, 6.038612365722656, 1.4183616638183594, 9.258750915527344, 4.1589813232421875, 11.84881591796875, 7.000087738037109, 2.028097152709961, 0.6623191833496094, 3.2360687255859375, 3.1067771911621094, 4.7428131103515625, 3.7026901245117188, 9.698600769042969, 5.681186676025391, 1.3438873291015625, 16.558494567871094, -0.8646888732910156, 5.809478759765625, 3.7965736389160156, 2.5375919342041016, 1.5157890319824219], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000052.npy"}
|
||||
{"epoch": 0.0763582966226138, "step": 53, "batch_size": 64, "mean": 5.858806610107422, "std": 6.35233736038208, "min": -3.1304855346679688, "p10": 0.4056007385253907, "median": 3.9252796173095703, "p90": 14.756396865844726, "max": 28.422332763671875, "pos_frac": 0.921875, "sample": [-1.8813934326171875, 3.6830215454101562, 3.3217315673828125, 3.115997314453125, 1.29541015625, 3.6913928985595703, 13.428291320800781, 0.250579833984375, 7.8429718017578125, -1.36865234375, 2.4702529907226562, 27.430328369140625, -2.705322265625, 1.0210342407226562, 6.494873046875, 7.216503143310547, 0.973663330078125, 10.53271484375, 2.6043167114257812, 11.796211242675781, 4.0410919189453125, 6.144317626953125, 11.2779541015625, 7.136871337890625, 14.645854949951172, 1.48614501953125, 4.985252380371094, 0.6903839111328125, 5.381538391113281, 1.622610092163086, 17.159576416015625, 8.510501861572266, 7.305377960205078, 8.176849365234375, 3.809467315673828, 0.4496498107910156, 0.3867225646972656, 4.203441619873047, 1.0984668731689453, 4.139215469360352, 3.066650390625, 7.326005935668945, 1.610891342163086, 13.750099182128906, -3.1304855346679688, 17.16851806640625, 18.06982421875, 3.4354934692382812, 5.1846923828125, 3.2023563385009766, 2.1983184814453125, 4.161773681640625, 2.6891326904296875, 1.8874664306640625, 3.1546096801757812, 4.336627960205078, 2.9751510620117188, 28.422332763671875, 2.5500526428222656, 14.80377197265625, 4.518444061279297, 4.751708984375, 15.416847229003906, -0.45187950134277344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000053.npy"}
|
||||
{"epoch": 0.07782672540381791, "step": 54, "batch_size": 64, "mean": 5.140138626098633, "std": 4.829864501953125, "min": -0.9620780944824219, "p10": 0.32008800506591817, "median": 4.017575263977051, "p90": 11.596824645996096, "max": 21.24468994140625, "pos_frac": 0.9375, "sample": [7.621147155761719, 4.414459228515625, 8.652446746826172, 1.1361351013183594, 0.5490531921386719, 3.175018310546875, 0.9749488830566406, 2.4886722564697266, 5.302879333496094, 7.856266021728516, 1.013010025024414, 5.9959259033203125, 11.75848388671875, 2.078256607055664, 4.490947723388672, 10.457962036132812, -0.046230316162109375, 4.820980072021484, 5.111654281616211, 5.55145263671875, 0.014324188232421875, 1.3942184448242188, 16.44689178466797, 0.2421417236328125, 7.308082580566406, 0.5242767333984375, 4.648773193359375, 5.3415679931640625, 5.847175598144531, 5.922248840332031, 1.8288326263427734, -0.20121002197265625, 7.8827056884765625, -0.017307281494140625, 1.1291313171386719, 3.6520137786865234, 12.126335144042969, 3.0260772705078125, 6.513275146484375, 12.639846801757812, 1.7328681945800781, 2.323619842529297, 21.24468994140625, 10.881187438964844, -0.9620780944824219, 3.027841567993164, 0.09111595153808594, 13.53460693359375, 0.5019626617431641, 3.2708892822265625, 1.0141372680664062, 4.383136749267578, 10.6690673828125, 1.6959228515625, 2.13507080078125, 18.361114501953125, 1.3718624114990234, 4.727449417114258, 3.5568008422851562, 11.219619750976562, 2.4278430938720703, 10.282501220703125, 1.1940174102783203, 10.610748291015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000054.npy"}
|
||||
{"epoch": 0.07929515418502203, "step": 55, "batch_size": 64, "mean": 6.065757751464844, "std": 6.9302568435668945, "min": -9.974044799804688, "p10": -1.4487709045410153, "median": 5.085441589355469, "p90": 15.090407562255862, "max": 26.80706787109375, "pos_frac": 0.8125, "sample": [8.024360656738281, 9.885421752929688, 5.123321533203125, -3.736480712890625, 23.239410400390625, -1.5897140502929688, 7.198402404785156, 4.482112884521484, 16.632537841796875, -0.8401870727539062, 6.244182586669922, 5.5690460205078125, 2.2930450439453125, 11.554946899414062, 18.629638671875, 9.412750244140625, 4.592010498046875, 8.66217041015625, 26.80706787109375, 3.6478271484375, 11.172866821289062, -1.58026123046875, 4.092460632324219, 8.440834045410156, 22.857940673828125, 0.35133934020996094, 15.361419677734375, 0.4069976806640625, 0.3116321563720703, 8.238872528076172, 4.938163757324219, -2.5279998779296875, 5.842643737792969, 0.0079345703125, 11.943038940429688, 4.3851165771484375, -1.6070690155029297, -0.12729644775390625, 0.09222221374511719, -5.101463317871094, 9.090404510498047, 3.1860218048095703, 1.4642410278320312, 14.458045959472656, 6.776679992675781, -1.1419601440429688, 0.8346118927001953, 10.406982421875, 5.0475616455078125, -9.974044799804688, 2.818695068359375, 14.439193725585938, 6.8582611083984375, 4.157207489013672, 7.096149444580078, 5.387956619262695, -0.8287944793701172, 8.062065124511719, 7.663368225097656, 12.98004150390625, 4.962226867675781, -0.20335769653320312, 4.5840606689453125, 16.751571655273438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000055.npy"}
|
||||
{"epoch": 0.08076358296622614, "step": 56, "batch_size": 64, "mean": 5.75810432434082, "std": 6.839889049530029, "min": -7.942081451416016, "p10": -1.4366912841796873, "median": 5.166349411010742, "p90": 14.554774475097663, "max": 25.543807983398438, "pos_frac": 0.8125, "sample": [-1.5157470703125, 6.733245849609375, -3.1208953857421875, 8.423038482666016, -0.8471221923828125, -7.942081451416016, 9.786949157714844, -1.1985912322998047, 3.358367919921875, -3.5463638305664062, 3.446014404296875, 1.8240852355957031, 5.302101135253906, 15.23748779296875, 2.243429183959961, -0.04460906982421875, -1.1580009460449219, 10.883071899414062, 9.561084747314453, 7.479316711425781, -2.1591339111328125, 10.281326293945312, 2.64300537109375, 21.681121826171875, 7.536834716796875, 15.727020263671875, 1.1421546936035156, 0.14380836486816406, 9.265785217285156, 2.7631778717041016, 7.484588623046875, 7.120475769042969, 8.745986938476562, 1.5236434936523438, 10.352890014648438, 12.10903549194336, 0.5202903747558594, 19.61529541015625, 7.659202575683594, 6.535789489746094, 5.656238555908203, 5.030597686767578, 3.0087203979492188, 20.209625244140625, 6.9645538330078125, 12.4315185546875, 6.061712265014648, 10.748062133789062, 0.30513763427734375, 25.543807983398438, 1.497171401977539, 0.0480499267578125, 4.1146240234375, 12.961776733398438, -7.021575927734375, 2.968679428100586, -1.252227783203125, 21.046241760253906, 4.931232452392578, -3.0307464599609375, 3.3074569702148438, 2.172943115234375, 5.918853759765625, 9.299118041992188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000056.npy"}
|
||||
{"epoch": 0.08223201174743025, "step": 57, "batch_size": 64, "mean": 6.474340438842773, "std": 6.0126261711120605, "min": -9.234222412109375, "p10": 0.21091995239257827, "median": 6.256856918334961, "p90": 14.428468322753908, "max": 21.887466430664062, "pos_frac": 0.921875, "sample": [7.941349029541016, 3.626401901245117, 0.5705413818359375, -2.128997802734375, 10.112533569335938, -9.234222412109375, 11.219345092773438, 0.35858154296875, 13.146728515625, 6.241542816162109, 16.040176391601562, 3.717439651489258, 21.887466430664062, 3.57379150390625, 6.705718994140625, 6.541412353515625, -1.1481704711914062, 3.4126663208007812, 6.421882629394531, 20.492218017578125, 10.417755126953125, 16.257369995117188, 11.017974853515625, 4.017475128173828, 6.462642669677734, 1.2900161743164062, 14.570388793945312, 5.379384994506836, 4.221502304077148, 3.187164306640625, 1.4355850219726562, 4.02598762512207, 3.3044052124023438, 0.12552833557128906, 7.229583740234375, 6.7625732421875, 0.14763641357421875, 1.7306022644042969, 2.3721237182617188, 8.730552673339844, 11.8719482421875, 1.898977279663086, 9.943336486816406, 5.869342803955078, 8.857986450195312, -2.3757667541503906, 8.765941619873047, 6.2721710205078125, 9.0574951171875, 3.2350692749023438, 2.4360084533691406, 17.045654296875, 5.657585144042969, 3.397693634033203, 7.5004425048828125, 4.068511962890625, 0.6409549713134766, 12.750114440917969, 11.651779174804688, -5.817878723144531, 14.097320556640625, 19.877243041992188, 7.253576278686523, 8.217613220214844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000057.npy"}
|
||||
{"epoch": 0.08370044052863436, "step": 58, "batch_size": 64, "mean": 7.2314958572387695, "std": 7.587307929992676, "min": -8.209304809570312, "p10": -0.09409408569335903, "median": 5.074516296386719, "p90": 17.332496643066413, "max": 27.43353271484375, "pos_frac": 0.890625, "sample": [15.403488159179688, -1.986886978149414, 0.5264472961425781, 4.802181243896484, 3.9004440307617188, 4.443574905395508, 19.800621032714844, 1.8984603881835938, 14.758003234863281, 2.761087417602539, 5.346851348876953, 7.550689697265625, 7.2381591796875, 3.971466064453125, 2.3765411376953125, 12.970939636230469, 10.616981506347656, 9.054763793945312, 4.4847869873046875, 2.8446903228759766, 7.840087890625, 14.838478088378906, 14.253616333007812, -1.5484161376953125, 11.717880249023438, 2.0100250244140625, 9.591751098632812, 6.593893051147461, 22.153396606445312, 4.464899063110352, 27.43353271484375, 8.252315521240234, 6.7392578125, -0.23357009887695312, 14.957572937011719, 1.0489559173583984, 13.329757690429688, 3.9067153930664062, 2.2831039428710938, -2.905975341796875, 2.132274627685547, 2.620800018310547, 23.684722900390625, -1.6701278686523438, 10.299652099609375, 0.23134994506835938, 0.9682388305664062, 7.3302001953125, -4.764892578125, 24.25726318359375, 1.9542407989501953, 6.192768096923828, 1.042104721069336, 14.994373321533203, 11.020126342773438, -8.209304809570312, 23.140701293945312, 1.7686767578125, 2.8341598510742188, 7.795013427734375, 15.808830261230469, 1.6522274017333984, 0.2562828063964844, 17.985496520996094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000058.npy"}
|
||||
{"epoch": 0.08516886930983847, "step": 59, "batch_size": 64, "mean": 7.762610912322998, "std": 8.03657054901123, "min": -6.96087646484375, "p10": -1.4758525848388664, "median": 6.508495330810547, "p90": 20.11944274902344, "max": 33.51806640625, "pos_frac": 0.875, "sample": [8.478900909423828, 8.534904479980469, 16.38604736328125, 20.54559326171875, 16.526947021484375, 4.1981048583984375, 33.51806640625, 21.073760986328125, 7.405208587646484, 16.700210571289062, 0.6394138336181641, 7.848262786865234, -3.3642425537109375, 2.5791149139404297, 21.727737426757812, -0.7172050476074219, 2.49359130859375, 13.741157531738281, 6.1178741455078125, 21.023040771484375, 1.1713085174560547, 2.6273269653320312, 1.0985641479492188, 1.6935997009277344, 3.8902740478515625, 25.58697509765625, 2.512533187866211, 1.8366031646728516, 7.972637176513672, -6.96087646484375, 9.195137023925781, -2.0471115112304688, 8.143821716308594, 7.87860107421875, 8.888481140136719, 18.101219177246094, 3.03106689453125, 12.531158447265625, 7.6291961669921875, 3.8337440490722656, -5.681915283203125, -3.3923263549804688, 5.768293380737305, -1.8009872436523438, 4.255100250244141, 10.59274673461914, 7.392694473266602, 2.7421875, 8.158012390136719, 2.9860897064208984, 1.9731292724609375, 6.045402526855469, 3.3436756134033203, 9.493602752685547, 5.710531234741211, 15.159233093261719, 9.88150405883789, 4.192283630371094, 3.4220733642578125, -2.6815185546875, 18.467784881591797, 19.125091552734375, 6.899116516113281, 20.684539794921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000059.npy"}
|
||||
{"epoch": 0.08663729809104258, "step": 60, "batch_size": 64, "mean": 6.3241682052612305, "std": 7.149575233459473, "min": -15.154327392578125, "p10": -1.5509704589843745, "median": 6.941120147705078, "p90": 12.870755767822265, "max": 26.09717559814453, "pos_frac": 0.8125, "sample": [7.772762298583984, -4.3381500244140625, 1.8671436309814453, 12.586555480957031, 1.129241943359375, 3.3859424591064453, 11.504657745361328, 8.588874816894531, 1.412109375, -15.154327392578125, 7.273105621337891, -1.7334671020507812, 2.0217418670654297, 4.2630615234375, 11.348381042480469, 7.134002685546875, 5.3531341552734375, 11.232101440429688, 4.285865783691406, 10.66119384765625, 13.655441284179688, 12.890396118164062, 20.25445556640625, 8.939361572265625, 8.660385131835938, 7.838737487792969, -0.7427101135253906, 6.748237609863281, 11.085479736328125, -0.10377311706542969, 9.291255950927734, 19.504470825195312, 4.252315521240234, 5.725822448730469, 7.868167877197266, -1.9703311920166016, 11.325515747070312, 9.035263061523438, 2.09368896484375, -5.320152282714844, 12.011856079101562, 3.3490562438964844, 5.6312255859375, 9.035873413085938, 4.272705078125, -0.6251983642578125, -11.229999542236328, 17.821121215820312, 20.603271484375, 11.759429931640625, 12.824928283691406, 2.9991798400878906, -2.8698081970214844, -1.1251449584960938, 11.506567001342773, 6.611139297485352, 26.09717559814453, 9.854515075683594, 1.081207275390625, 9.641241073608398, -0.6053009033203125, 8.437675476074219, 4.663089752197266, 1.3750057220458984], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000060.npy"}
|
||||
{"epoch": 0.0881057268722467, "step": 61, "batch_size": 64, "mean": 7.751380920410156, "std": 10.563068389892578, "min": -10.777801513671875, "p10": -1.4835662841796875, "median": 5.669303894042969, "p90": 19.042723846435546, "max": 58.73504638671875, "pos_frac": 0.8125, "sample": [3.0402069091796875, 1.2606353759765625, -1.7594680786132812, 1.5120468139648438, 15.111587524414062, 0.030153274536132812, 0.0592193603515625, 7.609966278076172, -0.6391677856445312, 27.598342895507812, 23.62494659423828, 20.295761108398438, 27.49463653564453, 14.265281677246094, 7.931144714355469, 9.417579650878906, -1.7299118041992188, 1.7934322357177734, 12.408622741699219, 13.048362731933594, 3.08721923828125, 5.166210174560547, 16.784469604492188, 8.55141830444336, 5.736328125, 6.793544769287109, 5.8477325439453125, 6.2378997802734375, 9.520050048828125, 8.87933349609375, -0.17832183837890625, -1.422149658203125, 12.647041320800781, 17.28009033203125, 16.798919677734375, 23.911865234375, 18.999801635742188, 1.9909439086914062, -0.72308349609375, 14.630386352539062, 3.1425323486328125, 0.2190723419189453, 13.570526123046875, 3.2499542236328125, 0.7549114227294922, 19.061119079589844, 0.8130722045898438, 3.6526756286621094, 58.73504638671875, 13.192985534667969, 13.33392333984375, -1.5098876953125, -10.777801513671875, 12.292640686035156, 2.447418212890625, -5.313610076904297, 5.6022796630859375, 4.028598785400391, -7.842437744140625, 2.7764434814453125, 7.544677734375, -6.125556945800781, -1.2789649963378906, 1.6056632995605469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000061.npy"}
|
||||
{"epoch": 0.08957415565345081, "step": 62, "batch_size": 64, "mean": 5.783695697784424, "std": 7.510776519775391, "min": -9.392425537109375, "p10": -3.406118583679199, "median": 5.499258995056152, "p90": 15.117897033691408, "max": 31.001075744628906, "pos_frac": 0.84375, "sample": [9.33590316772461, 1.5871353149414062, 0.6188144683837891, 11.317878723144531, 5.623346328735352, 12.476631164550781, 7.754613876342773, 0.9094123840332031, 10.067855834960938, -5.69683837890625, 14.690170288085938, 6.484733581542969, 1.9097270965576172, -0.11255073547363281, 9.219108581542969, 3.484363555908203, 10.504638671875, 5.85009765625, 0.3032722473144531, 5.162376403808594, 7.276885986328125, 7.830120086669922, 1.6441192626953125, 5.47035026550293, -4.6964111328125, 8.684799194335938, 4.252328872680664, 8.301094055175781, -0.2748908996582031, 31.001075744628906, 18.07978057861328, 7.836051940917969, 5.830085754394531, 2.613616943359375, 1.0655975341796875, 5.868461608886719, 1.0614700317382812, 18.877700805664062, -9.392425537109375, -4.0079498291015625, -6.7314605712890625, 0.7966537475585938, 1.5615882873535156, 0.7004947662353516, 1.4035110473632812, 9.390972137451172, 16.421993255615234, 9.849361419677734, -3.4855594635009766, 15.30120849609375, 8.874542236328125, 5.528167724609375, -6.399139404296875, 4.271322250366211, 13.005325317382812, 3.6473846435546875, 3.378307342529297, 3.8187942504882812, 27.011550903320312, 20.956222534179688, 7.319921493530273, 1.86920166015625, -3.2207565307617188, 6.074371337890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000062.npy"}
|
||||
{"epoch": 0.09104258443465492, "step": 63, "batch_size": 64, "mean": 7.922541618347168, "std": 8.270774841308594, "min": -13.621047973632812, "p10": -0.013949012756347468, "median": 7.200447082519531, "p90": 17.0286003112793, "max": 33.79638671875, "pos_frac": 0.890625, "sample": [25.904129028320312, 8.461700439453125, 4.610185623168945, 9.295780181884766, 3.290771484375, 1.4310073852539062, 4.796714782714844, -7.470481872558594, 6.767372131347656, 6.9789886474609375, -13.621047973632812, 16.461585998535156, 9.118064880371094, 7.421905517578125, 2.741636276245117, -1.076131820678711, 17.2716064453125, 4.901975631713867, 13.123703002929688, 21.518646240234375, 12.538787841796875, 14.44427490234375, 6.5146331787109375, 0.3166675567626953, -0.0915679931640625, 11.925065994262695, -4.099815368652344, 0.1671619415283203, 5.369293212890625, 3.385061264038086, 12.994232177734375, 8.539833068847656, 7.827781677246094, 10.0250244140625, 11.308357238769531, 12.519325256347656, 14.6500244140625, 14.387825012207031, 5.392181396484375, 2.797718048095703, 0.3379058837890625, 15.164077758789062, 2.6877593994140625, 5.468147277832031, -10.292533874511719, 13.984853744506836, 7.7433319091796875, 1.7837066650390625, 2.6796417236328125, 6.886566162109375, 3.392242431640625, 14.700668334960938, 18.237655639648438, 8.21563720703125, 18.381553649902344, 0.7491226196289062, 8.821523666381836, 33.79638671875, -2.9268722534179688, 15.851566314697266, 4.8056182861328125, 26.886985778808594, 5.055915832519531, 11.791221618652344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000063.npy"}
|
||||
{"epoch": 0.09251101321585903, "step": 64, "batch_size": 64, "mean": 9.295536041259766, "std": 10.532648086547852, "min": -18.506881713867188, "p10": -1.0077222824096679, "median": 7.777100563049316, "p90": 22.24172973632813, "max": 50.224151611328125, "pos_frac": 0.8125, "sample": [0.8104629516601562, 18.650978088378906, 27.682357788085938, 8.288200378417969, 5.0225067138671875, 12.538597106933594, 3.166471481323242, 16.624523162841797, -0.5787754058837891, 7.178434371948242, 2.2548904418945312, 11.149711608886719, 1.2436065673828125, 5.267768859863281, 12.522956848144531, 31.527664184570312, 4.8275604248046875, -1.6861343383789062, 18.63751220703125, 7.792116165161133, 12.578693389892578, -2.7594223022460938, 29.71771240234375, 8.758060455322266, -0.26900291442871094, 6.156181335449219, -0.9936618804931641, 7.512638092041016, -18.506881713867188, -0.25019073486328125, 7.7620849609375, -1.05120849609375, 50.224151611328125, 6.014289855957031, 1.7952136993408203, 10.829349517822266, 17.954566955566406, 21.195404052734375, 10.77020263671875, 6.005870819091797, 5.941476821899414, 23.639968872070312, 12.954597473144531, 10.148536682128906, -1.0137481689453125, -9.401832580566406, 25.897689819335938, 4.161491394042969, 22.690155029296875, 0.1589202880859375, 15.664352416992188, 13.997421264648438, 13.136711120605469, -1.4681930541992188, 13.387649536132812, -0.4260826110839844, 5.3662109375, 7.967075347900391, 14.219245910644531, 13.590560913085938, 1.7354469299316406, 11.910369873046875, 18.56627655029297, 5.7245635986328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000064.npy"}
|
||||
{"epoch": 0.09397944199706314, "step": 65, "batch_size": 64, "mean": 9.323431015014648, "std": 10.29684066772461, "min": -23.44318389892578, "p10": -0.7886083602905267, "median": 7.614355087280273, "p90": 23.8426498413086, "max": 31.716644287109375, "pos_frac": 0.84375, "sample": [31.716644287109375, 26.129592895507812, 17.722030639648438, 12.208755493164062, 6.243871688842773, 9.238227844238281, 0.7609634399414062, 13.67471694946289, 3.2313098907470703, 22.110450744628906, 17.06208038330078, 1.4043216705322266, -0.2063426971435547, 24.46185302734375, 28.618881225585938, 1.1579399108886719, 10.641578674316406, 2.5816593170166016, -4.25921630859375, 5.149528503417969, 1.3611392974853516, 19.076583862304688, 6.2925262451171875, -23.44318389892578, 17.346771240234375, 9.48529052734375, -4.116912841796875, 8.233596801757812, 14.236831665039062, 5.400997161865234, 6.128936767578125, 5.626010894775391, -2.2170066833496094, 5.095388412475586, 30.206100463867188, 7.230365753173828, 22.397842407226562, 16.082191467285156, 26.15880584716797, 7.918689727783203, -0.1398162841796875, -12.199317932128906, 10.519882202148438, 2.104175567626953, 7.113800048828125, 19.436981201171875, 25.12151336669922, -2.055938720703125, 6.223052978515625, 20.8599853515625, 12.287353515625, 4.893585205078125, 21.190109252929688, -1.0381507873535156, 13.614883422851562, 8.343795776367188, 6.436775207519531, 2.0091094970703125, 17.66339111328125, 10.73321533203125, -0.14093399047851562, 8.198896408081055, 0.06344223022460938, 7.310020446777344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000065.npy"}
|
||||
{"epoch": 0.09544787077826726, "step": 66, "batch_size": 64, "mean": 8.572092056274414, "std": 10.30179214477539, "min": -9.97607421875, "p10": -0.7767261505126952, "median": 7.301836013793945, "p90": 19.487243270874025, "max": 48.81031799316406, "pos_frac": 0.84375, "sample": [7.851531982421875, 48.81031799316406, 0.27147674560546875, 15.803855895996094, 14.166618347167969, 13.490684509277344, 2.465360641479492, -0.3621673583984375, 10.792242050170898, 7.770965576171875, -9.066802978515625, 2.353099822998047, 6.889766693115234, -0.7033920288085938, 12.37460708618164, 3.975627899169922, 4.883661270141602, 0.2111797332763672, 2.540424346923828, 19.490341186523438, 8.464057922363281, 10.63027572631836, 0.7244663238525391, 10.842552185058594, 18.075031280517578, 2.5709686279296875, 4.759681701660156, -2.6942672729492188, -4.298465728759766, -9.97607421875, 10.805419921875, 17.67467498779297, 19.48001480102539, 8.305343627929688, 3.7365684509277344, 11.8487548828125, 6.739387512207031, -0.20126724243164062, 2.529296875, 14.763641357421875, 10.887619018554688, 4.170494079589844, -0.8081550598144531, 7.713905334472656, 6.059289932250977, -8.44927978515625, 10.338615417480469, 1.7090187072753906, -3.9325332641601562, 14.258186340332031, 4.470970153808594, 20.113967895507812, 27.224884033203125, 22.58477783203125, 4.835868835449219, 17.148727416992188, 2.1025619506835938, 8.548274993896484, 15.217384338378906, 2.7339229583740234, 34.59141540527344, 1.235565185546875, 33.182525634765625, 13.886428833007812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000066.npy"}
|
||||
{"epoch": 0.09691629955947137, "step": 67, "batch_size": 64, "mean": 8.888347625732422, "std": 10.270591735839844, "min": -12.6123046875, "p10": -1.0431358337402339, "median": 6.335858345031738, "p90": 21.72581481933594, "max": 48.348846435546875, "pos_frac": 0.84375, "sample": [22.4154052734375, 4.302768707275391, 22.040023803710938, 0.7889404296875, 2.4396133422851562, -0.6254653930664062, 16.986412048339844, 5.361808776855469, -5.942237854003906, 2.378538131713867, 17.658584594726562, 10.546676635742188, 4.485160827636719, 16.557945251464844, -0.5509567260742188, -4.401222229003906, 9.645751953125, 10.269386291503906, 1.7987060546875, -12.6123046875, 20.992660522460938, 8.94561767578125, -0.2916984558105469, 0.0156402587890625, 6.1206512451171875, 2.8389739990234375, 22.381973266601562, 6.821538925170898, -1.222137451171875, 6.100379943847656, 2.084747314453125, 15.357494354248047, 7.468132019042969, 3.2556934356689453, 2.6862335205078125, 10.1065673828125, 15.717103958129883, 6.21049690246582, 1.3534317016601562, -5.283050537109375, 5.807960510253906, 18.341819763183594, 4.8824310302734375, 4.094364166259766, 26.87652587890625, 14.934883117675781, 6.542476654052734, 18.9532470703125, 14.452957153320312, 15.534774780273438, 13.357841491699219, 5.678361892700195, 12.18730354309082, 34.50895690917969, 6.461219787597656, 1.0819034576416016, 48.348846435546875, 3.299257278442383, -2.0787124633789062, 27.081008911132812, 20.613868713378906, 7.480228424072266, -3.379730224609375, 8.588441848754883], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000067.npy"}
|
||||
{"epoch": 0.09838472834067548, "step": 68, "batch_size": 64, "mean": 8.439209938049316, "std": 8.632508277893066, "min": -7.87646484375, "p10": 0.3489673614501954, "median": 5.976025581359863, "p90": 21.232487487792973, "max": 38.28321838378906, "pos_frac": 0.90625, "sample": [-5.196723937988281, 3.0496063232421875, 25.102508544921875, 8.484786987304688, 5.968709945678711, -6.6017608642578125, 8.618804931640625, 23.409584045410156, 1.3277130126953125, 0.4426383972167969, 13.613525390625, 15.928787231445312, 18.386123657226562, 12.370918273925781, 10.342475891113281, 0.3088226318359375, 7.000789642333984, 13.202770233154297, 4.068840026855469, 8.458213806152344, -0.9243392944335938, 4.846967697143555, 5.163341522216797, 3.0960540771484375, 7.897197723388672, 12.818130493164062, 19.32879638671875, 7.9530029296875, 9.582782745361328, 20.35498809814453, 21.608558654785156, 23.694263458251953, 1.51654052734375, 5.969367980957031, 10.539703369140625, 1.744943618774414, -0.42322731018066406, 5.982683181762695, 2.307220458984375, 5.214256286621094, 17.666240692138672, 2.947673797607422, 11.032407760620117, -7.87646484375, 6.9226226806640625, 5.769981384277344, 4.0675201416015625, 22.396087646484375, 1.8720855712890625, 1.572500228881836, 2.3526363372802734, 38.28321838378906, 25.97748565673828, 17.298843383789062, 13.755882263183594, 0.5924301147460938, 2.5528030395507812, 5.287498474121094, 11.365959167480469, -2.59228515625, 5.5172882080078125, 2.7041397094726562, 5.349601745605469, 8.734962463378906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000068.npy"}
|
||||
{"epoch": 0.09985315712187959, "step": 69, "batch_size": 64, "mean": 10.210320472717285, "std": 10.07213020324707, "min": -4.614227294921875, "p10": -0.2778038024902343, "median": 7.255146026611328, "p90": 21.784290313720707, "max": 47.442535400390625, "pos_frac": 0.875, "sample": [5.225372314453125, 6.988487243652344, 31.728424072265625, 9.722320556640625, 13.2677001953125, 16.66606903076172, -4.387451171875, -1.38519287109375, -0.16693878173828125, 4.862602233886719, 5.6932373046875, 9.318378448486328, -0.3253173828125, 9.506423950195312, 7.5218048095703125, 15.63107681274414, 27.774688720703125, 7.6299285888671875, 18.05620574951172, 15.939804077148438, -0.5814285278320312, 25.506301879882812, 13.102310180664062, 10.248645782470703, 19.25043487548828, 14.28802490234375, 4.647808074951172, 17.103797912597656, 13.265281677246094, 6.047941207885742, 1.003683090209961, 0.2750816345214844, 20.223121643066406, 47.442535400390625, 4.877902984619141, 20.850303649902344, 5.99371337890625, 15.393829345703125, 17.737838745117188, 0.888580322265625, 1.7088623046875, 16.855453491210938, 10.956623077392578, 2.3012619018554688, -0.8553905487060547, 15.043807983398438, 6.222434997558594, 37.88945770263672, 4.844207763671875, 7.996728897094727, 6.418699264526367, -4.614227294921875, 6.38916015625, 3.3197021484375, 1.6865272521972656, 6.95819091796875, 22.1845703125, 23.04693603515625, 0.118499755859375, 20.165786743164062, 1.9933891296386719, 1.348398208618164, 5.345766067504883, -0.697662353515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000069.npy"}
|
||||
{"epoch": 0.1013215859030837, "step": 70, "batch_size": 64, "mean": 11.062643051147461, "std": 11.917571067810059, "min": -10.566650390625, "p10": -1.5904834747314451, "median": 10.60970687866211, "p90": 27.611377716064467, "max": 43.939453125, "pos_frac": 0.828125, "sample": [6.481409072875977, 1.5330047607421875, 11.825740814208984, 20.22284698486328, 24.313865661621094, 13.374734878540039, -8.865280151367188, -1.6339263916015625, 14.831916809082031, 3.727611541748047, -2.2104644775390625, 13.981391906738281, -2.599609375, 9.2703857421875, 40.57231140136719, 1.7953929901123047, 10.442169189453125, 29.02459716796875, 11.655538558959961, 43.939453125, 12.527694702148438, 10.96624755859375, 2.1287155151367188, 30.851409912109375, -0.10277175903320312, 18.05353546142578, 19.9752197265625, 13.49555778503418, -2.0486621856689453, 7.459211349487305, 6.261474609375, 29.058250427246094, 1.95562744140625, 0.6033477783203125, 1.3369827270507812, -1.4891166687011719, 14.75082015991211, 3.2827835083007812, 12.890251159667969, -1.8209667205810547, 3.3582191467285156, 3.9806041717529297, -0.49604034423828125, 1.7791900634765625, 11.306352615356445, 1.32366943359375, 20.222084045410156, 24.163177490234375, -10.566650390625, 5.396938323974609, 18.502777099609375, 3.3631210327148438, 1.6779327392578125, 35.889556884765625, 14.141765594482422, -0.8359031677246094, 37.099456787109375, 10.777244567871094, 22.908294677734375, 17.96690559387207, 3.2650089263916016, 24.032943725585938, 13.770694732666016, 23.163116455078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000070.npy"}
|
||||
{"epoch": 0.1027900146842878, "step": 71, "batch_size": 64, "mean": 12.967889785766602, "std": 13.040888786315918, "min": -9.671022415161133, "p10": 0.6459039688110366, "median": 10.924520492553711, "p90": 24.64378356933594, "max": 54.0628662109375, "pos_frac": 0.90625, "sample": [-6.254905700683594, 45.488372802734375, 6.412208557128906, 2.1165237426757812, 6.444156646728516, 14.086128234863281, 6.473358154296875, 12.960357666015625, 19.192527770996094, 5.960945129394531, 23.7900390625, 7.596223831176758, 19.037017822265625, 4.907894134521484, 3.3328628540039062, 7.302177429199219, 11.586833953857422, 16.745361328125, -1.1845626831054688, 2.055360794067383, 4.040550231933594, 47.970977783203125, 8.138633728027344, 7.235958099365234, 17.872222900390625, 17.383056640625, 6.538330078125, 13.730186462402344, 13.430805206298828, -9.145393371582031, 4.3208465576171875, 13.377487182617188, 12.544784545898438, 0.041851043701171875, 5.310789108276367, 18.85700225830078, 20.518234252929688, -0.01605224609375, 22.010879516601562, 17.654502868652344, 4.9993743896484375, 14.251388549804688, 6.5654296875, 17.72760009765625, 17.213043212890625, 19.429420471191406, 4.285266876220703, 38.97434997558594, 51.3590087890625, 54.0628662109375, 2.5043869018554688, 25.009674072265625, -9.671022415161133, -1.1567001342773438, 15.573104858398438, 7.407703399658203, 8.282093048095703, 11.976860046386719, 20.552658081054688, 17.416114807128906, 34.136566162109375, 2.7325592041015625, 6.186466217041016, 10.26220703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000071.npy"}
|
||||
{"epoch": 0.10425844346549193, "step": 72, "batch_size": 64, "mean": 13.816070556640625, "std": 14.3781099319458, "min": -16.06719970703125, "p10": -2.5835136413574187, "median": 11.693943977355957, "p90": 34.9550491333008, "max": 60.88427734375, "pos_frac": 0.890625, "sample": [19.25402069091797, 16.019384384155273, 6.416450500488281, 14.029388427734375, 6.2328338623046875, 4.14979362487793, 9.550338745117188, 28.28662109375, -16.06719970703125, 27.83795166015625, 16.720638275146484, 15.382070541381836, 8.083620071411133, 0.7740478515625, 18.391132354736328, 10.45346450805664, 14.235748291015625, 29.031814575195312, 16.53284454345703, 36.64581298828125, 42.227447509765625, -4.227685928344727, 7.540809631347656, 11.36882209777832, 18.18042755126953, 3.4405593872070312, 20.103973388671875, 10.957527160644531, 12.019065856933594, 18.52965545654297, 3.3009109497070312, 5.7960205078125, 12.49102783203125, -15.069808959960938, 37.88615417480469, 31.009933471679688, 21.256057739257812, 7.658658981323242, 23.578125, -4.108863830566406, 9.895801544189453, 44.70814514160156, 60.88427734375, -7.480140686035156, 10.516691207885742, 25.807327270507812, 8.080062866210938, 8.633068084716797, 1.1737384796142578, 38.50825500488281, -12.3619384765625, 4.251438140869141, 2.5411224365234375, 17.95770263671875, 15.271907806396484, 20.340850830078125, 10.816150665283203, 16.872390747070312, 40.09521484375, 12.559358596801758, 6.894725799560547, 4.282623291015625, -4.022468566894531, 2.1026268005371094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000072.npy"}
|
||||
{"epoch": 0.10572687224669604, "step": 73, "batch_size": 64, "mean": 13.781963348388672, "std": 20.36886215209961, "min": -28.773956298828125, "p10": -10.054238128662108, "median": 10.669946670532227, "p90": 36.3125358581543, "max": 72.32838439941406, "pos_frac": 0.78125, "sample": [32.891021728515625, -16.087936401367188, 8.15865707397461, 16.390457153320312, 7.589086532592773, 39.04303741455078, -7.740203857421875, 26.679183959960938, 7.8488311767578125, -0.553619384765625, 31.03955078125, 35.91743469238281, 27.023818969726562, -1.3108062744140625, 0.164794921875, -17.75445556640625, 62.028533935546875, -2.1314926147460938, 11.707555770874023, 20.888458251953125, 17.82680892944336, 8.054458618164062, 10.799690246582031, 1.4093818664550781, 2.422008514404297, 10.540203094482422, -19.824695587158203, 11.242080688476562, 33.697357177734375, 16.86540985107422, 14.137619018554688, 3.744943618774414, 62.453216552734375, -12.566787719726562, 4.9406890869140625, 33.84361267089844, 32.51075744628906, -10.423866271972656, 5.147859573364258, 2.2577877044677734, -3.0493621826171875, -6.61297607421875, -11.238006591796875, 7.496612548828125, 17.808853149414062, 17.692901611328125, 2.912261962890625, 5.899587631225586, 22.97028350830078, 46.34474182128906, 16.550994873046875, 6.728275299072266, 23.89788818359375, 6.242961883544922, 16.012176513671875, -9.1917724609375, 15.27899169921875, 62.66172790527344, 30.223648071289062, 72.32838439941406, 3.4063987731933594, 36.48186492919922, 19.102767944335938, -28.773956298828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000073.npy"}
|
||||
{"epoch": 0.10719530102790015, "step": 74, "batch_size": 64, "mean": 15.132284164428711, "std": 22.839929580688477, "min": -42.27965545654297, "p10": -3.5770503997802727, "median": 11.42477798461914, "p90": 34.982641601562506, "max": 136.45916748046875, "pos_frac": 0.796875, "sample": [28.31018829345703, 9.908195495605469, 14.735977172851562, 0.003437042236328125, 26.01892852783203, 8.08060073852539, 11.364250183105469, 60.376441955566406, 2.403644561767578, 18.723350524902344, 17.024879455566406, 10.749216079711914, 21.426509857177734, 2.126129150390625, 17.95672607421875, 136.45916748046875, 9.2706298828125, 33.23310089111328, 16.439876556396484, 3.619171142578125, 1.7146186828613281, 55.90876770019531, 4.1660003662109375, 35.732444763183594, 28.005393981933594, 11.025238037109375, -3.9658164978027344, -0.7055206298828125, -0.5548057556152344, -20.271026611328125, 15.782886505126953, -2.0768814086914062, 0.0077056884765625, 10.327667236328125, -42.27965545654297, 22.280357360839844, -2.6699295043945312, 28.582698822021484, 5.329959869384766, -4.757488250732422, -12.024948120117188, 11.485305786132812, 20.418731689453125, 24.689987182617188, -0.2878875732421875, 50.184234619140625, 15.618255615234375, 2.89111328125, 10.612411499023438, 9.330867767333984, 11.795063018798828, 39.304290771484375, 20.16973114013672, 25.871795654296875, 41.357154846191406, 16.986618041992188, 5.10713005065918, 19.24950408935547, 30.171966552734375, 29.142333984375, -0.5223846435546875, -4.995094299316406, -4.017608642578125, 16.11459732055664], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000074.npy"}
|
||||
{"epoch": 0.10866372980910426, "step": 75, "batch_size": 64, "mean": 21.468948364257812, "std": 21.70078468322754, "min": -13.97723388671875, "p10": -3.0400632858276344, "median": 17.066402435302734, "p90": 52.788874816894534, "max": 78.84217834472656, "pos_frac": 0.859375, "sample": [-0.7579669952392578, 27.134475708007812, 18.273757934570312, 15.658042907714844, 19.10326385498047, 13.096332550048828, 11.179420471191406, 7.683586120605469, 50.19306945800781, 73.18084716796875, 33.08892059326172, 16.349878311157227, -8.318443298339844, 29.865859985351562, 78.84217834472656, 16.765949249267578, 44.225860595703125, 66.81253051757812, 10.366172790527344, -0.6760120391845703, 46.767791748046875, 26.613601684570312, 17.15314483642578, 10.923103332519531, 15.064620971679688, 52.07664489746094, 53.0941162109375, 3.612110137939453, 9.261268615722656, 33.31418228149414, 19.262710571289062, 18.70615005493164, 4.1413726806640625, 70.39305114746094, -6.079265594482422, -4.018104553222656, 27.261367797851562, -13.97723388671875, 22.13128662109375, 21.87777328491211, 7.601047515869141, 13.610748291015625, -9.576698303222656, 9.186859130859375, -7.960796356201172, 31.179763793945312, 3.2708072662353516, 13.457260131835938, 42.618377685546875, 28.4334716796875, 4.223934173583984, -7.154998779296875, 9.388154983520508, 7.230358123779297, 17.437335968017578, 5.8655853271484375, 7.2853851318359375, 25.832305908203125, 16.979660034179688, 18.764617919921875, 40.051918029785156, 63.242462158203125, 66.06982421875, 17.32782745361328], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000075.npy"}
|
||||
{"epoch": 0.11013215859030837, "step": 76, "batch_size": 64, "mean": 13.827641487121582, "std": 16.98805046081543, "min": -47.10325622558594, "p10": -2.9663013458251943, "median": 13.02697467803955, "p90": 32.70941276550293, "max": 72.02825927734375, "pos_frac": 0.84375, "sample": [14.402002334594727, 6.631404876708984, 2.7507076263427734, 25.86962890625, 4.132087707519531, 17.821197509765625, 9.426788330078125, 29.423492431640625, 32.50593948364258, 15.101051330566406, 17.31999969482422, 22.9202880859375, 10.264041900634766, 34.26305389404297, 4.245750427246094, 5.731576919555664, 9.360721588134766, 72.02825927734375, 36.901329040527344, 6.186164855957031, -7.727622985839844, 13.142671585083008, -0.17510414123535156, 13.4754638671875, -16.392623901367188, 7.074518203735352, 6.120288848876953, 16.854644775390625, 6.578453063964844, 7.831806182861328, 32.31599426269531, 18.22161102294922, 23.543800354003906, 9.682598114013672, 29.662643432617188, 4.480556488037109, -3.4132652282714844, 28.477264404296875, 3.9246978759765625, -11.896217346191406, 7.714160919189453, 2.674741744995117, -6.805604934692383, -8.8187255859375, 17.792076110839844, 32.79661560058594, 52.5496826171875, 30.779022216796875, 9.233997344970703, 6.03448486328125, 13.456306457519531, -1.9233856201171875, 16.921920776367188, 19.277206420898438, 17.22779083251953, 12.707252502441406, -0.6357612609863281, 15.2730712890625, 38.14142608642578, 25.27729034423828, 24.809005737304688, 35.61077880859375, 12.911277770996094, -47.10325622558594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000076.npy"}
|
||||
{"epoch": 0.11160058737151249, "step": 77, "batch_size": 64, "mean": 22.32929039001465, "std": 23.605266571044922, "min": -44.575531005859375, "p10": -0.009384536743162641, "median": 22.88666534423828, "p90": 59.33101196289063, "max": 81.01947021484375, "pos_frac": 0.890625, "sample": [18.13298797607422, 45.66388702392578, 32.752750396728516, 44.03402328491211, 44.95167541503906, -0.6109237670898438, 63.406280517578125, 23.188640594482422, 9.332284927368164, 10.851520538330078, 6.806142807006836, 37.338409423828125, 41.15558624267578, 6.380466461181641, 4.2620086669921875, 28.37572479248047, -44.575531005859375, 23.718482971191406, 38.38549041748047, 33.368019104003906, 14.692405700683594, -2.961223602294922, 10.796504974365234, 1.3942070007324219, 4.485618591308594, 59.4029541015625, 3.487842559814453, 29.17028045654297, 12.362350463867188, -5.814794540405273, 33.68381118774414, 30.148414611816406, 13.793848037719727, 69.47102355957031, 34.897003173828125, 10.347427368164062, 62.120452880859375, 81.01947021484375, 22.074989318847656, 14.3050537109375, -35.20143127441406, 8.466323852539062, -17.05352783203125, 3.228893280029297, 13.114439010620117, 27.029571533203125, 32.89910125732422, 26.02972984313965, 27.749046325683594, 27.39019012451172, 8.250839233398438, -7.718109130859375, 5.085981369018555, 27.229759216308594, 29.592124938964844, 23.369155883789062, 4.567596435546875, 60.40936279296875, 71.66456604003906, 12.969547271728516, 25.24825668334961, 59.16314697265625, 22.58469009399414, 7.209747314453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000077.npy"}
|
||||
{"epoch": 0.1130690161527166, "step": 78, "batch_size": 64, "mean": 17.180259704589844, "std": 20.03946876525879, "min": -23.506576538085938, "p10": -3.5817176818847654, "median": 16.84520721435547, "p90": 44.3201026916504, "max": 90.25717163085938, "pos_frac": 0.796875, "sample": [0.7560653686523438, 2.0338134765625, 43.148406982421875, 18.364959716796875, -3.006500244140625, 48.69624328613281, 17.636594772338867, 90.25717163085938, 27.566162109375, 23.42013168334961, 21.720401763916016, 8.661457061767578, -5.03076171875, 15.09321403503418, 22.541088104248047, 21.042041778564453, -0.3341827392578125, 9.525323867797852, -5.297096252441406, 3.5680999755859375, 7.036409378051758, 18.671907424926758, 27.56399917602539, 25.639930725097656, 20.608577728271484, 15.389995574951172, 18.840717315673828, 18.40875244140625, 26.158096313476562, 6.249654769897461, 44.82225799560547, 16.887697219848633, 6.560981750488281, -11.479549407958984, 27.671165466308594, -11.230575561523438, 57.26739501953125, 22.213531494140625, 46.16477966308594, -1.932464599609375, 39.08626937866211, 4.102210998535156, 51.975616455078125, 34.288883209228516, 31.41309356689453, -1.2860374450683594, 11.881744384765625, 3.9230728149414062, 13.597698211669922, 8.07745361328125, 6.01078987121582, -1.232156753540039, 16.802717208862305, -3.3173751831054688, 32.79240417480469, -23.506576538085938, 54.251007080078125, 4.426837921142578, -19.302085876464844, 27.147323608398438, -3.69500732421875, 38.776031494140625, 2.7525634765625, 28.694259643554688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000078.npy"}
|
||||
{"epoch": 0.1145374449339207, "step": 79, "batch_size": 64, "mean": 18.81760025024414, "std": 24.413606643676758, "min": -24.213485717773438, "p10": -7.344851303100585, "median": 13.980016708374023, "p90": 53.02166137695314, "max": 82.12332153320312, "pos_frac": 0.765625, "sample": [-7.9947052001953125, 56.17363739013672, 6.477750778198242, 73.21742248535156, 4.394950866699219, 0.3094635009765625, -2.391073226928711, -11.799942016601562, 45.09690856933594, 38.816131591796875, 19.276168823242188, -24.213485717773438, 25.462841033935547, 74.54214477539062, 9.391304016113281, 27.44219970703125, 77.51028442382812, 55.00885009765625, 17.801660537719727, -11.2138671875, 82.12332153320312, 17.372920989990234, -4.2426300048828125, -12.58941650390625, 30.441238403320312, 1.408651351928711, 0.0367431640625, 17.34609603881836, 14.617610931396484, 65.95507049560547, 2.5322952270507812, 9.008466720581055, 13.342422485351562, 28.287113189697266, -12.069488525390625, 10.659782409667969, -0.27660369873046875, 7.800773620605469, 45.22723388671875, 44.29402160644531, 21.470882415771484, 34.27330017089844, 35.9044189453125, 20.573909759521484, 28.32660675048828, -5.562044143676758, 2.929229736328125, 33.80241394042969, 39.764366149902344, 11.480634689331055, 16.119417190551758, -5.828525543212891, -1.3932151794433594, 41.10694885253906, -0.993865966796875, 23.4757080078125, 5.399051666259766, 6.779638290405273, 0.02741241455078125, -10.012939453125, 7.669780731201172, 48.3848876953125, -4.309787750244141, 20.35382080078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000079.npy"}
|
||||
{"epoch": 0.11600587371512482, "step": 80, "batch_size": 64, "mean": 17.45241928100586, "std": 25.401348114013672, "min": -26.21485137939453, "p10": -8.462450408935545, "median": 12.678586959838867, "p90": 46.489024353027354, "max": 103.24301147460938, "pos_frac": 0.78125, "sample": [26.528656005859375, 9.154792785644531, -6.3976593017578125, 25.261329650878906, 6.798360824584961, 3.8915328979492188, -3.4553260803222656, -10.232154846191406, 34.849456787109375, 0.78094482421875, 4.393182754516602, 17.270401000976562, -17.037979125976562, -21.705596923828125, -8.761024475097656, 24.21019744873047, 103.24301147460938, 40.240501403808594, 88.2821044921875, 48.11095428466797, 26.328765869140625, 7.3935699462890625, 43.870582580566406, 47.61121368408203, 36.30479431152344, 16.466564178466797, -1.3803176879882812, 35.65776062011719, 15.38949203491211, 9.230819702148438, 3.7678680419921875, -18.169761657714844, 4.673778533935547, -7.765777587890625, 26.914962768554688, -26.21485137939453, 61.96168518066406, 32.46380615234375, -9.115848541259766, 94.2130126953125, 10.677335739135742, 3.7645721435546875, 3.142498016357422, 2.2445831298828125, 48.13788604736328, 13.786396026611328, 24.828285217285156, 32.83238220214844, 17.361499786376953, 11.570777893066406, -3.909130096435547, -3.029794692993164, 1.0920467376708984, 7.502922058105469, 24.03880500793457, 23.2154541015625, 6.778236389160156, 3.4544830322265625, 33.899566650390625, 26.728561401367188, 13.918853759765625, 40.521881103515625, -5.448339462280273, 14.817245483398438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000080.npy"}
|
||||
{"epoch": 0.11747430249632893, "step": 81, "batch_size": 64, "mean": 24.351259231567383, "std": 29.816463470458984, "min": -32.436187744140625, "p10": -9.43324737548828, "median": 20.669248580932617, "p90": 61.09842529296875, "max": 126.4140625, "pos_frac": 0.84375, "sample": [46.223548889160156, 36.08807373046875, -32.436187744140625, 41.802818298339844, 26.641773223876953, 63.50971984863281, 21.595989227294922, -13.18533706665039, 7.567846298217773, 34.20857620239258, 33.333160400390625, -23.722747802734375, 3.1323699951171875, 26.72747039794922, 94.0321044921875, 11.434906005859375, 25.875869750976562, 39.36775207519531, 61.285797119140625, -17.254859924316406, 1.3282527923583984, 10.331466674804688, 21.762470245361328, 75.95185852050781, 24.939125061035156, 12.250293731689453, 17.574066162109375, 8.100479125976562, 7.668081283569336, -8.694923400878906, 27.30707550048828, 8.041412353515625, 5.4796905517578125, 13.467658996582031, 30.291748046875, 126.4140625, 44.479007720947266, 19.742507934570312, 2.71868896484375, 60.661224365234375, 44.38433837890625, 51.86328125, 16.686599731445312, -1.20220947265625, 4.331798553466797, -2.854015350341797, -20.37094497680664, 15.576652526855469, 13.958770751953125, 31.880996704101562, 16.11886978149414, 13.54484748840332, 21.986679077148438, -16.44585418701172, 40.84547424316406, 48.65666198730469, 113.32733154296875, 63.27455139160156, 10.20257568359375, 38.52271270751953, 2.6712493896484375, 29.846527099609375, -9.749671936035156, 35.38047790527344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000081.npy"}
|
||||
{"epoch": 0.11894273127753303, "step": 82, "batch_size": 64, "mean": 19.267547607421875, "std": 31.920446395874023, "min": -64.02735900878906, "p10": -17.288195800781246, "median": 16.9479923248291, "p90": 53.92407150268555, "max": 127.28945922851562, "pos_frac": 0.796875, "sample": [-9.750221252441406, 34.86468505859375, -21.040687561035156, 19.172801971435547, 22.066165924072266, 19.714120864868164, -27.763145446777344, 16.26761245727539, 26.744354248046875, 59.89954376220703, 73.34524536132812, 13.848930358886719, -14.49169921875, 9.711151123046875, 16.535654067993164, 112.74763488769531, 13.544136047363281, -55.79216003417969, -9.959213256835938, 36.383941650390625, 2.5497360229492188, 33.75566101074219, 53.585121154785156, 4.551229476928711, -18.4866943359375, -7.573822021484375, 4.159309387207031, 25.557235717773438, 21.825531005859375, 13.055648803710938, 21.50812530517578, 38.49835205078125, 39.860870361328125, -0.9152736663818359, 78.16015625, 20.556594848632812, 54.0693359375, 16.830135345458984, 10.015348434448242, 6.9850921630859375, -2.1306686401367188, 31.530426025390625, 64.56402587890625, -20.581314086914062, 30.68169403076172, 22.99687957763672, 7.172384262084961, 48.87710189819336, 37.881996154785156, -24.232864379882812, 10.322921752929688, 7.777008056640625, -64.02735900878906, 9.081695556640625, 34.8350830078125, 22.734176635742188, 5.299980163574219, 27.832130432128906, 17.06584930419922, 127.28945922851562, 16.405723571777344, 18.402265548706055, 0.058349609375, 48.68950653076172], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000082.npy"}
|
||||
{"epoch": 0.12041116005873716, "step": 83, "batch_size": 64, "mean": 22.84069061279297, "std": 28.214187622070312, "min": -27.526336669921875, "p10": -10.007940673828124, "median": 18.271611213684082, "p90": 67.90889434814456, "max": 91.37033081054688, "pos_frac": 0.8125, "sample": [22.333282470703125, 26.330059051513672, 17.455020904541016, 72.20626831054688, 49.002281188964844, 13.231971740722656, -7.9163360595703125, 75.40338134765625, 62.321380615234375, 4.619537353515625, 13.132110595703125, 18.077974319458008, 1.5877838134765625, -5.7126617431640625, 19.31597900390625, 74.33956909179688, -15.591552734375, 91.37033081054688, 11.65838623046875, 77.77008819580078, 42.41387939453125, 12.887504577636719, 52.80116271972656, -21.51507568359375, -27.526336669921875, -10.748725891113281, 58.084014892578125, -22.61536407470703, 17.96187400817871, 70.30354309082031, 10.622169494628906, 16.0955810546875, 5.57771110534668, 0.0003204345703125, 27.27342987060547, 12.845588684082031, 27.434894561767578, -0.09157943725585938, 43.12648010253906, 14.959842681884766, 22.369232177734375, 21.16701889038086, 47.3565673828125, 76.751953125, 20.11516571044922, 51.59405517578125, 5.4237823486328125, -20.443328857421875, -2.5941925048828125, 2.0493545532226562, -25.534255981445312, 55.684417724609375, 9.561225891113281, 44.997039794921875, 26.904945373535156, 26.02558135986328, -8.279441833496094, 7.803638458251953, 19.70560073852539, 34.63159942626953, 44.42131805419922, 18.465248107910156, 29.65215301513672, 3.149749755859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000083.npy"}
|
||||
{"epoch": 0.12187958883994127, "step": 84, "batch_size": 64, "mean": 19.657663345336914, "std": 21.597183227539062, "min": -46.52813720703125, "p10": -1.4343183517456053, "median": 17.270617485046387, "p90": 45.266647338867195, "max": 88.32102966308594, "pos_frac": 0.84375, "sample": [7.3150634765625, 17.35345458984375, 3.456817626953125, 5.25990104675293, 49.4776611328125, 5.989471435546875, 6.3529815673828125, 21.61016845703125, 17.187780380249023, 9.458877563476562, 31.579181671142578, 37.98241424560547, -21.702041625976562, 13.824951171875, 22.481311798095703, 31.630950927734375, 10.827678680419922, 88.32102966308594, 32.15102005004883, 3.470062255859375, 6.765953063964844, 30.260513305664062, 6.448844909667969, 6.622707366943359, -1.1233863830566406, 44.0552978515625, 27.15888214111328, 31.627243041992188, -46.52813720703125, -1.4943294525146484, 55.43086242675781, 2.9230422973632812, 38.166358947753906, 16.317481994628906, 19.867996215820312, 9.576240539550781, 50.010719299316406, 45.785797119140625, 32.82057189941406, 21.511825561523438, 2.9722061157226562, 33.17200469970703, -4.782358169555664, 17.101032257080078, 29.828369140625, 30.85129165649414, -1.2942924499511719, -4.347932815551758, 35.17631530761719, 34.41864013671875, -2.6319808959960938, 43.586883544921875, 58.22688293457031, 20.476905822753906, 63.732696533203125, 15.91595458984375, 32.71379852294922, 8.672439575195312, -18.935287475585938, 32.2376708984375, 13.108604431152344, 20.118820190429688, 7.695903778076172, -0.15729141235351562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000084.npy"}
|
||||
{"epoch": 0.12334801762114538, "step": 85, "batch_size": 64, "mean": 20.310611724853516, "std": 25.771699905395508, "min": -32.311676025390625, "p10": -6.0521911621093745, "median": 18.26822853088379, "p90": 60.832456207275406, "max": 73.691650390625, "pos_frac": 0.71875, "sample": [44.640159606933594, 14.318824768066406, 30.83112335205078, 3.819368362426758, -2.4739990234375, 23.315975189208984, 29.686416625976562, 1.9911003112792969, 62.63123321533203, 9.733085632324219, -2.3899784088134766, 5.160430908203125, 67.89334106445312, 27.45557403564453, 25.613080978393555, 22.29895782470703, -4.218505859375, -18.86394500732422, 19.963943481445312, 0.39847564697265625, -32.311676025390625, 73.691650390625, 20.47577667236328, -2.642742156982422, -5.6969146728515625, 46.918067932128906, 7.203330993652344, -9.088134765625, 18.97191619873047, 11.148590087890625, 49.73353576660156, -6.2044525146484375, -17.88946533203125, -1.2958412170410156, 14.6390380859375, 5.267644882202148, 66.65660095214844, 17.56454086303711, 41.2265625, -0.18906593322753906, -4.484651565551758, 72.0872802734375, -1.8493518829345703, 47.993560791015625, 62.162811279296875, 15.769405364990234, 57.728294372558594, 70.73646545410156, 11.340835571289062, 5.790159225463867, 34.66230010986328, 52.147804260253906, -1.0893096923828125, -8.62994384765625, 19.995193481445312, 19.413286209106445, 54.81419372558594, 22.97698211669922, 30.70764923095703, 37.97896957397461, -23.105560302734375, 19.76754379272461, -1.069427490234375, 44.051048278808594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000085.npy"}
|
||||
{"epoch": 0.12481644640234948, "step": 86, "batch_size": 64, "mean": 21.457523345947266, "std": 39.57339859008789, "min": -60.85264587402344, "p10": -16.118485641479488, "median": 15.523366928100586, "p90": 68.46669921875002, "max": 151.60061645507812, "pos_frac": 0.734375, "sample": [-13.710289001464844, 19.785301208496094, 70.87008666992188, -21.591629028320312, 36.259246826171875, 31.779708862304688, 28.334482192993164, -2.2919235229492188, 109.68243408203125, 11.942821502685547, 6.7793121337890625, -6.630435943603516, 0.820037841796875, 22.421707153320312, -8.932693481445312, 20.503433227539062, 50.417083740234375, 20.148727416992188, -3.5851478576660156, -55.731842041015625, 63.553497314453125, 23.358959197998047, 32.70528793334961, 11.543903350830078, 10.122398376464844, -17.150569915771484, -4.604576110839844, 70.57235717773438, -5.0691680908203125, 8.628934860229492, 39.839359283447266, 19.103912353515625, 8.118936538696289, -60.85264587402344, 48.24183654785156, 56.45330810546875, 29.00225067138672, -43.103416442871094, 79.13706970214844, 3.2117977142333984, 151.60061645507812, 19.48990249633789, 26.513458251953125, 11.822681427001953, -5.7925872802734375, 133.8702392578125, 6.935733795166016, 127.60360717773438, 49.69744873046875, -7.35205078125, -33.10346221923828, 40.42194366455078, 9.831554412841797, 34.9095458984375, 30.786758422851562, 35.91362380981445, 38.25819396972656, 6.416904449462891, 1.6532058715820312, 4.115016937255859, -5.393854141235352, -19.171485900878906, 3.2431182861328125, 20.927444458007812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000086.npy"}
|
||||
{"epoch": 0.1262848751835536, "step": 87, "batch_size": 64, "mean": 25.20366859436035, "std": 34.9445915222168, "min": -81.3453369140625, "p10": -8.596290588378904, "median": 21.08606719970703, "p90": 69.55859222412111, "max": 133.6138153076172, "pos_frac": 0.8125, "sample": [23.76860237121582, 38.12870788574219, 0.1604442596435547, 10.338556289672852, 38.769134521484375, -9.482131958007812, 31.9761962890625, -0.6967716217041016, 0.8503036499023438, -5.82171630859375, -26.191650390625, 48.44287109375, -6.045326232910156, 71.85343933105469, 32.759727478027344, 61.79052734375, 56.087974548339844, 11.327280044555664, 92.81832885742188, 62.531402587890625, 21.57555389404297, 21.169044494628906, 7.770589828491211, 44.470733642578125, 41.641441345214844, 2.082895278930664, 40.790992736816406, 133.6138153076172, 13.925209045410156, 2.5697174072265625, 13.906850814819336, 76.80270385742188, 61.60200500488281, 10.85195541381836, 59.68342590332031, 27.188629150390625, 15.110147476196289, 41.55826187133789, 21.003089904785156, 79.55210876464844, 0.9561557769775391, 84.01286315917969, 12.587150573730469, 20.735511779785156, -81.3453369140625, 12.055929183959961, 42.52861785888672, -35.806907653808594, 39.74170684814453, 74.32247924804688, -17.52032470703125, -18.044536590576172, 9.105770111083984, 20.633407592773438, -6.529327392578125, 64.20394897460938, 32.494598388671875, 35.92259216308594, -39.380950927734375, 26.030792236328125, 15.876548767089844, 1.1436138153076172, 53.92070007324219, -4.845256805419922], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000087.npy"}
|
||||
{"epoch": 0.1277533039647577, "step": 88, "batch_size": 64, "mean": 19.053417205810547, "std": 33.52738952636719, "min": -62.710205078125, "p10": -9.824423217773438, "median": 11.746100425720215, "p90": 62.039691162109406, "max": 135.19012451171875, "pos_frac": 0.765625, "sample": [10.342918395996094, 11.17022705078125, 66.85635375976562, 67.69534301757812, 35.35472869873047, 50.69169616699219, 4.008270263671875, 65.58047485351562, -9.805282592773438, 13.817499160766602, 35.71278381347656, 7.093955993652344, 2.0032196044921875, 50.04109191894531, -22.619003295898438, 32.75816345214844, -20.333282470703125, 16.21070098876953, -50.222198486328125, -62.710205078125, -5.289272308349609, 78.38034057617188, 40.450775146484375, -4.159416198730469, 46.077781677246094, 5.799715042114258, 29.752670288085938, -9.832626342773438, 23.698747634887695, 21.679275512695312, 42.083160400390625, 8.198188781738281, -1.6961784362792969, 27.062393188476562, 0.8258132934570312, 98.0433349609375, 9.512916564941406, 53.777862548828125, 5.247150421142578, 135.19012451171875, 5.642585754394531, 47.31306457519531, 3.9202117919921875, 14.181211471557617, 11.17437744140625, 26.196678161621094, 0.2919769287109375, -3.21661376953125, 36.19493103027344, 83.11990356445312, 36.15660858154297, 16.051071166992188, -0.376373291015625, 10.409568786621094, -54.9566650390625, -5.614715576171875, 11.071725845336914, -1.5190620422363281, 12.31782341003418, 14.731361389160156, 19.034208297729492, -21.342063903808594, 7.3952789306640625, 42.79138946533203], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000088.npy"}
|
||||
{"epoch": 0.12922173274596183, "step": 89, "batch_size": 64, "mean": 21.37381362915039, "std": 28.52877426147461, "min": -67.52581787109375, "p10": -2.294877815246582, "median": 17.34477424621582, "p90": 50.94893493652345, "max": 126.81071472167969, "pos_frac": 0.859375, "sample": [5.2503509521484375, 1.5705909729003906, 5.693029403686523, 21.293169021606445, -10.389225006103516, 52.45512390136719, 43.10863494873047, 16.583412170410156, -8.728340148925781, 60.62822723388672, 60.333091735839844, 23.88985824584961, 22.58483123779297, 47.43449401855469, 37.320579528808594, 30.92409896850586, 13.735458374023438, 10.272750854492188, -2.186067581176758, 19.889572143554688, 15.522834777832031, 3.382547378540039, 16.512523651123047, 11.934364318847656, -10.400405883789062, 1.150247573852539, 1.6400222778320312, 10.326303482055664, 78.32260131835938, 20.6448974609375, 30.19757843017578, 91.16220092773438, 17.451129913330078, 45.81641387939453, 22.294315338134766, 2.2387847900390625, 13.034698486328125, 126.81071472167969, 5.008335113525391, -2.6902942657470703, 29.914073944091797, -0.990509033203125, 23.199655532836914, 31.55556869506836, 15.969499588012695, 5.724504470825195, 10.281135559082031, 5.188442230224609, 4.05279541015625, 36.97407531738281, 22.28466796875, 41.579620361328125, -2.341510772705078, 17.238418579101562, 44.769309997558594, -67.52581787109375, -26.8101806640625, 28.74205780029297, 18.980792999267578, 24.068355560302734, 25.653961181640625, 24.893646240234375, 95.13606262207031, 7.361942291259766], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000089.npy"}
|
||||
{"epoch": 0.13069016152716592, "step": 90, "batch_size": 64, "mean": 29.19457244873047, "std": 43.23085403442383, "min": -54.22804260253906, "p10": -11.322014999389642, "median": 19.021102905273438, "p90": 94.59552001953126, "max": 153.40838623046875, "pos_frac": 0.8125, "sample": [69.64274597167969, -45.275062561035156, 30.534423828125, 96.49432373046875, 22.938953399658203, 37.20252990722656, 0.6296348571777344, -4.312568664550781, 60.677520751953125, -1.6740150451660156, 46.80181121826172, 18.55615997314453, 0.059604644775390625, 13.069839477539062, 39.829994201660156, 88.09457397460938, -5.582393646240234, 153.40838623046875, 55.02418518066406, 2.3401260375976562, 2.661092758178711, 10.793472290039062, 20.411277770996094, -13.781852722167969, 120.3668212890625, 17.42682647705078, 25.847793579101562, 126.70895385742188, 5.111869812011719, 24.673564910888672, 143.72174072265625, 23.422439575195312, -28.180908203125, 12.06500244140625, -5.411956787109375, -54.22804260253906, 28.607440948486328, 8.837440490722656, 65.24382019042969, 6.917366027832031, 15.483261108398438, 13.392860412597656, -22.642471313476562, -4.902599334716797, 23.007843017578125, 90.16497802734375, 102.59771728515625, -37.63182830810547, -32.795989990234375, 12.430564880371094, 38.738914489746094, 1.4065475463867188, 19.486045837402344, 38.056480407714844, 69.13296508789062, 33.91454315185547, 11.009637832641602, 11.104005813598633, 64.1973876953125, 17.466594696044922, 24.596900939941406, 17.826379776000977, 100.75027465820312, 41.986610412597656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000090.npy"}
|
||||
{"epoch": 0.13215859030837004, "step": 91, "batch_size": 64, "mean": 23.98028564453125, "std": 35.40739822387695, "min": -34.80876159667969, "p10": -20.299038696289063, "median": 22.010177612304688, "p90": 71.69305725097661, "max": 125.91748046875, "pos_frac": 0.75, "sample": [19.22840118408203, 54.05583190917969, -34.80876159667969, 8.11587905883789, 76.00567626953125, 0.6335220336914062, -19.288330078125, 45.20963668823242, 3.4870471954345703, 21.358619689941406, 43.294158935546875, 41.26301574707031, 43.17798614501953, 14.540840148925781, -34.351539611816406, 113.35618591308594, -12.01153564453125, 2.2467193603515625, 9.270065307617188, 87.01615905761719, 10.103515625, 34.06309127807617, 125.91748046875, -18.109352111816406, 29.559547424316406, 56.77101135253906, -20.20153045654297, 3.563629150390625, 23.407127380371094, -5.2826690673828125, -20.873214721679688, 22.77654457092285, 61.630279541015625, -20.781051635742188, 18.333648681640625, 22.66173553466797, 58.838043212890625, -14.312156677246094, 21.211875915527344, -5.007902145385742, 9.657478332519531, 24.832107543945312, 60.981712341308594, 2.5627098083496094, -20.34082794189453, 39.060638427734375, 24.193374633789062, 82.08660888671875, 26.364479064941406, 43.66693115234375, 10.2630615234375, -26.07025909423828, 0.8597812652587891, 29.8421630859375, -2.103363037109375, 42.992576599121094, -0.5650672912597656, 87.4637451171875, 46.776893615722656, 24.73352813720703, 54.37721252441406, -27.04216766357422, 49.03242492675781, 85.04322052001953], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000091.npy"}
|
||||
{"epoch": 0.13362701908957417, "step": 92, "batch_size": 64, "mean": 24.30576515197754, "std": 34.64317321777344, "min": -49.610939025878906, "p10": -12.08062515258789, "median": 17.625999450683594, "p90": 59.92264404296875, "max": 130.342041015625, "pos_frac": 0.75, "sample": [-4.736431121826172, 6.051261901855469, 7.736442565917969, 50.00389862060547, -8.646575927734375, 6.230628967285156, -49.610939025878906, -16.57663345336914, 49.57014465332031, 25.22313690185547, 67.96310424804688, 0.39200592041015625, 45.790618896484375, 3.3288230895996094, -5.879783630371094, 114.86453247070312, 55.552024841308594, 130.342041015625, -42.61590576171875, -5.63311767578125, 26.557697296142578, -11.767768859863281, 28.946670532226562, -8.178218841552734, -19.34198760986328, 59.98042297363281, -12.214706420898438, 74.89144897460938, 31.744651794433594, 4.816656112670898, 75.17992401123047, -2.796192169189453, 35.96417999267578, 0.7165622711181641, 52.53321075439453, 8.771347045898438, 33.57599639892578, 109.60401916503906, 10.252046585083008, 59.224639892578125, 44.88542175292969, 59.78782653808594, -1.7420883178710938, -5.5070037841796875, 8.913005828857422, 26.021034240722656, 5.1680755615234375, 42.221649169921875, 36.0218505859375, 11.730024337768555, 0.4027843475341797, -13.000364303588867, 53.29384994506836, 16.520530700683594, 18.731468200683594, 13.741249084472656, 36.971466064453125, 53.53205871582031, 44.29435729980469, 25.1405029296875, 11.142030715942383, -13.35190200805664, 57.39692687988281, 35.44432830810547], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000092.npy"}
|
||||
{"epoch": 0.13509544787077826, "step": 93, "batch_size": 64, "mean": 24.499828338623047, "std": 30.791046142578125, "min": -37.346946716308594, "p10": -3.8429769515991197, "median": 16.755695343017578, "p90": 59.61588363647461, "max": 155.72198486328125, "pos_frac": 0.828125, "sample": [19.267318725585938, 9.343378067016602, 27.359664916992188, 64.92682647705078, 59.60235595703125, 56.848541259765625, 4.8021087646484375, 23.55547332763672, 47.383399963378906, -14.975082397460938, 77.50340270996094, 21.88225555419922, 52.88262176513672, 52.631858825683594, 155.72198486328125, 13.55496597290039, -2.2406692504882812, 3.769195556640625, 2.3040313720703125, 91.3476791381836, -8.642738342285156, 13.84383773803711, 22.866928100585938, -0.27208709716796875, 34.95494079589844, 10.569305419921875, 33.24121856689453, 5.764991760253906, -1.0840682983398438, 11.627677917480469, -0.7038326263427734, 14.899665832519531, 9.750799179077148, 19.4051513671875, 34.16194152832031, 54.17938232421875, 66.6478042602539, -24.3245849609375, -4.529680252075195, 52.783721923828125, 47.352210998535156, 35.57536697387695, 39.22203826904297, 6.460422515869141, 10.387775421142578, 8.871467590332031, 30.24164581298828, 2.76507568359375, 59.621681213378906, 1.9394264221191406, -7.2735595703125, 62.10602569580078, 6.006202697753906, 17.746734619140625, 5.982931137084961, 5.278331756591797, -18.725914001464844, 32.13774871826172, 47.12609100341797, 35.285179138183594, 48.76128387451172, 15.764656066894531, -37.346946716308594, 0.091461181640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000093.npy"}
|
||||
{"epoch": 0.13656387665198239, "step": 94, "batch_size": 64, "mean": 28.207868576049805, "std": 32.23759078979492, "min": -23.186920166015625, "p10": -7.498330497741699, "median": 24.410072326660156, "p90": 70.15250167846679, "max": 116.23855590820312, "pos_frac": 0.78125, "sample": [26.42154312133789, -18.4141845703125, 45.847373962402344, 20.281700134277344, 5.890106201171875, -15.121150970458984, 10.536937713623047, 61.4686164855957, 5.80534553527832, -21.208648681640625, 24.948434829711914, 12.205291748046875, 26.05998992919922, 7.308433532714844, 12.275482177734375, 0.4843330383300781, 60.46543884277344, 12.561534881591797, -7.194423675537109, 115.54400634765625, 5.444906234741211, 31.313213348388672, 32.932273864746094, 55.52000427246094, 82.42337036132812, 30.356918334960938, 14.561820983886719, 30.488792419433594, 22.334293365478516, -3.1695327758789062, -4.769720077514648, 17.49265480041504, 37.53179931640625, 63.3040771484375, 43.70991134643555, 20.14773941040039, 36.934967041015625, 70.2790298461914, 33.96900939941406, 23.8717098236084, 5.72172737121582, 46.11748504638672, -3.7518463134765625, -18.46282958984375, 69.85726928710938, 77.32168579101562, -7.628576278686523, 61.618408203125, 116.23855590820312, -6.863624572753906, -23.186920166015625, 90.13291931152344, 33.06597137451172, 59.92033386230469, -3.225341796875, 19.629119873046875, -14.918716430664062, -5.571502685546875, 37.35472869873047, 16.768051147460938, 36.8573112487793, 58.42106628417969, 74.3199462890625, 54.72483825683594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000094.npy"}
|
||||
{"epoch": 0.13803230543318648, "step": 95, "batch_size": 64, "mean": 22.786226272583008, "std": 30.680328369140625, "min": -75.20936584472656, "p10": -7.656877136230468, "median": 17.04152011871338, "p90": 66.72337646484377, "max": 95.77642822265625, "pos_frac": 0.8125, "sample": [16.6263427734375, 44.517295837402344, 5.301033020019531, 23.973899841308594, 20.31962776184082, 70.55743408203125, 9.307880401611328, 6.511499404907227, -8.06484603881836, 16.264808654785156, 19.942068099975586, 4.556800842285156, 35.495174407958984, 85.30485534667969, 52.06074523925781, -6.867668151855469, 38.834434509277344, 2.751798629760742, 15.804828643798828, 48.58637237548828, 23.416824340820312, 41.55912780761719, 20.03270149230957, 15.718050003051758, 10.124765396118164, -12.390968322753906, 10.888206481933594, 55.73796844482422, 21.32822036743164, 30.450164794921875, -10.497352600097656, -7.995109558105469, 55.70228576660156, 95.77642822265625, 79.52926635742188, 60.9071044921875, 37.32630157470703, 1.5162239074707031, -0.20384979248046875, 69.216064453125, 46.97641372680664, 84.18026733398438, 73.57237243652344, 4.9846954345703125, 14.944408416748047, 9.065452575683594, -1.4698982238769531, 32.67198944091797, 31.32799530029297, -75.20936584472656, 2.500734329223633, 2.181406021118164, -31.2464599609375, 47.260223388671875, 44.310150146484375, 13.504413604736328, 42.7747802734375, 11.700593948364258, 17.456697463989258, 17.99138641357422, -5.72093391418457, -3.1235828399658203, 15.97286605834961, -38.21495819091797], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000095.npy"}
|
||||
{"epoch": 0.1395007342143906, "step": 96, "batch_size": 64, "mean": 30.88555145263672, "std": 37.0584602355957, "min": -57.33744812011719, "p10": -0.38352718353271414, "median": 19.872764587402344, "p90": 79.21403121948244, "max": 161.4724884033203, "pos_frac": 0.890625, "sample": [65.58268737792969, 17.4354248046875, 38.51045227050781, 32.180118560791016, 19.894363403320312, 66.86395263671875, 6.2799835205078125, 2.816495895385742, 0.32781219482421875, 25.446533203125, 115.16505432128906, 17.14563751220703, 9.794715881347656, -4.072948455810547, 2.9446067810058594, 21.44650650024414, 30.03711700439453, 45.391868591308594, 87.72692108154297, 93.24031066894531, 9.388618469238281, 45.42852783203125, 38.24474334716797, 161.4724884033203, 76.07219696044922, 13.293720245361328, 19.851165771484375, 9.393081665039062, 16.80120849609375, 16.17113494873047, 9.422004699707031, 138.50711059570312, 33.634315490722656, 29.522010803222656, 6.4542999267578125, 90.88711547851562, 59.1224365234375, 6.800403594970703, 38.09928894042969, 1.6746749877929688, 49.293052673339844, 27.573148727416992, 23.6749267578125, -10.116340637207031, 15.499696731567383, 0.8283672332763672, -2.5232696533203125, 42.095359802246094, -1.4127349853515625, 26.07379913330078, 12.1845703125, 61.097503662109375, -14.810054779052734, 7.7621612548828125, 69.89892578125, 11.582015991210938, 18.14607048034668, 68.06767272949219, 80.56053161621094, 1.5726165771484375, -0.6883869171142578, 32.20427703857422, -57.33744812011719, 1.0505828857421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000096.npy"}
|
||||
{"epoch": 0.14096916299559473, "step": 97, "batch_size": 64, "mean": 23.93172836303711, "std": 28.983230590820312, "min": -63.712669372558594, "p10": -8.451221656799314, "median": 23.027634620666504, "p90": 58.85814285278321, "max": 111.60845947265625, "pos_frac": 0.78125, "sample": [18.468971252441406, 51.926239013671875, 1.095245361328125, 1.4192733764648438, -4.739727020263672, -3.709003448486328, 17.471576690673828, 29.22523307800293, 2.364124298095703, 49.01054382324219, 24.31058120727539, 43.505950927734375, -0.8289318084716797, 18.963773727416992, 81.15130615234375, 63.41191101074219, -22.314979553222656, 35.60444641113281, 0.399932861328125, 13.969320297241211, 27.674041748046875, 7.001682281494141, 16.45315933227539, -22.33294677734375, 72.9034423828125, 20.45200538635254, 26.092056274414062, 31.462417602539062, 42.12318420410156, 15.038990020751953, 27.769187927246094, -5.655145645141602, 17.399642944335938, -11.86161994934082, 51.580894470214844, 43.79095458984375, 51.711509704589844, 30.741222381591797, -63.712669372558594, -9.781364440917969, 50.35163116455078, 32.65144348144531, -11.070205688476562, 8.32802963256836, 57.00322723388672, 36.361263275146484, 1.6541461944580078, 22.499248504638672, 23.556020736694336, 17.88970947265625, -4.928670883178711, 59.653106689453125, -9.649539947509766, 52.55946731567383, -2.122020721435547, 67.31004333496094, -1.431793212890625, 44.68359375, 49.32331848144531, 64.54582214355469, 33.751346588134766, 10.030143737792969, 111.60845947265625, 27.51641845703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000097.npy"}
|
||||
{"epoch": 0.14243759177679882, "step": 98, "batch_size": 64, "mean": 27.260852813720703, "std": 38.40777587890625, "min": -108.06094360351562, "p10": -13.337170410156247, "median": 29.30670166015625, "p90": 69.95939636230469, "max": 133.81820678710938, "pos_frac": 0.796875, "sample": [42.320709228515625, 5.840675354003906, 66.4588623046875, 44.99365997314453, 6.21092414855957, 21.40167808532715, 24.938629150390625, 28.056930541992188, 32.12144470214844, 7.642425537109375, 30.990755081176758, -9.695709228515625, 21.17330551147461, 24.69207000732422, 68.37516784667969, 52.37786102294922, -14.897796630859375, 1.818756103515625, -0.5528030395507812, -23.180931091308594, 41.79063034057617, 36.29889678955078, -31.058181762695312, -40.01727294921875, 56.481773376464844, 53.75618362426758, 32.287109375, 71.27775573730469, 70.63835144042969, 26.78922462463379, 41.42106628417969, -3.811767578125, 63.73918914794922, 133.81820678710938, -108.06094360351562, 84.93840026855469, 62.843231201171875, 28.230438232421875, 48.982391357421875, 19.016250610351562, -9.104839324951172, 40.75794982910156, -60.33821105957031, 26.70922088623047, 38.13836669921875, 3.7357139587402344, 40.613014221191406, 85.36416625976562, 62.31758117675781, 38.58755874633789, 16.365577697753906, 11.004493713378906, 76.86769104003906, 107.51681518554688, 36.13323211669922, 33.37163162231445, 30.382965087890625, 15.279672622680664, 3.9234466552734375, -8.99703598022461, 54.58131408691406, -3.7014617919921875, 3.807649612426758, -19.06938934326172], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000098.npy"}
|
||||
{"epoch": 0.14390602055800295, "step": 99, "batch_size": 64, "mean": 33.529537200927734, "std": 42.39036560058594, "min": -33.647193908691406, "p10": -8.1749885559082, "median": 22.441463470458984, "p90": 85.10894775390625, "max": 183.78843688964844, "pos_frac": 0.828125, "sample": [-0.7361507415771484, -9.336524963378906, 6.081796646118164, 21.696735382080078, 2.1112289428710938, 42.45964813232422, 27.878799438476562, 31.303390502929688, 111.08151245117188, 20.92119026184082, 25.467124938964844, 126.1180419921875, 6.838275909423828, -30.394546508789062, 85.21052551269531, -5.039817810058594, 22.337356567382812, 37.05744171142578, -2.1434059143066406, 18.35388946533203, 76.46780395507812, 16.19298553466797, 12.009923934936523, 161.922119140625, 10.174631118774414, 60.55049133300781, 2.5272750854492188, -25.611000061035156, 11.066804885864258, -33.647193908691406, -10.64788818359375, 55.81605529785156, 39.15570068359375, 27.937305450439453, 68.06257629394531, 183.78843688964844, 76.37918090820312, 38.342899322509766, 18.2242431640625, 21.171295166015625, 5.594169616699219, 30.261844635009766, 39.56413269042969, 84.87193298339844, 103.97652435302734, 7.57794189453125, 12.7412109375, 69.67399597167969, -5.4647369384765625, 0.6630535125732422, 33.84217834472656, 91.18692016601562, 31.66128158569336, 10.534000396728516, 58.47578430175781, -9.542388916015625, 43.02797317504883, 14.909395217895508, 70.01617431640625, 52.305015563964844, 22.545570373535156, 48.47614288330078, 4.080982208251953, -22.238868713378906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000099.npy"}
|
||||
{"epoch": 0.14537444933920704, "step": 100, "batch_size": 64, "mean": 19.546470642089844, "std": 36.68450164794922, "min": -77.53128051757812, "p10": -19.88119106292724, "median": 20.06147003173828, "p90": 63.9695899963379, "max": 147.4073486328125, "pos_frac": 0.765625, "sample": [56.73828125, 30.950937271118164, -6.6725006103515625, 15.752799987792969, 23.68604278564453, -9.184127807617188, 33.86321258544922, 26.436988830566406, 8.020082473754883, 43.439849853515625, 11.66412353515625, 1.1168441772460938, 38.964874267578125, 25.528518676757812, 4.158088684082031, 79.95195007324219, 20.038436889648438, -43.874603271484375, -33.675994873046875, -54.05145263671875, -14.558443069458008, -9.735023498535156, 0.5418834686279297, -64.6724853515625, 56.773929595947266, 20.084503173828125, 50.495338439941406, 24.165863037109375, 1.4858932495117188, 5.542970657348633, 44.459747314453125, -3.093109130859375, 45.781349182128906, 6.036243438720703, 68.66903686523438, 32.42768096923828, 8.875772476196289, 12.404953002929688, 61.729576110839844, 26.555145263671875, 41.20512008666992, 16.159523010253906, 8.271881103515625, 0.9214763641357422, 31.901947021484375, 35.99229431152344, -9.676773071289062, -27.049423217773438, -7.6540985107421875, 68.50006866455078, 37.083900451660156, 70.95934295654297, 24.97989845275879, -77.53128051757812, 147.4073486328125, -11.639547348022461, 16.561279296875, 82.20352172851562, -22.162368774414062, 64.92959594726562, 46.58087158203125, 7.666435241699219, 37.75226593017578, 20.78765869140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000100.npy"}
|
||||
{"epoch": 0.14684287812041116, "step": 101, "batch_size": 64, "mean": 23.726482391357422, "std": 41.204681396484375, "min": -54.50536346435547, "p10": -20.304966354370116, "median": 18.834228515625, "p90": 86.542172241211, "max": 134.91500854492188, "pos_frac": 0.671875, "sample": [98.61103820800781, 134.91500854492188, 30.93022918701172, 30.677452087402344, -14.547348022460938, 104.59864044189453, 108.92880249023438, 10.652336120605469, 94.075927734375, 111.01719665527344, -36.232421875, 27.746078491210938, -40.486427307128906, 48.834716796875, 14.127130508422852, -7.944816589355469, -21.565673828125, 15.418733596801758, 63.51422882080078, 28.400665283203125, -15.980926513671875, 26.850753784179688, 54.85304260253906, 63.965232849121094, 18.890701293945312, 18.777755737304688, 9.55440902709961, 65.0494613647461, 120.429443359375, 25.8677978515625, -8.510055541992188, -6.437633514404297, -14.11674690246582, 52.1164436340332, 18.1091365814209, 38.13713073730469, 68.96340942382812, 45.69136047363281, -5.95135498046875, 6.439830780029297, -2.735198974609375, -17.36331558227539, 20.105693817138672, -6.905570983886719, -0.3747100830078125, -15.595651626586914, 35.80188751220703, 32.460201263427734, 8.871944427490234, 26.102495193481445, -54.50536346435547, 35.64099884033203, 2.0473175048828125, -31.343463897705078, 25.935955047607422, 18.07001495361328, -15.631938934326172, 39.50271987915039, 7.665290832519531, -1.7652912139892578, 52.04615783691406, -29.158382415771484, 40.483680725097656, -35.23127746582031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000101.npy"}
|
||||
{"epoch": 0.14831130690161526, "step": 102, "batch_size": 64, "mean": 17.46571159362793, "std": 39.091835021972656, "min": -79.61849975585938, "p10": -22.37333831787109, "median": 10.995735168457031, "p90": 57.55325317382813, "max": 140.27505493164062, "pos_frac": 0.65625, "sample": [3.2738609313964844, 40.716041564941406, 58.453514099121094, 11.78662109375, 3.626983642578125, 5.870094299316406, -16.68017578125, -45.423919677734375, 22.14699363708496, -76.70208740234375, 28.827957153320312, 88.55770874023438, -11.785076141357422, 38.6180419921875, 28.861949920654297, -9.279325485229492, 10.204849243164062, 35.03912353515625, 50.96240997314453, 2.0311851501464844, -16.659896850585938, -4.55653190612793, 48.65464782714844, -4.967267990112305, 59.88226318359375, 22.698711395263672, 38.592933654785156, 140.27505493164062, -3.9162826538085938, -0.6503753662109375, 113.39310455322266, -0.37514495849609375, 37.7563591003418, 42.639007568359375, 4.012275695800781, -17.196510314941406, 32.55870056152344, -79.61849975585938, 1.638214111328125, -28.701766967773438, 0.9338226318359375, -44.50489807128906, -23.266357421875, -4.44244384765625, 55.45264434814453, 30.9725341796875, 42.624229431152344, 83.16188049316406, 43.99525451660156, 6.451629638671875, 70.58580780029297, -27.291915893554688, -6.812553405761719, 5.957637786865234, 53.14813232421875, -20.289627075195312, 39.88365936279297, 34.81810760498047, 24.917531967163086, 37.20911407470703, -13.141555786132812, -16.28406524658203, 41.69102478027344, 47.4702033996582], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000102.npy"}
|
||||
{"epoch": 0.14977973568281938, "step": 103, "batch_size": 64, "mean": 35.02385330200195, "std": 42.719722747802734, "min": -38.652313232421875, "p10": -11.033242034912108, "median": 27.810401916503906, "p90": 82.2153060913086, "max": 182.66400146484375, "pos_frac": 0.796875, "sample": [-6.147697448730469, 7.9886474609375, 14.065181732177734, 34.273826599121094, -21.068283081054688, -25.463546752929688, 34.36381149291992, 39.30308532714844, 14.213783264160156, 19.908702850341797, 28.339385986328125, 45.07232666015625, 16.861572265625, 19.392189025878906, -0.9474334716796875, 66.92534637451172, 111.82949829101562, 35.437400817871094, 49.75343322753906, 61.86951446533203, -35.87870788574219, -11.131072998046875, 48.923095703125, 26.3994140625, -38.652313232421875, 75.5114517211914, 36.43000793457031, -17.410751342773438, 14.959815979003906, 19.20660400390625, 39.24109649658203, 168.00119018554688, 26.509368896484375, 20.432453155517578, 71.70558166503906, 67.5885238647461, -6.100372314453125, -10.804969787597656, 21.656587600708008, 17.91209602355957, -5.0517425537109375, 40.53813934326172, 64.20623016357422, -28.53826904296875, 23.84051513671875, 100.65840911865234, 80.49169921875, 27.281417846679688, 182.66400146484375, 101.21188354492188, 105.32128143310547, 82.95399475097656, 12.912353515625, 41.224586486816406, 68.19145202636719, -1.5090904235839844, 32.377418518066406, 4.310441970825195, 3.953197479248047, 50.46277618408203, 79.1241683959961, 16.499736785888672, 32.412384033203125, 45.51972198486328], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000103.npy"}
|
||||
{"epoch": 0.1512481644640235, "step": 104, "batch_size": 64, "mean": 33.20488739013672, "std": 48.7789192199707, "min": -65.19480895996094, "p10": -12.749076271057127, "median": 19.275466918945312, "p90": 92.76042938232422, "max": 191.7330322265625, "pos_frac": 0.75, "sample": [92.72117614746094, -7.2371978759765625, 14.162010192871094, 14.951263427734375, 62.29191589355469, -5.756782531738281, -29.009628295898438, 3.1871490478515625, -3.4878082275390625, 39.379364013671875, -13.266847610473633, 35.201358795166016, 152.31777954101562, 52.413429260253906, 86.41350555419922, -2.823333740234375, 92.77725219726562, 33.602813720703125, -17.78491973876953, 117.25363159179688, 39.27598571777344, 5.0888824462890625, 19.391422271728516, 7.319236755371094, 15.520940780639648, 2.170867919921875, 191.7330322265625, -47.68230438232422, 58.152320861816406, 136.41116333007812, 138.77210998535156, 30.235610961914062, 13.237398147583008, 66.31442260742188, 12.620485305786133, 14.730087280273438, -11.540943145751953, -22.675308227539062, 115.57476806640625, 16.44647216796875, 51.12366485595703, -7.728057861328125, 69.73475646972656, -3.927398681640625, 85.81184387207031, 38.44879150390625, 31.520103454589844, 52.162899017333984, 37.821014404296875, 30.613571166992188, 3.990762710571289, 91.16230773925781, 7.709266662597656, 28.5931396484375, 61.39970397949219, 67.65904998779297, -7.461236953735352, 28.19187355041504, -0.7981300354003906, 19.15951156616211, -23.702537536621094, 1.5627326965332031, -65.19480895996094, 8.857025146484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000104.npy"}
|
||||
{"epoch": 0.1527165932452276, "step": 105, "batch_size": 64, "mean": 47.245811462402344, "std": 42.50857925415039, "min": -69.57260131835938, "p10": 2.6169235229492207, "median": 45.290687561035156, "p90": 102.3254058837891, "max": 178.41539001464844, "pos_frac": 0.90625, "sample": [49.417755126953125, 32.08836364746094, 46.201026916503906, 29.724197387695312, 42.776092529296875, 45.43657684326172, 73.86967468261719, 111.6141128540039, 10.081838607788086, -69.57260131835938, -6.601127624511719, 83.31201171875, 83.32687377929688, 13.71388053894043, 35.422515869140625, 48.94510269165039, -32.931453704833984, 1.8159332275390625, -6.0894775390625, 109.63358306884766, 32.863250732421875, 109.13957214355469, 71.09291076660156, 20.33587646484375, 11.845321655273438, 82.1181411743164, 63.12397766113281, 45.144798278808594, 9.533708572387695, 37.91443634033203, 68.181396484375, 168.99819946289062, 20.29419708251953, 51.57464599609375, 17.48809051513672, 19.328880310058594, 71.32295227050781, 92.38270568847656, 48.253936767578125, 17.30838394165039, 51.20341491699219, 92.8331298828125, 35.869964599609375, 37.992828369140625, 44.68278503417969, 53.06544494628906, -0.9669647216796875, 66.00285339355469, 68.88980102539062, 106.39352416992188, 37.047210693359375, 22.834434509277344, -19.694480895996094, 9.487434387207031, 66.06385040283203, 60.079627990722656, 56.63511657714844, 114.0509033203125, 61.36347961425781, 178.41539001464844, 25.939285278320312, 4.48590087890625, 13.008779525756836, 77.61781311035156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000105.npy"}
|
||||
{"epoch": 0.15418502202643172, "step": 106, "batch_size": 64, "mean": 29.639097213745117, "std": 36.756900787353516, "min": -54.69886016845703, "p10": -13.173191070556634, "median": 29.612028121948242, "p90": 78.07882080078126, "max": 136.35772705078125, "pos_frac": 0.84375, "sample": [27.903884887695312, 5.232063293457031, 8.707311630249023, -4.180507659912109, 18.09636688232422, 2.150789260864258, 50.27300262451172, -6.449485778808594, 31.504005432128906, 68.74638366699219, 36.879722595214844, 37.100093841552734, 2.6589736938476562, 75.73753356933594, 27.378074645996094, 94.89424133300781, 7.313371658325195, 12.267568588256836, 79.08222961425781, 1.2037200927734375, 64.79762268066406, 42.234249114990234, 0.9738998413085938, 86.65374755859375, 23.92411994934082, 44.829872131347656, 7.688194274902344, -16.054779052734375, 55.21961212158203, -37.806121826171875, 48.45410919189453, 15.997779846191406, 41.87967300415039, 40.423866271972656, -2.640787124633789, 5.474822998046875, 24.293861389160156, 1.7227706909179688, 92.80143737792969, 64.19329071044922, 64.2239990234375, 25.408344268798828, -54.69886016845703, 3.9319610595703125, 136.35772705078125, 38.62109375, 28.208316802978516, 34.202552795410156, 32.523223876953125, 9.250991821289062, 0.6617813110351562, 59.15704345703125, 106.91826629638672, 35.095062255859375, 55.541595458984375, 58.09477233886719, -23.58470916748047, 33.75492858886719, -23.286544799804688, 31.01573944091797, 82.98283386230469, -42.02271270751953, -24.16693878173828, 47.151268005371094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000106.npy"}
|
||||
{"epoch": 0.15565345080763582, "step": 107, "batch_size": 64, "mean": 33.398658752441406, "std": 43.218994140625, "min": -91.23826599121094, "p10": -12.554787826538076, "median": 22.860742568969727, "p90": 86.00971984863281, "max": 142.18540954589844, "pos_frac": 0.84375, "sample": [85.90151977539062, 112.58456420898438, 47.992034912109375, 69.022705078125, 24.93872833251953, 31.901214599609375, 52.47154235839844, -3.284832000732422, -91.23826599121094, 9.427431106567383, 16.40401840209961, 77.58819580078125, -38.39448547363281, 28.39364242553711, 10.09295654296875, 7.750175476074219, 15.428308486938477, 86.05609130859375, 1.9009552001953125, -27.561420440673828, 108.97586822509766, 67.74944305419922, 80.32427978515625, 6.186182022094727, -16.6263427734375, 3.2761611938476562, -0.907623291015625, 4.664154052734375, 142.18540954589844, 75.58936309814453, 18.41766357421875, 18.339698791503906, 99.00785064697266, 45.13252258300781, 65.757080078125, 89.00601959228516, 16.834117889404297, 29.737594604492188, 16.83355140686035, -16.527626037597656, 62.90782165527344, -47.22997283935547, 66.37883758544922, 76.13499450683594, 11.07135009765625, 83.03240966796875, 0.084075927734375, 81.11123657226562, 49.29231262207031, 53.95372009277344, 24.18799591064453, 22.107263565063477, 4.2209014892578125, -28.48076629638672, -0.47658538818359375, 5.815216064453125, 13.275920867919922, 73.53740692138672, 11.0986328125, 23.614221572875977, 68.13519287109375, 99.4366455078125, 2.4420623779296875, 10.532913208007812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000107.npy"}
|
||||
{"epoch": 0.15712187958883994, "step": 108, "batch_size": 64, "mean": 40.09585952758789, "std": 58.3842887878418, "min": -62.987884521484375, "p10": -23.431006813049315, "median": 28.162052154541016, "p90": 124.41716613769535, "max": 215.70904541015625, "pos_frac": 0.765625, "sample": [36.898895263671875, 136.04039001464844, 40.02467346191406, -45.573699951171875, -62.987884521484375, 109.36380004882812, 31.11400032043457, 8.993698120117188, 215.70904541015625, -0.5573883056640625, -10.747039794921875, 128.13247680664062, 8.562385559082031, 6.37091064453125, -5.377531051635742, 72.88861846923828, -7.158758163452148, 97.02919006347656, 164.6047821044922, 107.8348617553711, 115.74810791015625, 83.43663024902344, 1.8639698028564453, -23.35981559753418, 48.44830322265625, 10.324003219604492, 27.531112670898438, 16.767879486083984, 76.37478637695312, 5.243770599365234, 25.811111450195312, 0.7705039978027344, 71.47579193115234, -23.461517333984375, 175.04165649414062, -40.10527038574219, 81.89620971679688, -29.691097259521484, -15.564811706542969, 4.579496383666992, 26.49681854248047, 7.335693359375, 50.641448974609375, 156.2396240234375, 33.969383239746094, 10.835916519165039, 90.30509185791016, 74.69284057617188, 31.034936904907227, -53.732513427734375, 14.696914672851562, 51.665992736816406, 128.13833618164062, 64.08744812011719, -1.5820808410644531, -1.7738590240478516, 38.786895751953125, 16.418376922607422, 73.85591125488281, -49.560665130615234, 28.792991638183594, 40.921504974365234, 66.28192138671875, 23.289710998535156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000108.npy"}
|
||||
{"epoch": 0.15859030837004406, "step": 109, "batch_size": 64, "mean": 42.91639709472656, "std": 50.9198112487793, "min": -83.68093872070312, "p10": -11.263848876953123, "median": 36.97645950317383, "p90": 108.12711639404297, "max": 187.08523559570312, "pos_frac": 0.828125, "sample": [50.252960205078125, 8.75705337524414, 136.8688201904297, 86.84163665771484, 30.861976623535156, 54.967777252197266, -59.04893493652344, 12.761398315429688, 18.040359497070312, 49.19566345214844, 132.66436767578125, 42.68723678588867, 73.77677917480469, 54.83634948730469, 54.1439208984375, 5.846540451049805, 22.29671859741211, 37.73014831542969, -12.966793060302734, -5.094636917114258, -5.4224090576171875, 187.08523559570312, 0.4308052062988281, -9.953292846679688, 26.266477584838867, 4.3270263671875, 127.49838256835938, 30.758272171020508, 33.253692626953125, -39.08600997924805, 62.90638732910156, 70.23760986328125, 9.356151580810547, 70.93142700195312, 56.66131591796875, 39.02293395996094, 100.62156677246094, 72.00210571289062, 109.04541778564453, 105.50257110595703, -0.38330841064453125, 13.019844055175781, -83.68093872070312, 29.025508880615234, 1.27862548828125, 73.99420928955078, 35.590850830078125, 34.92375564575195, 19.27830696105957, 82.8170166015625, -49.715576171875, 34.34466552734375, 37.5084114074707, 112.73335266113281, 127.86878967285156, 37.098045349121094, 86.75780487060547, 36.85487365722656, 91.17231750488281, 105.98441314697266, -11.825515747070312, -23.118606567382812, 3.110414505004883, 105.1469955444336], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000109.npy"}
|
||||
{"epoch": 0.16005873715124816, "step": 110, "batch_size": 64, "mean": 38.542945861816406, "std": 54.42041778564453, "min": -101.18891906738281, "p10": -15.089390182495116, "median": 33.35336971282959, "p90": 112.93467559814455, "max": 182.9139404296875, "pos_frac": 0.8125, "sample": [123.07748413085938, 55.786834716796875, 23.039813995361328, 54.17628479003906, 59.18804168701172, 36.903297424316406, 49.5981330871582, -1.1092529296875, -101.18891906738281, 45.31352996826172, 66.94741821289062, 16.174514770507812, -64.92230224609375, 19.09193992614746, 12.172557830810547, 1.4801654815673828, 30.91798973083496, 169.1707763671875, 107.51722717285156, 27.692344665527344, 4.956296920776367, -8.460025787353516, 182.9139404296875, 136.1932373046875, 101.81736755371094, 78.96009826660156, 17.51611328125, -18.615737915039062, -14.361141204833984, 61.39453125, -43.722381591796875, -15.401496887207031, 9.555641174316406, 19.436969757080078, 6.668481826782227, 159.321044921875, 52.9775390625, 85.43914794921875, 115.25643920898438, 4.854375839233398, 77.50228881835938, -8.206826210021973, 39.43132019042969, 35.78874969482422, 15.02853012084961, 11.052520751953125, 82.4472885131836, -58.80242919921875, 76.34036254882812, -26.2750244140625, 40.31207275390625, 1.4745540618896484, 55.89708709716797, 35.85636520385742, 51.55923080444336, 44.714576721191406, 19.705596923828125, 20.779586791992188, 18.87799644470215, 44.680335998535156, 80.11251831054688, 2.3550987243652344, -5.660900115966797, 144.04931640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000110.npy"}
|
||||
{"epoch": 0.16152716593245228, "step": 111, "batch_size": 64, "mean": 43.80938720703125, "std": 60.30035400390625, "min": -85.91824340820312, "p10": -33.51629180908203, "median": 34.01167297363281, "p90": 121.39797515869142, "max": 194.4635009765625, "pos_frac": 0.765625, "sample": [194.4635009765625, -52.857666015625, -85.64044189453125, 123.38192749023438, -7.270023345947266, 54.99259948730469, 14.695182800292969, 35.074256896972656, 28.599605560302734, 147.38905334472656, 31.687057495117188, 79.03985595703125, 42.39684295654297, 56.982269287109375, 28.091766357421875, 112.08370971679688, 113.68082427978516, -36.211212158203125, 100.28974914550781, 8.234342575073242, 79.56238555908203, -27.429306030273438, -22.793987274169922, -10.008148193359375, 66.92192077636719, 116.76875305175781, -7.4169158935546875, 113.16658020019531, -36.125, 11.748825073242188, -23.085968017578125, 32.94908905029297, 137.28067016601562, 23.134567260742188, 151.30706787109375, 55.4927978515625, 32.20097351074219, 55.57332992553711, 68.17211151123047, 132.19241333007812, 24.627410888671875, -44.63017272949219, 19.54791259765625, 29.880931854248047, 138.088134765625, 70.32637023925781, 29.897361755371094, 62.117431640625, 116.41166687011719, 2.90740966796875, 113.48086547851562, 38.06035614013672, 95.97882843017578, 72.55374145507812, 51.93638610839844, 13.630882263183594, -85.91824340820312, 32.230308532714844, 28.783235549926758, 41.96919250488281, -5.694438934326172, 93.80319213867188, -64.19985961914062, -10.703384399414062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000111.npy"}
|
||||
{"epoch": 0.16299559471365638, "step": 112, "batch_size": 64, "mean": 27.892642974853516, "std": 41.354331970214844, "min": -59.28949737548828, "p10": -24.98142318725586, "median": 26.104644775390625, "p90": 92.57630157470705, "max": 121.44387817382812, "pos_frac": 0.734375, "sample": [45.4084358215332, 121.44387817382812, 87.82441711425781, -6.266130447387695, -44.050411224365234, 34.2715950012207, 37.31488037109375, 31.51214599609375, 28.735305786132812, -5.275043487548828, 101.95770263671875, 79.27175903320312, -0.7927379608154297, -2.40325927734375, 94.61282348632812, 61.53165817260742, 39.75729751586914, 56.340614318847656, 39.013607025146484, 37.346832275390625, 43.68511962890625, 30.701065063476562, 72.04522705078125, -15.856460571289062, 25.4208984375, 23.26413917541504, 32.47991180419922, 33.072086334228516, -24.735687255859375, 17.503604888916016, 3.412607192993164, -34.851749420166016, 30.915939331054688, 110.1450424194336, 114.00847625732422, -32.673553466796875, 41.00769805908203, 14.798107147216797, 71.18470764160156, -0.03988838195800781, -59.28949737548828, 97.11986541748047, 25.289840698242188, 12.867111206054688, -13.024856567382812, 22.147777557373047, 62.609710693359375, 0.2622261047363281, 16.45862579345703, 101.27033996582031, 26.78839111328125, 8.823291778564453, 45.58216094970703, -42.606597900390625, 12.626001358032227, -25.08673858642578, -27.528457641601562, -11.28094482421875, 76.0531234741211, 1.1150016784667969, 24.94618034362793, 5.335460662841797, -8.478500366210938, 40.08686065673828], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000112.npy"}
|
||||
{"epoch": 0.1644640234948605, "step": 113, "batch_size": 64, "mean": 30.034034729003906, "std": 39.00908660888672, "min": -41.707366943359375, "p10": -16.230777740478512, "median": 31.603471755981445, "p90": 83.51098861694337, "max": 120.21651458740234, "pos_frac": 0.734375, "sample": [-33.230003356933594, 76.31378173828125, -17.62386131286621, 89.03143310546875, 9.434814453125, 38.918067932128906, 14.222757339477539, 36.973297119140625, 3.363546371459961, -10.260009765625, 10.441902160644531, 86.71684265136719, 55.13788604736328, 112.12588500976562, 38.64588928222656, -19.565359115600586, -7.254753112792969, -8.957450866699219, 7.7239532470703125, 65.89720153808594, -21.026535034179688, 48.87291717529297, 82.45619201660156, -21.000396728515625, 55.31645965576172, -6.94512939453125, 2.755369186401367, 120.21651458740234, 48.96247100830078, -41.707366943359375, 19.503799438476562, 75.4683837890625, 36.27580642700195, -10.307586669921875, 38.28834533691406, 53.31769561767578, 12.410285949707031, 83.76190948486328, 20.204200744628906, -12.769439697265625, 52.755828857421875, 71.0283203125, -12.980249404907227, 2.502511978149414, -24.709426879882812, 45.88615036010742, 36.81488800048828, 41.08943176269531, -9.74945068359375, 15.376745223999023, 115.518310546875, -2.6969871520996094, 34.08042907714844, 32.932586669921875, 100.79485321044922, 82.92550659179688, 34.49665069580078, 63.55286407470703, -6.500543594360352, 10.537155151367188, 12.370147705078125, 16.45499038696289, 30.274356842041016, 47.31343078613281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000113.npy"}
|
||||
{"epoch": 0.16593245227606462, "step": 114, "batch_size": 64, "mean": 47.85365295410156, "std": 65.90361785888672, "min": -42.71179962158203, "p10": -27.82858657836914, "median": 42.72231101989746, "p90": 115.18463134765629, "max": 355.17401123046875, "pos_frac": 0.765625, "sample": [183.2822265625, 90.87767791748047, 53.962059020996094, 50.813316345214844, 28.5003662109375, 119.48541259765625, 19.74507713317871, 48.70451354980469, -37.874359130859375, 68.07124328613281, -33.10297393798828, 77.4588623046875, 44.53905487060547, 22.53491973876953, -38.769386291503906, 87.18252563476562, 14.734928131103516, -34.888572692871094, 34.702117919921875, 124.00785827636719, 12.816949844360352, 56.22858428955078, -5.171661376953125, 119.99337005615234, 41.653804779052734, 60.17579650878906, 35.34033966064453, 25.082324981689453, 38.0047607421875, 60.4914436340332, 64.70337677001953, -4.931953430175781, -9.23828125, 56.31929016113281, -26.52850341796875, 16.029333114624023, 51.29545593261719, 69.58523559570312, 41.56060791015625, 105.14947509765625, 60.46816635131836, 87.93800354003906, 6.023403167724609, 50.872032165527344, 40.89762496948242, -14.555229187011719, 236.89491271972656, 6.239631652832031, 43.79081726074219, 145.12222290039062, -10.667455673217773, 74.15347290039062, -42.71179962158203, -28.385765075683594, 355.17401123046875, 56.97761154174805, -3.847871780395508, 88.43257141113281, 36.62495422363281, -32.79924392700195, 26.897323608398438, 88.71153259277344, -18.982593536376953, 76.8388671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000114.npy"}
|
||||
{"epoch": 0.16740088105726872, "step": 115, "batch_size": 64, "mean": 32.80569076538086, "std": 66.77465057373047, "min": -154.61972045898438, "p10": -29.25564651489258, "median": 18.657734870910645, "p90": 128.55659713745123, "max": 240.00863647460938, "pos_frac": 0.734375, "sample": [-0.7601432800292969, -2.6755237579345703, 135.2223663330078, 59.688568115234375, -53.96971130371094, -1.2582473754882812, -46.7086181640625, 10.139310836791992, 12.890682220458984, 17.318443298339844, 25.313629150390625, -6.7218170166015625, 44.89437484741211, 57.030181884765625, 9.603960037231445, 169.90187072753906, 22.488893508911133, -8.04510498046875, 240.00863647460938, -11.998830795288086, 77.45281219482422, 6.5669708251953125, 141.56326293945312, 73.64765167236328, 173.1439208984375, -29.661712646484375, 21.887527465820312, 25.99264907836914, 8.09518814086914, 97.37138366699219, 176.3772430419922, 101.15408325195312, 1.197113037109375, -123.91739654541016, 8.89401626586914, 35.54155731201172, 59.76957702636719, 15.776535034179688, 9.662071228027344, 44.69298553466797, 27.552154541015625, 51.38441467285156, 26.754173278808594, 30.925308227539062, 145.06643676757812, -29.43603515625, 22.31170654296875, 17.276840209960938, -7.69342041015625, 12.164299011230469, 19.997026443481445, 80.37666320800781, 4.012504577636719, 12.483806610107422, -21.282939910888672, 113.00313568115234, 67.10203552246094, -16.764829635620117, 65.27091979980469, -154.61972045898438, 109.6143798828125, -49.141990661621094, 4.471822738647461, -28.834739685058594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000115.npy"}
|
||||
{"epoch": 0.16886930983847284, "step": 116, "batch_size": 64, "mean": 36.67411422729492, "std": 47.031639099121094, "min": -36.47880554199219, "p10": -17.71062602996826, "median": 31.761539459228516, "p90": 91.36781463623048, "max": 218.24913024902344, "pos_frac": 0.78125, "sample": [7.2645721435546875, 4.288516998291016, -32.812015533447266, -29.943153381347656, 25.090394973754883, 45.74934387207031, 65.17117309570312, 23.140907287597656, 49.28599548339844, 59.40991973876953, 31.22748565673828, -10.285207748413086, 44.06676483154297, 27.040687561035156, 9.301563262939453, -6.692771911621094, 55.916343688964844, -36.47880554199219, 86.44837951660156, 89.60987854003906, 39.684234619140625, -18.91876220703125, 23.635757446289062, 17.80217742919922, 50.4635009765625, 25.884185791015625, 77.85992431640625, 107.52108764648438, 32.771636962890625, -30.91851806640625, 124.96075439453125, 35.55176544189453, 23.398914337158203, 44.88047790527344, -14.891641616821289, -28.15515899658203, 7.041584014892578, 7.928581237792969, -4.424478530883789, 102.80963134765625, 4.717960357666016, 135.42503356933594, 32.29559326171875, 88.35145568847656, 82.26826477050781, 35.419315338134766, 56.41392517089844, -32.825439453125, -8.770814895629883, 83.53401947021484, 23.58156967163086, -6.4043426513671875, 218.24913024902344, 68.06648254394531, 12.97943115234375, 51.802734375, 68.55400085449219, 64.60450744628906, -11.000020980834961, 15.33099365234375, 0.9154205322265625, 40.753570556640625, 99.07383728027344, 92.1212158203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000116.npy"}
|
||||
{"epoch": 0.17033773861967694, "step": 117, "batch_size": 64, "mean": 29.90760612487793, "std": 51.708351135253906, "min": -101.38301086425781, "p10": -25.567924880981437, "median": 24.133713722229004, "p90": 88.79919891357424, "max": 188.38104248046875, "pos_frac": 0.765625, "sample": [13.443355560302734, 53.05120086669922, 50.83949279785156, -17.324016571044922, 22.637537002563477, -2.9664535522460938, 19.619768142700195, 39.465492248535156, 21.907196044921875, 40.94786071777344, 44.525882720947266, 68.64865112304688, 134.91290283203125, 114.41522216796875, 18.83342742919922, 46.54035186767578, -60.30613708496094, 91.77955627441406, 4.423828125, -95.92092895507812, -29.101028442382812, 8.845687866210938, 16.394153594970703, 65.45166015625, 45.00901412963867, -12.859710693359375, -35.558616638183594, 3.886484146118164, 25.62989044189453, -15.096710205078125, 39.87992858886719, 122.04266357421875, -3.5242538452148438, 2.456939697265625, 152.14154052734375, 13.908332824707031, 188.38104248046875, 16.409820556640625, 30.76520538330078, 67.03890991210938, 14.679927825927734, 9.317577362060547, 8.282245635986328, -9.279541015625, -101.38301086425781, 30.47394561767578, 81.84503173828125, 33.830718994140625, 92.10930633544922, 40.03273010253906, -46.769622802734375, -9.755910873413086, 7.818271636962891, 54.48391342163086, 81.31777954101562, 80.66860961914062, 6.396942138671875, 49.16773986816406, 73.6393051147461, 29.196212768554688, 44.254127502441406, -2.6672210693359375, -36.747222900390625, 71.59969329833984], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000117.npy"}
|
||||
{"epoch": 0.17180616740088106, "step": 118, "batch_size": 64, "mean": 36.418636322021484, "std": 47.34600830078125, "min": -55.367218017578125, "p10": -13.181039047241207, "median": 24.210975646972656, "p90": 111.10452117919925, "max": 159.87820434570312, "pos_frac": 0.765625, "sample": [-7.303394317626953, 22.016704559326172, 0.3593921661376953, -26.72845458984375, 19.83367919921875, 16.779613494873047, 61.37601089477539, 44.12724304199219, 48.527488708496094, 114.83573913574219, 87.79495239257812, 23.1553955078125, 127.13302612304688, 30.32538604736328, -7.989307403564453, 55.13813018798828, -16.370716094970703, -15.121337890625, 32.37045669555664, 159.87820434570312, 121.05738830566406, -8.653675079345703, -55.367218017578125, 2.8754425048828125, -39.90943908691406, 21.01310157775879, 2.402872085571289, 33.517967224121094, 14.235980987548828, 14.213180541992188, 10.679412841796875, 76.33991241455078, 3.5087127685546875, 76.83246612548828, 25.233612060546875, 49.810150146484375, 22.266889572143555, 42.60003662109375, 151.7835693359375, -2.562450408935547, 31.0313720703125, -18.32373809814453, -2.6290130615234375, 41.343231201171875, 146.21041870117188, 87.21880340576172, 117.42613220214844, 43.61451721191406, 80.42098236083984, 13.297395706176758, -0.35356903076171875, 69.24150848388672, 102.39834594726562, 71.35924530029297, 51.8612060546875, 23.675155639648438, 36.14942169189453, 17.79030990600586, -0.8593177795410156, 24.746795654296875, -21.458084106445312, 85.01007843017578, 4.614898681640625, -5.009574890136719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000118.npy"}
|
||||
{"epoch": 0.17327459618208516, "step": 119, "batch_size": 64, "mean": 35.89599609375, "std": 57.14711380004883, "min": -69.7904052734375, "p10": -30.321307373046874, "median": 37.3317985534668, "p90": 97.57997741699221, "max": 211.30941772460938, "pos_frac": 0.6875, "sample": [13.909841537475586, 59.56272888183594, 38.44801330566406, 149.19363403320312, 39.3428955078125, -34.15357971191406, -7.9835052490234375, 51.131011962890625, -5.260486602783203, 51.67803192138672, -10.113731384277344, -12.446662902832031, 211.30941772460938, 48.96196746826172, 19.435836791992188, 122.60592651367188, 22.407390594482422, 12.076522827148438, 184.1217803955078, 49.10856628417969, 93.31806945800781, 75.84992980957031, 77.43982696533203, -7.738079071044922, 31.297531127929688, 37.62848663330078, -34.847618103027344, 17.709487915039062, 55.9488525390625, 57.40216064453125, 0.972381591796875, -3.1845016479492188, 99.40650939941406, 37.61137771606445, 174.73809814453125, -3.8936004638671875, 65.87670135498047, 81.79545593261719, -2.3208789825439453, 27.938858032226562, -56.26591491699219, 44.57013702392578, -59.599945068359375, 130.28497314453125, 50.6749267578125, 67.27041625976562, -25.720748901367188, -31.105209350585938, 86.9720687866211, 37.05221939086914, -28.492202758789062, -69.7904052734375, -0.623260498046875, 88.4783935546875, 15.477142333984375, 58.2187614440918, 51.085060119628906, 10.402101516723633, 66.58283996582031, -7.331760406494141, -42.66530990600586, 3.703765869140625, 42.044189453125, -20.16339874267578], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000119.npy"}
|
||||
{"epoch": 0.17474302496328928, "step": 120, "batch_size": 64, "mean": 49.90165328979492, "std": 60.92795181274414, "min": -85.1566162109375, "p10": -19.805996322631827, "median": 40.57330322265625, "p90": 137.2021438598633, "max": 195.42529296875, "pos_frac": 0.84375, "sample": [154.5394744873047, 89.409912109375, 32.052398681640625, 111.28125, 36.581153869628906, 9.48872184753418, -61.51133728027344, 102.87380981445312, 20.922590255737305, 72.81734466552734, 53.99250030517578, 166.542724609375, 82.23482513427734, 7.2748260498046875, 18.160192489624023, 24.0743408203125, 139.36260986328125, 60.79853820800781, 69.15618133544922, -39.017372131347656, -40.844879150390625, 132.1610565185547, 42.26402282714844, 31.613388061523438, 44.04875946044922, 38.10643005371094, 49.45269775390625, 7.845489501953125, 168.13076782226562, -85.1566162109375, -23.61602783203125, 41.97831726074219, 27.533164978027344, 3.8558197021484375, 107.1156005859375, 10.243711471557617, 195.42529296875, 54.93964767456055, 72.64335632324219, 4.051198959350586, 11.50583267211914, -8.053237915039062, 55.75421905517578, 188.746337890625, 34.37815856933594, 42.738189697265625, 131.90301513671875, 176.39865112304688, 39.16828918457031, 47.270599365234375, 0.3577117919921875, 90.86168670654297, -42.694549560546875, 18.51519012451172, 64.29674530029297, 36.74609375, -2.4556617736816406, 115.95074462890625, 78.39971923828125, -13.343517303466797, 77.76309204101562, -22.57563018798828, 36.7359619140625, 2.5121917724609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000120.npy"}
|
||||
{"epoch": 0.1762114537444934, "step": 121, "batch_size": 64, "mean": 54.51830291748047, "std": 70.97077178955078, "min": -150.0506591796875, "p10": -9.695475769042966, "median": 49.638938903808594, "p90": 151.12088470458986, "max": 219.7302703857422, "pos_frac": 0.765625, "sample": [8.873899459838867, -7.183631896972656, -2.4401702880859375, 219.7302703857422, -6.114606857299805, 10.488059997558594, 97.05757141113281, -16.62500762939453, 79.22713470458984, 76.24776458740234, -2.848907470703125, 64.12995147705078, 139.9593048095703, 109.38724517822266, 82.5220947265625, -1.6766738891601562, 89.54806518554688, 67.12173461914062, 54.06590270996094, -41.49725341796875, 38.006500244140625, 113.88722229003906, -59.308677673339844, 150.7321014404297, 171.9572296142578, 65.78280639648438, 35.657859802246094, 16.324237823486328, 112.19673156738281, 3.67669677734375, 77.53450012207031, -21.40644073486328, 49.293365478515625, 86.47810363769531, 2.1629161834716797, 201.76986694335938, 4.048614501953125, 182.30166625976562, 44.8675537109375, 208.90499877929688, 8.925804138183594, 128.76785278320312, -10.771980285644531, 16.74261474609375, -24.915374755859375, 11.102943420410156, 77.03428649902344, 24.45508575439453, -6.4144439697265625, 54.381622314453125, 5.976921081542969, 19.078765869140625, 62.17393493652344, 132.76522827148438, 151.28750610351562, 72.97723388671875, -150.0506591796875, 56.5897216796875, -5.4134368896484375, 79.50196838378906, 213.29147338867188, 19.53594398498535, -2.6767616271972656, 49.98451232910156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000121.npy"}
|
||||
{"epoch": 0.1776798825256975, "step": 122, "batch_size": 64, "mean": 68.18690490722656, "std": 67.45944213867188, "min": -91.22843933105469, "p10": 0.6514129638671886, "median": 52.04137420654297, "p90": 163.76800384521485, "max": 235.412353515625, "pos_frac": 0.90625, "sample": [154.61512756347656, 21.482276916503906, 77.2289810180664, 83.73464965820312, 19.816804885864258, 26.367919921875, 51.01652526855469, 101.48186492919922, 22.762527465820312, 117.16461181640625, 15.06005859375, 131.55792236328125, 126.41223907470703, 235.412353515625, 106.33892059326172, 35.70105743408203, 16.518226623535156, 55.264381408691406, 163.5077362060547, 50.039276123046875, 20.66376495361328, 142.4174346923828, 35.77676010131836, -8.278564453125, 85.50431823730469, 11.024368286132812, 64.55586242675781, 140.27352905273438, 73.78472137451172, 67.33543395996094, 77.01203155517578, -26.939666748046875, 172.0748291015625, -7.48039436340332, 53.06622314453125, 164.24853515625, 63.12432098388672, 23.92078399658203, 183.26150512695312, 159.97291564941406, 8.893026351928711, 49.750701904296875, -33.62603759765625, 234.92393493652344, 17.992286682128906, 39.47880172729492, 32.87866973876953, 43.02925491333008, 23.602500915527344, -91.22843933105469, -7.7478179931640625, 29.283309936523438, 33.97517395019531, 66.5549087524414, 207.90570068359375, 1.6806221008300781, 117.905517578125, 54.62462615966797, 0.21032333374023438, 13.960426330566406, 163.87954711914062, 76.01884460449219, 19.607139587402344, 153.60696411132812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000122.npy"}
|
||||
{"epoch": 0.17914831130690162, "step": 123, "batch_size": 64, "mean": 44.62263488769531, "std": 74.36831665039062, "min": -150.82763671875, "p10": -27.755376625061032, "median": 22.759469985961914, "p90": 145.96360626220712, "max": 238.867919921875, "pos_frac": 0.65625, "sample": [81.90826416015625, 82.44892883300781, -6.955223083496094, 8.967288970947266, 89.35844421386719, 79.47175598144531, 125.93978881835938, 115.33859252929688, 80.90746307373047, 49.069496154785156, -0.3360424041748047, -19.870803833007812, 8.449634552001953, -40.72650909423828, 154.5452423095703, 88.52861022949219, 16.359569549560547, -12.711227416992188, -3.353057861328125, 67.89859008789062, -7.0127716064453125, 125.72650146484375, 67.40907287597656, -76.33677673339844, 22.640914916992188, 96.58291625976562, 215.51263427734375, 22.87802505493164, 154.69622802734375, 164.91586303710938, -25.04292869567871, 64.06024169921875, 20.065174102783203, 8.816978454589844, 85.28563690185547, 123.0103759765625, 20.37811279296875, 49.155479431152344, 178.78440856933594, -20.47734832763672, 92.20367431640625, 85.03816223144531, -44.170352935791016, -69.98319244384766, 78.01274108886719, -8.652191162109375, -41.5565299987793, -5.139482498168945, -16.54627227783203, 238.867919921875, -28.91785430908203, 66.70249938964844, 52.307899475097656, 208.7911376953125, -10.677520751953125, 21.44505500793457, 122.70240783691406, 34.75746154785156, -150.82763671875, -18.041290283203125, -0.378204345703125, -9.337787628173828, 0.4953422546386719, 2.465250015258789], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000123.npy"}
|
||||
{"epoch": 0.18061674008810572, "step": 124, "batch_size": 64, "mean": 65.97357177734375, "std": 96.20331573486328, "min": -121.34342956542969, "p10": -26.351386260986327, "median": 43.79602813720703, "p90": 179.0110565185547, "max": 379.7846984863281, "pos_frac": 0.78125, "sample": [152.47731018066406, -24.907363891601562, 41.98591613769531, 34.45823669433594, 28.111408233642578, 151.16871643066406, -99.10636901855469, -9.177104949951172, 165.7275390625, 190.12911987304688, -4.109230041503906, 29.16973876953125, 4.732141494750977, 70.98075103759766, 79.04280853271484, 233.62518310546875, 159.77590942382812, 52.35368728637695, -19.512664794921875, 52.63665008544922, -121.34342956542969, -41.286109924316406, 142.28897094726562, -1.1579399108886719, 206.349609375, 130.12692260742188, 78.9552001953125, -3.8353347778320312, 48.41490173339844, 121.35792541503906, -26.970252990722656, 33.27128982543945, 89.50144958496094, 13.732988357543945, 10.346466064453125, 349.92315673828125, 103.85391235351562, 180.5086669921875, 30.482948303222656, 175.51663208007812, 106.8206558227539, -53.30622100830078, 18.857166290283203, -18.763519287109375, 39.88665771484375, 126.13034057617188, -100.98159790039062, 103.9158935546875, 20.828855514526367, 45.60614013671875, 39.49203872680664, 81.21533966064453, -67.15815734863281, 258.1259460449219, 22.904251098632812, 379.7846984863281, 49.847076416015625, 169.12020874023438, 1.1062946319580078, 33.002952575683594, 50.9111328125, 80.00568389892578, 14.685325622558594, 10.6707763671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000124.npy"}
|
||||
{"epoch": 0.18208516886930984, "step": 125, "batch_size": 64, "mean": 58.60149383544922, "std": 79.16519927978516, "min": -91.34652709960938, "p10": -28.99659042358398, "median": 33.299259185791016, "p90": 167.7408294677735, "max": 254.54681396484375, "pos_frac": 0.78125, "sample": [-3.767576217651367, 149.3168487548828, 91.87120056152344, 23.039291381835938, -14.42984390258789, 9.361820220947266, 115.76310729980469, -27.839271545410156, 142.28976440429688, 17.0423641204834, 11.09981918334961, -29.492584228515625, 23.570863723754883, 0.6484298706054688, 102.86168670654297, 112.76519775390625, 89.64076232910156, 54.94977569580078, 18.59876823425293, 4.148273468017578, 108.75489807128906, 19.232711791992188, -55.41642761230469, -10.9505615234375, 92.33829498291016, 254.54681396484375, -35.91820526123047, 31.41789436340332, 108.89198303222656, 36.55158233642578, -5.626533508300781, -82.5436019897461, 35.08833312988281, 15.78713607788086, 31.51018524169922, 212.2133331298828, 7.430339813232422, 212.91513061523438, 122.50491333007812, -23.667869567871094, 11.840763092041016, 104.347900390625, 191.49493408203125, 93.44037628173828, 237.6697998046875, 92.87699890136719, 57.279685974121094, 45.790550231933594, 172.34487915039062, 51.37572479248047, 14.33251953125, 131.90994262695312, 42.04253387451172, -18.205106735229492, 113.10263061523438, 28.163349151611328, -42.400726318359375, 154.70509338378906, 31.054533004760742, -91.34652709960938, 211.16383361816406, 156.998046875, -34.53448486328125, 28.549341201782227], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000125.npy"}
|
||||
{"epoch": 0.18355359765051396, "step": 126, "batch_size": 64, "mean": 54.387474060058594, "std": 72.72525024414062, "min": -77.66942596435547, "p10": -22.23567352294922, "median": 38.14095115661621, "p90": 158.50478973388672, "max": 253.807373046875, "pos_frac": 0.828125, "sample": [120.682373046875, 64.40339660644531, 11.623357772827148, 90.70087432861328, 13.423210144042969, 53.056114196777344, -38.90839385986328, 2.3035411834716797, 62.32862854003906, 154.60171508789062, -52.9302978515625, 0.6986808776855469, 253.807373046875, 19.233726501464844, 86.08181762695312, 1.5380401611328125, 133.60186767578125, 43.88842010498047, 130.38182067871094, 67.97773742675781, 45.205528259277344, 52.24240493774414, -7.971767425537109, 38.30398178100586, 66.5689926147461, 187.49484252929688, -22.34319305419922, 160.1775360107422, 32.782108306884766, 47.76130676269531, 19.942684173583984, 25.51595687866211, 74.02896118164062, 14.660987854003906, 183.46295166015625, 37.97792053222656, 57.29698181152344, -75.04756164550781, 149.63397216796875, -17.053958892822266, -62.64604187011719, -35.36515808105469, 240.14138793945312, -4.678186416625977, 27.569303512573242, 10.640033721923828, 2.464038848876953, 20.985286712646484, 126.82861328125, 118.93130493164062, -77.66942596435547, 192.3697052001953, 86.35562133789062, 23.870620727539062, 35.098175048828125, 97.76052856445312, 32.00923156738281, 63.54607391357422, 17.20873260498047, -21.98479461669922, 13.472343444824219, 92.818359375, 12.719245910644531, 179.21865844726562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000126.npy"}
|
||||
{"epoch": 0.18502202643171806, "step": 127, "batch_size": 64, "mean": 39.8004264831543, "std": 63.418663024902344, "min": -126.01162719726562, "p10": -35.92738723754883, "median": 43.01145553588867, "p90": 117.56906204223634, "max": 183.78099060058594, "pos_frac": 0.78125, "sample": [55.745033264160156, 80.63714599609375, -10.027362823486328, 40.87238311767578, 2.533355712890625, -96.06739807128906, 152.11573791503906, 8.951431274414062, 96.59795379638672, -126.01162719726562, 8.208992004394531, 62.920066833496094, 75.04118347167969, -34.81016540527344, 18.53407096862793, 36.88175964355469, 95.09819793701172, 10.998231887817383, -122.55271911621094, 76.00249481201172, 25.50774383544922, 72.6611328125, 128.99119567871094, 20.911865234375, 68.82792663574219, 173.54440307617188, 21.927753448486328, 85.873046875, -24.26811981201172, 16.155948638916016, 121.64974212646484, 77.74373626708984, 110.6220703125, 25.096817016601562, 51.99986267089844, 150.39645385742188, -2.2198009490966797, -18.98064422607422, 19.543243408203125, -2.4312820434570312, 17.239261627197266, 113.1712417602539, 9.25700569152832, 77.07500457763672, 45.15052795410156, 64.10781860351562, 46.24805450439453, 119.45384216308594, 110.22852325439453, 53.50395202636719, -46.36797332763672, 47.73143005371094, 18.986133575439453, 183.78099060058594, -61.08580780029297, 62.01298522949219, 48.09070587158203, 93.54360961914062, 46.910125732421875, 12.95356559753418, 31.902320861816406, -25.942893981933594, -36.40619659423828, -39.53868103027344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000127.npy"}
|
||||
{"epoch": 0.18649045521292218, "step": 128, "batch_size": 64, "mean": 42.07066345214844, "std": 74.8244400024414, "min": -172.208984375, "p10": -47.180390930175776, "median": 36.74879455566406, "p90": 147.1278259277344, "max": 255.3750762939453, "pos_frac": 0.6875, "sample": [48.085636138916016, 44.246070861816406, 136.12960815429688, -13.653480529785156, 91.7744140625, -61.456729888916016, 102.789794921875, 145.99075317382812, 35.90592956542969, 43.510536193847656, 169.25714111328125, 52.247398376464844, 49.85890197753906, 35.59208679199219, -89.03960418701172, 100.94447326660156, 154.69369506835938, 4.7227020263671875, -0.4044208526611328, 75.49761962890625, -73.37199401855469, -3.901378631591797, 255.3750762939453, -172.208984375, 148.99649047851562, 49.265220642089844, 160.45947265625, 147.61514282226562, 71.09644317626953, 48.20101547241211, -11.10055923461914, -59.03953552246094, 105.3876724243164, -5.241086959838867, -18.85101318359375, 21.808128356933594, -79.00047302246094, -3.803089141845703, 34.23906707763672, 46.2161750793457, 88.76590728759766, -39.85832214355469, 54.94384765625, -24.608470916748047, -10.558023452758789, 150.3783416748047, 33.04079055786133, 37.59165954589844, 22.389362335205078, 52.26076126098633, 22.24742889404297, -9.572202682495117, 128.492919921875, -1.4572620391845703, 12.116052627563477, 3.442291259765625, -13.206001281738281, 55.410545349121094, 103.75776672363281, 12.04507827758789, -50.31842041015625, 129.85687255859375, 145.35446166992188, 1.172780990600586], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000128.npy"}
|
||||
{"epoch": 0.18795888399412627, "step": 129, "batch_size": 64, "mean": 44.24713134765625, "std": 84.6582260131836, "min": -143.8267822265625, "p10": -43.857168197631836, "median": 31.81517219543457, "p90": 145.1571304321289, "max": 366.42816162109375, "pos_frac": 0.671875, "sample": [2.589620590209961, 32.9114875793457, -44.71953201293945, 110.64881896972656, -48.42308807373047, 122.4066162109375, 9.238349914550781, -41.84498596191406, 169.72616577148438, 26.78441619873047, -12.775238037109375, 106.68450927734375, 26.91397476196289, 45.748321533203125, 58.54249954223633, 73.32524108886719, 145.38967895507812, -38.961822509765625, -29.877593994140625, 53.5382080078125, 220.2698974609375, 87.44638061523438, 220.69454956054688, -97.3742446899414, 87.05934143066406, -88.80789184570312, -45.08143615722656, 40.26701736450195, 366.42816162109375, 141.29217529296875, 28.87376594543457, 19.559967041015625, 1.259979248046875, 77.61170196533203, -11.50799560546875, -16.980701446533203, 30.718856811523438, 122.10250854492188, 29.252967834472656, -16.478317260742188, -74.69718170166016, -7.231512069702148, 49.450927734375, 80.85285949707031, 144.61451721191406, -143.8267822265625, 17.742700576782227, 20.64092254638672, -8.987546920776367, -6.997249603271484, 138.00115966796875, 173.8178253173828, 39.646766662597656, 47.36297607421875, 70.86164855957031, -15.701858520507812, 43.975311279296875, 55.334503173828125, -38.43450164794922, 86.65399932861328, 148.14944458007812, -29.575599670410156, 76.2066421508789, -0.4958019256591797], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000129.npy"}
|
||||
{"epoch": 0.1894273127753304, "step": 130, "batch_size": 64, "mean": 66.64889526367188, "std": 84.98664855957031, "min": -110.17389678955078, "p10": -20.95257949829101, "median": 46.01510238647461, "p90": 195.54732666015627, "max": 327.4662170410156, "pos_frac": 0.78125, "sample": [74.14311218261719, 39.257896423339844, 30.2291259765625, 186.57537841796875, -15.702445983886719, 11.830398559570312, 200.85836791992188, 80.97039031982422, -53.400245666503906, -7.779638290405273, 15.922788619995117, 59.825923919677734, 34.323360443115234, 188.35092163085938, 205.03903198242188, 23.00115203857422, 93.99772644042969, -8.081140518188477, 36.74986267089844, 72.73160552978516, -11.98135757446289, 32.19758605957031, 7.982582092285156, 87.54350280761719, 92.0772705078125, -4.841499328613281, 33.314666748046875, 91.67399597167969, 57.685874938964844, 179.07733154296875, 142.3121337890625, 215.24908447265625, -33.440467834472656, 27.855697631835938, 148.2082977294922, -104.83761596679688, 33.94114685058594, 29.514862060546875, 34.90743637084961, -23.20263671875, 61.18055725097656, 52.43659591674805, 74.68010711669922, 241.1837615966797, 155.51934814453125, -7.762763977050781, 327.4662170410156, -40.239051818847656, -3.8189563751220703, 87.3604965209961, 33.25653839111328, 39.59360885620117, 198.63150024414062, -32.17736053466797, 146.16029357910156, -110.17389678955078, 131.8668212890625, 1.8132801055908203, 123.68524169921875, 114.1156005859375, 214.11312866210938, 57.90028381347656, 32.964820861816406, 61.69132995605469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000130.npy"}
|
||||
{"epoch": 0.19089574155653452, "step": 131, "batch_size": 64, "mean": 66.88613891601562, "std": 73.83077239990234, "min": -150.745849609375, "p10": -0.9118213653564453, "median": 50.76441192626953, "p90": 161.0794677734375, "max": 268.0539855957031, "pos_frac": 0.875, "sample": [50.671485900878906, 45.73102569580078, 50.857337951660156, 29.065448760986328, 268.0539855957031, -150.745849609375, 57.494937896728516, 40.072731018066406, 31.326438903808594, 104.98413848876953, 108.09930419921875, 20.369873046875, 61.28289794921875, 13.323423385620117, 99.79534912109375, 35.81904602050781, 41.81603240966797, -1.9133739471435547, 139.78372192382812, 75.35658264160156, 42.923118591308594, 158.13424682617188, -12.669319152832031, -12.629457473754883, 50.888519287109375, 42.59715270996094, 84.66783142089844, 7.496063232421875, 202.76934814453125, 39.6044807434082, 41.13534164428711, -84.38619995117188, 162.34170532226562, 9.72296142578125, 226.15029907226562, 4.211210250854492, 100.43233489990234, 29.64214324951172, 139.25987243652344, 47.246315002441406, 127.70156860351562, 31.271665573120117, 207.32418823242188, 27.02365493774414, -92.03451538085938, 125.60549926757812, 6.8065643310546875, 105.51616668701172, -0.9300575256347656, 25.11556625366211, -0.8692703247070312, 137.2013397216797, 9.182395935058594, 175.9803924560547, 153.99789428710938, 165.8990478515625, 89.39884948730469, 66.43673706054688, 29.70473861694336, 79.99937438964844, 129.05245971679688, 95.18366241455078, 99.82186889648438, 85.54093933105469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000131.npy"}
|
||||
{"epoch": 0.19236417033773862, "step": 132, "batch_size": 64, "mean": 54.898109436035156, "std": 87.75269317626953, "min": -157.57757568359375, "p10": -17.226745414733884, "median": 42.74197959899902, "p90": 146.60068664550784, "max": 374.057861328125, "pos_frac": 0.796875, "sample": [13.698753356933594, 10.271419525146484, 148.92132568359375, -61.33917999267578, 68.39337158203125, 37.07658386230469, 64.85995483398438, 289.3802185058594, 315.1575927734375, 7.772727966308594, 71.37637329101562, 108.69886016845703, 21.25434684753418, 76.93858337402344, -157.57757568359375, 5.1299285888671875, 220.1453857421875, -36.29956817626953, 65.80107879638672, -14.948793411254883, 1.8960094451904297, -12.194190979003906, 129.56314086914062, 67.58480834960938, 20.60918426513672, -18.20301055908203, -0.030374526977539062, 114.23509979248047, 8.790939331054688, 4.8127593994140625, -13.867523193359375, -12.71380615234375, 19.1331729888916, 93.39441680908203, 80.8216552734375, 147.8988037109375, 51.44226837158203, 68.12718200683594, 78.26217651367188, 50.03322982788086, 55.20643615722656, 33.97700119018555, 29.93265151977539, 143.57174682617188, -106.64505004882812, 48.40737533569336, 121.1141128540039, -67.46979522705078, 13.47250747680664, 93.89939880371094, -19.36277961730957, 58.321807861328125, 105.29974365234375, 18.53327178955078, 56.205963134765625, -14.727630615234375, 32.519981384277344, 103.51771545410156, 27.03717041015625, 12.337200164794922, 178.98016357421875, 27.666297912597656, 53.318763732910156, 374.057861328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000132.npy"}
|
||||
{"epoch": 0.19383259911894274, "step": 133, "batch_size": 64, "mean": 48.50770568847656, "std": 75.95181274414062, "min": -106.50254821777344, "p10": -20.619416809082026, "median": 38.41263961791992, "p90": 145.71980285644537, "max": 250.32887268066406, "pos_frac": 0.765625, "sample": [11.450790405273438, 70.19551086425781, -9.470413208007812, 110.44021606445312, 7.905525207519531, -39.86585998535156, -7.684600830078125, 35.73150634765625, -74.80424499511719, 250.32887268066406, 51.16747283935547, 72.50774383544922, -5.9777984619140625, 104.28533172607422, -79.26629638671875, 18.523956298828125, 92.58616638183594, -7.974395751953125, 215.0245819091797, 5.683374404907227, 66.58155059814453, 83.586181640625, -12.010047912597656, 131.11236572265625, 2.4013824462890625, 105.13865661621094, 46.65257263183594, 77.24784851074219, 85.63806915283203, 69.82940673828125, 10.5792236328125, 21.01844024658203, 26.1191463470459, 242.88540649414062, -2.0299072265625, 50.90636444091797, 71.4315414428711, -68.40620422363281, 207.55691528320312, 39.321128845214844, 40.95303726196289, 37.504150390625, 53.856590270996094, 241.4422607421875, -106.50254821777344, -14.81396484375, 64.97897338867188, 176.08184814453125, 18.847732543945312, 50.889774322509766, 7.803924560546875, 121.17156982421875, -54.675537109375, 151.98013305664062, 102.30829620361328, -23.107467651367188, 65.86026000976562, 48.247467041015625, -10.140777587890625, 20.28761863708496, 7.6946563720703125, 0.4360218048095703, 9.830558776855469, 17.210914611816406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000133.npy"}
|
||||
{"epoch": 0.19530102790014683, "step": 134, "batch_size": 64, "mean": 51.83448028564453, "std": 68.40628051757812, "min": -45.251365661621094, "p10": -32.7641523361206, "median": 42.87419319152832, "p90": 132.65450897216797, "max": 335.3938293457031, "pos_frac": 0.8125, "sample": [16.668716430664062, -45.251365661621094, -38.964866638183594, 125.37594604492188, 199.124267578125, 34.2952880859375, 16.53670883178711, 42.626590728759766, -34.64887237548828, 50.002357482910156, 120.36476135253906, 25.092266082763672, 12.080509185791016, 3.8259048461914062, 53.13298797607422, 42.57194519042969, -28.366472244262695, 47.695220947265625, 78.79133605957031, 25.592647552490234, 6.099428176879883, 10.607948303222656, 99.19393157958984, 18.7249755859375, 166.8382110595703, -2.2501983642578125, 109.9957275390625, 97.15385437011719, 54.74343490600586, 18.819293975830078, 50.74171447753906, 134.3798065185547, 1.817464828491211, 47.69788360595703, 169.15695190429688, 114.57222747802734, 128.62881469726562, -41.05005645751953, 85.29579162597656, -18.90259552001953, 59.281280517578125, -35.9244384765625, 65.54557800292969, 7.212553024291992, 335.3938293457031, 11.260379791259766, 78.5142822265625, 40.568939208984375, 73.625, -4.969264984130859, 63.99214172363281, -26.237701416015625, -34.919822692871094, 176.95169067382812, 60.658287048339844, 2.7618408203125, 60.32405090332031, 39.03977584838867, 43.121795654296875, 72.19475555419922, -39.559791564941406, 81.24671936035156, 32.52219009399414, 155.99224853515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000134.npy"}
|
||||
{"epoch": 0.19676945668135096, "step": 135, "batch_size": 64, "mean": 74.76806640625, "std": 94.97622680664062, "min": -89.15797424316406, "p10": -34.659814453124994, "median": 53.61056900024414, "p90": 218.41308135986333, "max": 360.62164306640625, "pos_frac": 0.8125, "sample": [30.19976043701172, 134.7310791015625, 33.0059928894043, 240.74728393554688, 129.65777587890625, 73.9731216430664, 247.2167205810547, 44.1781120300293, 245.22280883789062, 221.88572692871094, 134.0233154296875, 90.50712585449219, 191.8585205078125, 128.1136474609375, 272.86712646484375, 117.899169921875, 30.757659912109375, 205.65646362304688, 12.57058334350586, -41.513938903808594, 1.0876026153564453, 113.56199645996094, 160.8594970703125, 80.00648498535156, 39.027740478515625, 97.97537231445312, 1.1419525146484375, 360.62164306640625, -23.378002166748047, 117.5244140625, -61.29840087890625, 204.21893310546875, 210.31024169921875, -73.554443359375, 40.22994613647461, -16.99625015258789, 27.39023208618164, 57.12340545654297, 128.58160400390625, -2.67156982421875, -59.84349822998047, 54.04302978515625, 85.24189758300781, 57.97434997558594, 1.5195960998535156, 53.17810821533203, 4.176296234130859, -15.490066528320312, 47.15080261230469, 17.61638641357422, 33.316158294677734, 112.55110168457031, 93.94845581054688, 87.74516296386719, 7.093406677246094, 100.91346740722656, 44.387939453125, 231.50869750976562, 14.587425231933594, -89.15797424316406, -28.80047607421875, -37.17095947265625, -43.15907287597656, 6.505790710449219], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000135.npy"}
|
||||
{"epoch": 0.19823788546255505, "step": 136, "batch_size": 64, "mean": 49.48542022705078, "std": 93.52610778808594, "min": -77.690185546875, "p10": -46.89946174621582, "median": 28.828155517578125, "p90": 123.77716903686523, "max": 414.677490234375, "pos_frac": 0.6875, "sample": [63.820491790771484, 74.20240783691406, 103.80743408203125, 100.7135009765625, -8.816696166992188, -49.33280563354492, 300.21600341796875, 133.0731201171875, -18.69085693359375, 65.46549987792969, 101.60490417480469, 66.05010986328125, -24.860931396484375, 13.36301040649414, 76.58926391601562, 54.410858154296875, 81.75119018554688, 34.58427429199219, 115.83851623535156, 324.4103088378906, -1.0311279296875, 72.4036865234375, 414.677490234375, 2.2528228759765625, 114.68550109863281, -0.03725433349609375, 223.99554443359375, -7.500217437744141, 260.75244140625, 25.53207778930664, 9.835945129394531, 1.9523582458496094, 58.71296691894531, -75.04489135742188, 53.68067932128906, -64.01760864257812, 19.921493530273438, -25.431442260742188, 89.64976501464844, 88.14419555664062, 10.437803268432617, -16.060089111328125, -15.262619018554688, 62.60755920410156, 9.601421356201172, 2.925508499145508, -25.323246002197266, 32.12423324584961, 93.44119262695312, 123.54505157470703, -47.050758361816406, -64.42195892333984, 39.259246826171875, 77.60334014892578, -77.690185546875, 2.1385574340820312, 72.12785339355469, -21.864368438720703, -46.54643630981445, 23.898605346679688, -4.659721374511719, 123.87664794921875, 3.5702743530273438, -62.54491424560547], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000136.npy"}
|
||||
{"epoch": 0.19970631424375918, "step": 137, "batch_size": 64, "mean": 77.95802307128906, "std": 114.30103302001953, "min": -161.82867431640625, "p10": -27.800493621826167, "median": 51.081417083740234, "p90": 253.38302612304713, "max": 392.46124267578125, "pos_frac": 0.84375, "sample": [-44.65516662597656, 11.57132339477539, 75.4420166015625, 122.77413177490234, 154.26528930664062, 33.089149475097656, 47.24574279785156, 90.27700805664062, -22.186565399169922, 102.64602661132812, 3.701955795288086, 96.39686584472656, 387.7835693359375, 18.956506729125977, 39.55836486816406, -114.7435302734375, 112.69003295898438, 11.074142456054688, 37.003211975097656, 153.7771453857422, 159.62060546875, 81.71639251708984, 2.4948806762695312, 392.46124267578125, 111.2386474609375, 329.4830322265625, 190.00234985351562, 126.82048034667969, 18.345861434936523, 170.09939575195312, -94.82646179199219, 10.420379638671875, 70.46642303466797, 9.386329650878906, 135.42343139648438, 9.590465545654297, 128.10708618164062, 51.11383819580078, 8.900520324707031, 51.04899597167969, 64.10289001464844, 43.43470001220703, -3.9035873413085938, -161.82867431640625, -45.80242156982422, -73.02498626708984, 365.6900634765625, -30.206462860107422, 40.05803680419922, 297.7340393066406, 54.18719482421875, 129.817138671875, 3.6495437622070312, 94.46035766601562, -17.347753524780273, 29.097749710083008, 52.21382141113281, 9.59372329711914, 0.3145313262939453, 106.08202362060547, 310.1812744140625, 36.31074523925781, 125.37210845947266, 280.5461730957031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000137.npy"}
|
||||
{"epoch": 0.2011747430249633, "step": 138, "batch_size": 64, "mean": 59.33885192871094, "std": 73.00718688964844, "min": -114.8970947265625, "p10": -17.728237152099606, "median": 43.85134315490723, "p90": 151.6490127563477, "max": 279.9593505859375, "pos_frac": 0.78125, "sample": [159.50167846679688, -13.23321533203125, 73.83746337890625, 138.51199340820312, 6.743072509765625, 115.68904876708984, -18.617019653320312, -1.1247138977050781, 8.659149169921875, 174.80445861816406, 32.25532531738281, 20.373855590820312, 43.13926696777344, 11.15827751159668, 28.307905197143555, -114.8970947265625, 17.10135841369629, -22.128421783447266, -1.6357879638671875, -11.515388488769531, 190.6724853515625, 179.79049682617188, 15.18792724609375, 9.486076354980469, 119.58828735351562, 130.32774353027344, 132.5316162109375, -4.1106109619140625, 95.13944244384766, 139.05360412597656, 56.624267578125, 29.281476974487305, 128.5288848876953, 7.48883056640625, 139.54620361328125, 172.2701873779297, 75.49755859375, 44.563419342041016, -68.42843627929688, 62.28887176513672, 66.33100128173828, 28.029205322265625, 89.82847595214844, 0.615753173828125, -6.074089050292969, -54.53428649902344, 99.0897445678711, 140.5001220703125, 14.888263702392578, 38.90126037597656, 114.41725158691406, 33.96812438964844, 138.01792907714844, -34.495262145996094, -20.486486434936523, 60.764644622802734, -15.654411315917969, 72.62052917480469, 20.549270629882812, 279.9593505859375, 103.7309799194336, 58.18751525878906, 109.84484100341797, 156.42710876464844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000138.npy"}
|
||||
{"epoch": 0.2026431718061674, "step": 139, "batch_size": 64, "mean": 70.53996276855469, "std": 87.66817474365234, "min": -108.65182495117188, "p10": -31.359179687499996, "median": 57.8461799621582, "p90": 164.7234588623047, "max": 374.8907775878906, "pos_frac": 0.8125, "sample": [-19.229698181152344, 156.37188720703125, 100.28099822998047, 174.9274444580078, -33.6737060546875, 67.67375946044922, 121.14110565185547, -12.234764099121094, 83.27711486816406, 147.52691650390625, -108.65182495117188, 58.61222839355469, -45.89175796508789, 276.8145751953125, 139.2911376953125, 46.59648895263672, 88.61579895019531, 57.08013153076172, 34.76782989501953, 62.991207122802734, 143.968017578125, 73.84156036376953, -71.9052505493164, 123.8735580444336, 49.55657958984375, 1.5494194030761719, 154.4517364501953, 85.46607971191406, 14.641096115112305, 40.08753204345703, 153.3822021484375, 25.830598831176758, 124.71481323242188, 116.37779998779297, 39.374114990234375, 94.54457092285156, -21.970909118652344, -1.234731674194336, 9.75373649597168, 15.861732482910156, 81.59024810791016, 1.080902099609375, 17.088348388671875, 150.50234985351562, 23.458499908447266, 271.2991943359375, 96.84900665283203, -25.9586181640625, 77.70600128173828, -47.93799591064453, 83.29241180419922, -47.864845275878906, 33.87999725341797, 86.0699691772461, 374.8907775878906, 40.13493728637695, 250.70481872558594, 39.43623352050781, 49.416038513183594, -39.20494079589844, 188.51162719726562, 30.892730712890625, 41.96625518798828, 168.30270385742188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000139.npy"}
|
||||
{"epoch": 0.20411160058737152, "step": 140, "batch_size": 64, "mean": 64.14215850830078, "std": 90.56130981445312, "min": -130.58775329589844, "p10": -49.99583778381348, "median": 64.58736038208008, "p90": 174.87683105468753, "max": 352.08740234375, "pos_frac": 0.734375, "sample": [-27.33707046508789, 125.84860229492188, 352.08740234375, -77.07378387451172, -13.112030029296875, 126.20585632324219, 84.95116424560547, 146.88165283203125, 59.01335906982422, 166.3990936279297, 119.24427795410156, 72.21455383300781, 114.41844177246094, 42.18519973754883, 24.943984985351562, 85.65824890136719, 275.5821533203125, 12.33224868774414, 5.880435943603516, 76.09062957763672, 20.284439086914062, 65.3894271850586, 86.13496398925781, 136.09341430664062, -2.1944351196289062, -58.08671569824219, 256.15069580078125, -51.93510437011719, 37.72174072265625, 80.38798522949219, 23.3339900970459, 59.92894744873047, 8.26099967956543, 115.57498168945312, -32.596923828125, 64.55636596679688, 93.79353332519531, 64.61835479736328, -9.651824951171875, 162.53750610351562, 133.26361083984375, -34.063995361328125, 178.51014709472656, -130.58775329589844, -1.0319366455078125, 63.62799072265625, 62.02961730957031, 92.98213195800781, 189.02349853515625, 25.051734924316406, -9.855775833129883, 155.6230010986328, -14.645952224731445, 21.192245483398438, -89.12168884277344, -50.468875885009766, -72.74842071533203, 68.13517761230469, 184.94512939453125, 181.38870239257812, -48.89208221435547, 87.38377380371094, 84.23551940917969, 136.40570068359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000140.npy"}
|
||||
{"epoch": 0.2055800293685756, "step": 141, "batch_size": 64, "mean": 60.85835647583008, "std": 96.27452850341797, "min": -244.26358032226562, "p10": -47.03740921020507, "median": 59.39303398132324, "p90": 184.0755676269532, "max": 299.31732177734375, "pos_frac": 0.765625, "sample": [61.317474365234375, 40.405303955078125, -14.87496566772461, 103.99492645263672, 85.22883605957031, -33.11695098876953, 110.12603759765625, 62.705902099609375, 1.1693649291992188, -7.457389831542969, 57.46859359741211, 260.32342529296875, -244.26358032226562, 50.869178771972656, -6.3417816162109375, 34.67927551269531, 0.0774993896484375, -51.9285888671875, 268.44439697265625, -62.19514465332031, -35.624656677246094, 202.3866424560547, 48.72331619262695, 84.91107177734375, 145.43536376953125, 81.21739196777344, 106.90803527832031, 2.8755264282226562, -59.878326416015625, 73.30067443847656, 111.47706604003906, 78.19598388671875, 33.942230224609375, 170.09103393554688, 253.75692749023438, 27.015649795532227, 117.59581756591797, 24.847274780273438, -20.328771591186523, 17.603832244873047, -24.70555877685547, 48.38990783691406, -53.240867614746094, 190.06893920898438, 5.662336349487305, 139.31039428710938, 196.220947265625, 122.16761779785156, 11.663461685180664, -164.39901733398438, 19.27050018310547, 96.09379577636719, 63.66682434082031, 91.76741027832031, 116.87615966796875, 133.90911865234375, 299.31732177734375, 128.79153442382812, -0.7142620086669922, 162.75331115722656, 70.95167541503906, 105.90921020507812, -51.950233459472656, 6.070159912109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000141.npy"}
|
||||
{"epoch": 0.20704845814977973, "step": 142, "batch_size": 64, "mean": 69.63619995117188, "std": 107.96939849853516, "min": -212.34603881835938, "p10": -43.39850845336913, "median": 37.03660583496094, "p90": 239.59020385742187, "max": 294.63739013671875, "pos_frac": 0.765625, "sample": [-21.70842742919922, 263.4952697753906, 33.55150604248047, 43.226112365722656, -31.433273315429688, 244.59458923339844, -27.433467864990234, -22.42462730407715, -90.964599609375, 32.612613677978516, 136.78155517578125, 96.7135009765625, 161.26373291015625, 30.341806411743164, 38.29601287841797, 223.29713439941406, 14.587955474853516, -212.34603881835938, -151.19680786132812, 74.35883331298828, 20.961952209472656, 32.16282653808594, 17.078716278076172, 294.63739013671875, 167.48387145996094, 35.777198791503906, 147.36541748046875, 26.834257125854492, 145.2664794921875, -21.53009796142578, 108.35263061523438, 121.50483703613281, 240.2198028564453, 83.47425079345703, -1.898468017578125, 208.66880798339844, 216.95240783691406, 17.11532211303711, 20.909292221069336, 42.45973205566406, -62.81233215332031, -50.81142044067383, 3.1866836547851562, 238.1211395263672, 69.2642593383789, 246.56504821777344, 14.20547103881836, 21.52027130126953, 71.6627197265625, -48.526466369628906, 57.91012954711914, -19.826644897460938, 6.71806526184082, -82.61714172363281, 193.96031188964844, 248.8304443359375, 27.222930908203125, 195.26889038085938, 26.698577880859375, -7.974020004272461, 127.4983139038086, 255.85736083984375, 39.740150451660156, 125.64398956298828], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000142.npy"}
|
||||
{"epoch": 0.20851688693098386, "step": 143, "batch_size": 64, "mean": 44.3311882019043, "std": 94.11837005615234, "min": -154.46664428710938, "p10": -55.794631195068355, "median": 28.741188049316406, "p90": 146.9894561767578, "max": 332.42822265625, "pos_frac": 0.65625, "sample": [83.85661315917969, -7.4458465576171875, 54.195152282714844, 24.793930053710938, 280.78369140625, 91.80457305908203, -57.816436767578125, 100.43743133544922, 106.64947509765625, 23.356409072875977, 145.91268920898438, -83.96920776367188, -56.515098571777344, 136.55233764648438, 83.57758331298828, -127.92499542236328, -48.048675537109375, -2.3018569946289062, 104.6448974609375, -3.936859130859375, 34.32061004638672, 255.7193603515625, -66.42009735107422, 98.72380065917969, 18.701934814453125, -17.005401611328125, 45.4134521484375, 147.450927734375, 30.068458557128906, 16.44791030883789, -11.995452880859375, -47.569244384765625, -54.11354064941406, 5.029439926147461, 136.28526306152344, 4.779327392578125, 44.93561553955078, 66.95441436767578, 26.811981201171875, 332.42822265625, -35.579750061035156, -13.043556213378906, 35.1865234375, 83.84286499023438, -3.0336246490478516, 56.377471923828125, 155.26193237304688, 138.44342041015625, -6.824981689453125, -46.51446533203125, -91.80433654785156, 288.6719970703125, -34.28263854980469, 79.40673065185547, 67.01463317871094, 31.70103645324707, 110.27629089355469, 27.413917541503906, 180.18408203125, -154.46664428710938, -11.095664978027344, 42.18034362792969, 3.9459762573242188, 18.361648559570312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000143.npy"}
|
||||
{"epoch": 0.20998531571218795, "step": 144, "batch_size": 64, "mean": 54.54907989501953, "std": 79.60639190673828, "min": -111.27597045898438, "p10": -33.957157135009766, "median": 46.890663146972656, "p90": 172.07891235351562, "max": 243.69369506835938, "pos_frac": 0.734375, "sample": [-34.354278564453125, 111.54488372802734, -5.134702682495117, 172.07284545898438, -105.77982330322266, -19.528221130371094, 71.29181671142578, 120.19506072998047, 46.438316345214844, 10.43221664428711, 65.01214599609375, -37.93246078491211, 73.00907897949219, 104.30690002441406, 57.87358856201172, 172.08151245117188, 51.01305389404297, 204.97409057617188, 105.5365219116211, 142.09152221679688, 180.64364624023438, 192.925537109375, -25.079782485961914, -111.27597045898438, 105.21549224853516, -26.72320556640625, 48.293434143066406, -3.7655410766601562, 69.73299407958984, 22.97079086303711, -53.721580505371094, 74.0296859741211, 67.83695983886719, 19.23333740234375, -48.25493621826172, 139.09803771972656, 48.32122039794922, 27.51089096069336, 24.56085205078125, 93.43099212646484, 32.55085754394531, 94.14856719970703, 243.69369506835938, 27.967618942260742, 26.046104431152344, 76.35786437988281, -33.030540466308594, 24.201919555664062, 19.731338500976562, 103.08360290527344, 122.05113983154297, 233.8687286376953, -3.749725341796875, -29.72320556640625, -16.496551513671875, -22.56281280517578, 222.66172790527344, 17.115245819091797, 23.62563133239746, 0.01879119873046875, 47.34300994873047, 28.544281005859375, 150.21266174316406, -46.645843505859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000144.npy"}
|
||||
{"epoch": 0.21145374449339208, "step": 145, "batch_size": 64, "mean": 58.77191925048828, "std": 81.89253234863281, "min": -121.53300476074219, "p10": -42.566898727416984, "median": 46.077348709106445, "p90": 151.4165008544922, "max": 269.8736877441406, "pos_frac": 0.75, "sample": [23.566268920898438, -48.880035400390625, -96.89442443847656, 269.8736877441406, 150.66781616210938, 116.69783782958984, 250.42636108398438, 3.487812042236328, -121.53300476074219, 58.5994987487793, 4.069793701171875, 78.29945373535156, 147.94705200195312, -54.36239242553711, 10.536670684814453, 120.6658706665039, 56.83618927001953, 43.42522430419922, 120.57025146484375, -46.26476287841797, -10.624181747436523, 39.454254150390625, 43.34038543701172, 138.848876953125, 90.09188842773438, -66.00941467285156, -10.141220092773438, 30.899089813232422, -8.8253173828125, 181.49728393554688, 24.927724838256836, 12.818023681640625, 114.64085388183594, -33.93854904174805, -54.439453125, 120.64863586425781, 18.326583862304688, 21.872255325317383, 79.7257308959961, -10.313827514648438, 67.24980163574219, -7.013782501220703, 119.38089752197266, -16.596210479736328, 117.4560546875, 190.94175720214844, 104.08363342285156, -22.704910278320312, 51.04478073120117, 127.34252166748047, 30.787372589111328, 42.92241287231445, 145.78187561035156, 159.80636596679688, 58.67721176147461, 18.540225982666016, -4.349403381347656, 64.58854675292969, 134.3128662109375, 119.01306915283203, 237.32943725585938, 48.72947311401367, 11.806373596191406, 151.73736572265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000145.npy"}
|
||||
{"epoch": 0.21292217327459617, "step": 146, "batch_size": 64, "mean": 45.2476806640625, "std": 89.26947784423828, "min": -240.04562377929688, "p10": -70.22802505493163, "median": 60.02116012573242, "p90": 158.3383682250977, "max": 246.0668487548828, "pos_frac": 0.671875, "sample": [33.80925750732422, -35.98333740234375, 108.64041137695312, -64.73918914794922, 18.73065757751465, 2.6288833618164062, 75.6666488647461, 69.18661499023438, 78.56178283691406, 66.63970947265625, 213.03643798828125, 101.71903228759766, 43.78227233886719, -9.728012084960938, 25.278282165527344, -7.952766418457031, 94.4140853881836, -6.364532470703125, 38.289344787597656, -18.080766677856445, -5.479991912841797, 31.972915649414062, 62.28368377685547, 116.03068542480469, -104.14808654785156, 69.7757339477539, 149.3812713623047, -240.04562377929688, -72.58038330078125, 208.22500610351562, -81.80741119384766, 74.22357940673828, -6.076421737670898, 76.80340576171875, 191.5637969970703, -80.98619842529297, 48.1865234375, 61.029457092285156, 119.6664047241211, 246.0668487548828, 6.786382675170898, -5.7089691162109375, 61.14096450805664, 79.95719909667969, 61.2725830078125, 105.32357025146484, 103.46951293945312, -98.99031066894531, 59.01286315917969, -77.82958221435547, 79.49520874023438, 141.68704223632812, 162.1771240234375, 74.91209411621094, -8.178916931152344, 95.1988296508789, 36.539634704589844, -36.349159240722656, 86.31565856933594, -55.04174041748047, 227.76795959472656, -33.40986633300781, 189.07888793945312, -50.39540100097656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000146.npy"}
|
||||
{"epoch": 0.2143906020558003, "step": 147, "batch_size": 64, "mean": 61.400856018066406, "std": 91.30735778808594, "min": -147.00946044921875, "p10": -31.272994995117184, "median": 48.359161376953125, "p90": 192.55957336425783, "max": 295.91058349609375, "pos_frac": 0.75, "sample": [110.427490234375, -67.10455322265625, 43.52522277832031, 234.57638549804688, -29.19812774658203, -9.336540222167969, 97.34573364257812, 98.17864227294922, 51.61180114746094, -43.451316833496094, 29.95597267150879, 76.39166259765625, 60.29998016357422, 124.23625183105469, 25.213733673095703, 195.79571533203125, 115.28657531738281, 20.44283676147461, -1.2415008544921875, 185.00857543945312, 161.32801818847656, -26.04138946533203, 174.2873077392578, 23.624353408813477, 278.3216247558594, 49.40504455566406, 202.58424377441406, 110.9608154296875, -14.875898361206055, 47.31327819824219, 31.534191131591797, -135.26124572753906, 5.6396026611328125, 117.95216369628906, 105.23385620117188, -26.457382202148438, 61.81951904296875, 295.91058349609375, 78.04644012451172, 20.722400665283203, 180.05471801757812, 15.79498291015625, -12.475667953491211, 14.682456970214844, 32.56471252441406, 60.211326599121094, -11.858993530273438, 80.52548217773438, 66.21759796142578, -34.291908264160156, 18.18939208984375, 51.74411392211914, 19.817699432373047, 177.91123962402344, -26.50672149658203, 59.9261589050293, 242.54946899414062, -32.16222381591797, 31.985679626464844, -147.00946044921875, 215.9365234375, -59.68025207519531, 60.83238220214844, 44.684165954589844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000147.npy"}
|
||||
{"epoch": 0.21585903083700442, "step": 148, "batch_size": 64, "mean": 52.805450439453125, "std": 83.90935516357422, "min": -105.18499755859375, "p10": -30.25096054077148, "median": 36.593475341796875, "p90": 146.58033752441406, "max": 339.08526611328125, "pos_frac": 0.796875, "sample": [-6.665958404541016, 50.56139373779297, 7.1011962890625, 83.42156982421875, -59.28496551513672, 105.4561538696289, 78.70579528808594, 39.285762786865234, -41.016441345214844, 200.58839416503906, 21.355079650878906, 143.77471923828125, 19.634244918823242, -13.323829650878906, 156.7622833251953, 256.81500244140625, 2.7370147705078125, 16.178192138671875, -40.190940856933594, -73.44721984863281, 136.0157012939453, 129.78753662109375, -11.501089096069336, 92.89224243164062, 96.78077697753906, 42.44117736816406, 65.87222290039062, 49.62784194946289, 108.84526062011719, 1.1037921905517578, 147.78274536132812, 32.426300048828125, 38.435035705566406, 71.30250549316406, -31.347869873046875, 47.22694778442383, 32.458229064941406, 17.796890258789062, 113.26041412353516, 58.91145706176758, 132.85800170898438, 35.427391052246094, -85.51924133300781, 49.591522216796875, 37.759559631347656, 23.928421020507812, 187.03448486328125, 51.89311981201172, -3.936126708984375, -27.691505432128906, -105.18499755859375, 41.401885986328125, 9.499324798583984, 6.740358352661133, 33.920127868652344, 6.762537002563477, 34.29541778564453, 339.08526611328125, 321.94866943359375, 7.67913818359375, -4.312967300415039, 0.7863006591796875, 90.78616333007812, 6.23045539855957], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000148.npy"}
|
||||
{"epoch": 0.2173274596182085, "step": 149, "batch_size": 64, "mean": 65.48056030273438, "std": 82.64147186279297, "min": -187.12940979003906, "p10": -9.28653106689453, "median": 57.63277626037598, "p90": 154.80462493896485, "max": 310.38116455078125, "pos_frac": 0.828125, "sample": [113.61742401123047, 69.36297607421875, 208.39312744140625, 60.55031967163086, 108.9148941040039, 71.59739685058594, 74.69542694091797, -10.285345077514648, -18.96465301513672, -9.846675872802734, 21.340253829956055, -7.8790435791015625, 60.6342658996582, 45.02500915527344, 61.84967041015625, -67.12255859375, 92.14498901367188, 94.715087890625, 47.126068115234375, 135.05023193359375, 111.61161804199219, 175.83541870117188, 106.0239486694336, 29.24824333190918, 14.671480178833008, 100.14938354492188, 48.558990478515625, 128.07342529296875, 75.40402221679688, 135.2601318359375, 65.27013397216797, -1.6859703063964844, 310.38116455078125, 41.91725158691406, 155.52133178710938, 24.198455810546875, 225.40234375, 66.39543914794922, 28.119407653808594, 110.17243194580078, 25.193965911865234, 71.58549499511719, 11.672914505004883, 42.551361083984375, -0.43494415283203125, 54.715232849121094, 13.162555694580078, 53.425621032714844, -13.556121826171875, -187.12940979003906, 96.81231689453125, 13.643028259277344, 2.6265411376953125, 153.13230895996094, 10.759611129760742, 278.69482421875, 15.568338394165039, 41.60790252685547, 139.06378173828125, 266.47918701171875, 7.207557678222656, -7.979526519775391, 87.40090942382812, -86.89550018310547], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000149.npy"}
|
||||
{"epoch": 0.21879588839941264, "step": 150, "batch_size": 64, "mean": 64.3185043334961, "std": 95.61563110351562, "min": -86.17083740234375, "p10": -20.220893859863278, "median": 38.37028694152832, "p90": 172.80112152099613, "max": 422.55218505859375, "pos_frac": 0.8125, "sample": [298.5655517578125, 322.20477294921875, 20.76972198486328, 30.884857177734375, 61.52635192871094, -39.280357360839844, 184.69839477539062, 37.565887451171875, -81.22783660888672, 73.2588882446289, 422.55218505859375, -8.761667251586914, -16.157997131347656, 18.458091735839844, -9.331642150878906, 132.30430603027344, 18.737686157226562, 53.032691955566406, 48.51499938964844, 74.12467193603516, 39.174686431884766, 57.475120544433594, 149.27056884765625, 27.424903869628906, 110.03178405761719, 90.22395324707031, 194.20411682128906, 133.81626892089844, 16.65972900390625, 68.098388671875, 14.27215576171875, -48.597686767578125, 84.03229522705078, 176.29298400878906, 69.83746337890625, 8.159072875976562, 164.6534423828125, 22.00141716003418, 146.1175537109375, -21.962135314941406, 96.18589782714844, -36.28430938720703, -41.955726623535156, 6.1757354736328125, 122.71648406982422, 5.160915374755859, 29.45215606689453, 63.475563049316406, 19.43254852294922, 39.418434143066406, 57.73390197753906, 69.87274169921875, 65.61550903320312, 19.84239959716797, 30.502599716186523, 352.49322509765625, 12.963338851928711, 17.66286849975586, 11.546243667602539, 29.802093505859375, 98.54462432861328, -11.347679138183594, -86.17083740234375, -0.080047607421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000150.npy"}
|
||||
{"epoch": 0.22026431718061673, "step": 151, "batch_size": 64, "mean": 64.55509185791016, "std": 80.68579864501953, "min": -145.84878540039062, "p10": -27.499513626098633, "median": 56.72659683227539, "p90": 182.42498168945312, "max": 252.0250244140625, "pos_frac": 0.828125, "sample": [-145.84878540039062, 222.18055725097656, 47.52418518066406, -3.2543563842773438, 43.53062438964844, 56.80586242675781, 68.81178283691406, 113.53555297851562, 70.89768981933594, 27.417404174804688, 182.69381713867188, -46.134674072265625, -28.26776123046875, 103.804443359375, 56.529632568359375, -45.493717193603516, -5.591312408447266, 42.39295959472656, 57.046142578125, 52.410072326660156, 231.96551513671875, 56.94431686401367, 147.3452606201172, 56.9832763671875, -55.506446838378906, 177.52218627929688, 25.834793090820312, 10.779144287109375, -25.70693588256836, 22.064483642578125, 36.20233154296875, 12.958181381225586, 19.697940826416016, 8.893728256225586, 113.09709167480469, 181.01092529296875, -64.05778503417969, 64.32633209228516, 104.5760269165039, 99.51142883300781, 8.007226943969727, 57.785858154296875, 100.14201354980469, 78.86814880371094, 11.692703247070312, 197.61740112304688, 106.69981384277344, 56.64733123779297, 121.68504333496094, -6.416862487792969, 215.7384490966797, 7.992469787597656, 2.9131107330322266, 142.11947631835938, 12.720962524414062, 15.452949523925781, 204.90426635742188, 109.83185577392578, 252.0250244140625, 181.79769897460938, -48.44849395751953, 77.63621520996094, 20.32494354248047, 108.35630798339844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000151.npy"}
|
||||
{"epoch": 0.22173274596182085, "step": 152, "batch_size": 64, "mean": 78.45030212402344, "std": 95.8574447631836, "min": -88.484130859375, "p10": -39.69433593749999, "median": 61.300127029418945, "p90": 217.458805847168, "max": 318.10968017578125, "pos_frac": 0.796875, "sample": [29.7049560546875, 58.22221374511719, 163.18431091308594, 174.90562438964844, 74.92831420898438, 205.15036010742188, 61.9343147277832, 146.072998046875, 318.10968017578125, 36.070396423339844, 155.43890380859375, 132.8471221923828, 36.28676223754883, -67.44741821289062, -4.7999267578125, 230.036376953125, 140.01849365234375, 50.65270233154297, 67.89938354492188, 62.84446716308594, 56.962486267089844, 53.09003448486328, -46.81907653808594, 74.83013916015625, 1.8626327514648438, 316.157958984375, 130.89559936523438, 229.1743621826172, 42.55989074707031, 22.60344696044922, 63.14524841308594, -51.07525634765625, 20.692516326904297, -7.029632568359375, 1.3106040954589844, 60.66593933105469, 19.60645294189453, 200.60757446289062, 32.05999755859375, 46.00885009765625, 288.5059509277344, -24.465652465820312, 54.9130859375, 210.35104370117188, 66.31829833984375, -32.4072265625, 240.55079650878906, 34.75640869140625, -75.9747085571289, 107.07586669921875, 3.178224563598633, 142.73947143554688, 92.50860595703125, 71.6360092163086, 220.50498962402344, -46.046539306640625, 161.95591735839844, -88.484130859375, -8.844017028808594, 116.71928405761719, -42.8173828125, -29.755203247070312, 135.73477172851562, 82.79550170898438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000152.npy"}
|
||||
{"epoch": 0.22320117474302498, "step": 153, "batch_size": 64, "mean": 67.03783416748047, "std": 102.10991668701172, "min": -177.0665283203125, "p10": -35.946903228759766, "median": 55.83584976196289, "p90": 200.53486633300787, "max": 413.35040283203125, "pos_frac": 0.75, "sample": [87.40528869628906, -2.150390625, 9.444442749023438, -7.052928924560547, 19.427898406982422, -10.164459228515625, 28.175861358642578, 158.10464477539062, -40.50312805175781, 189.87892150878906, 93.665283203125, 9.563499450683594, -41.7713737487793, 84.00343322753906, 245.5714874267578, -69.15013885498047, 78.21981811523438, 16.984159469604492, 55.91522979736328, 104.0960922241211, 55.7564697265625, 413.35040283203125, 41.06024932861328, 65.76296997070312, 82.94845581054688, 185.40359497070312, -25.725074768066406, -42.53279113769531, 205.10169982910156, 206.32656860351562, 140.0193634033203, 39.57648468017578, 246.778076171875, 6.34022331237793, 1.8860912322998047, 1.173980712890625, 166.229736328125, 96.92352294921875, -34.42654800415039, -36.59848403930664, 128.8219757080078, 16.406158447265625, 11.106170654296875, 270.7976379394531, 24.333648681640625, -15.277366638183594, -62.89318084716797, 63.92045593261719, 9.05426025390625, -177.0665283203125, 90.09117126464844, 360.82183837890625, 76.36860656738281, -32.46864700317383, 65.80506896972656, -4.162954330444336, 96.89019012451172, 45.53759765625, 135.06381225585938, 136.34210205078125, 77.87054443359375, -20.648799896240234, 63.635765075683594, 105.05345153808594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000153.npy"}
|
||||
{"epoch": 0.22466960352422907, "step": 154, "batch_size": 64, "mean": 64.08331298828125, "std": 106.6012954711914, "min": -205.96124267578125, "p10": -54.61430282592774, "median": 56.99863052368164, "p90": 193.59736633300784, "max": 302.6189270019531, "pos_frac": 0.65625, "sample": [-9.88703727722168, 57.49940490722656, -205.96124267578125, 24.916505813598633, 302.6189270019531, 56.49785614013672, -11.487634658813477, 152.03350830078125, 178.22555541992188, -54.51351547241211, 93.73035430908203, 234.31260681152344, -89.83499145507812, 128.97665405273438, 119.41500854492188, 62.13457107543945, 2.4511051177978516, 23.32598876953125, 36.64101028442383, -4.649662017822266, 113.02639770507812, 58.3194694519043, -6.702770233154297, 64.960693359375, -27.445831298828125, 188.88058471679688, -54.65749740600586, 210.78195190429688, 153.03152465820312, -90.43986511230469, -37.540977478027344, 178.63262939453125, -75.3308334350586, 169.33114624023438, -52.901031494140625, 200.86973571777344, 298.1790771484375, 171.73431396484375, -3.7738189697265625, 179.43626403808594, 195.57875061035156, 118.89157104492188, 18.770626068115234, 78.43844604492188, 188.97413635253906, -11.585037231445312, -17.474884033203125, -8.213624954223633, -91.89745330810547, 119.19099426269531, 157.8840789794922, -23.802276611328125, 3.994779586791992, 29.026611328125, 10.986007690429688, -80.10240173339844, 111.48617553710938, 141.34463500976562, 121.00789642333984, -51.31964111328125, -7.86041259765625, 1.1787548065185547, 82.88063049316406, 279.11761474609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000154.npy"}
|
||||
{"epoch": 0.2261380323054332, "step": 155, "batch_size": 64, "mean": 64.64971923828125, "std": 95.74845886230469, "min": -212.2616729736328, "p10": -24.976444625854484, "median": 63.63713836669922, "p90": 170.57701721191415, "max": 356.83038330078125, "pos_frac": 0.734375, "sample": [-8.083381652832031, 91.2346420288086, 58.509971618652344, -7.42242431640625, 91.96157836914062, 13.481781005859375, 80.68283081054688, -28.271530151367188, 74.58609771728516, 22.36745834350586, -8.187271118164062, 65.18678283691406, 102.79256439208984, -81.21002197265625, 110.52153778076172, 73.60395050048828, -27.95828628540039, 48.433563232421875, 62.087493896484375, -0.370391845703125, 76.38394165039062, 67.86483764648438, 41.89178466796875, 356.83038330078125, 188.689208984375, 8.891447067260742, 137.1332550048828, 104.01384735107422, 9.067481994628906, -129.37196350097656, 24.16043472290039, -1.7895965576171875, 23.150856018066406, 95.11141967773438, 189.81289672851562, -212.2616729736328, 282.50213623046875, 46.533607482910156, 137.35336303710938, -15.358146667480469, 2.434467315673828, 96.27340698242188, 148.6998291015625, 44.80303955078125, 282.5453186035156, 98.35501098632812, 285.5980529785156, 151.797607421875, -76.01101684570312, 102.5621337890625, -0.9167327880859375, 178.62533569335938, -13.20615005493164, 113.26343536376953, 89.96426391601562, -49.079490661621094, 61.22590637207031, 131.6688232421875, 124.9864273071289, 12.811725616455078, 136.58226013183594, -4.034730911254883, -18.018814086914062, 72.09554290771484], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000155.npy"}
|
||||
{"epoch": 0.2276064610866373, "step": 156, "batch_size": 64, "mean": 37.15288543701172, "std": 108.95697021484375, "min": -283.4932861328125, "p10": -97.74614868164062, "median": 23.362640380859375, "p90": 137.91751861572266, "max": 290.43695068359375, "pos_frac": 0.734375, "sample": [135.0177459716797, 58.966278076171875, 290.43695068359375, 258.3602294921875, 176.72735595703125, 120.79803466796875, -200.26724243164062, 2.40771484375, -86.67803955078125, 20.492149353027344, -200.25599670410156, 133.69076538085938, 99.7439956665039, -23.7413330078125, -64.91392517089844, -139.0936279296875, 41.03180694580078, 106.38809204101562, 253.2207489013672, 77.65397644042969, 21.126937866210938, 122.16644287109375, 50.141510009765625, 17.62118911743164, 3.0261173248291016, -6.730094909667969, 15.698606491088867, 31.06580352783203, 258.44818115234375, -16.22643280029297, 244.73028564453125, 139.1602783203125, 122.33340454101562, 71.2939224243164, 10.975799560546875, 7.284666061401367, 76.57847595214844, -126.0819091796875, -283.4932861328125, -80.0125503540039, 81.90335845947266, -54.05492401123047, 1.4122772216796875, 128.04359436035156, 11.552413940429688, -102.4896240234375, 84.4781265258789, 118.47112274169922, 76.00785827636719, 8.590593338012695, -4.780632019042969, 77.20198059082031, 59.37751007080078, 54.952247619628906, -109.24581146240234, -49.00856018066406, 7.2607574462890625, 20.412582397460938, 4.462921142578125, -4.3916168212890625, 25.598342895507812, 76.8227310180664, 7.651622772216797, 118.46295166015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000156.npy"}
|
||||
{"epoch": 0.2290748898678414, "step": 157, "batch_size": 64, "mean": 82.09357452392578, "std": 106.38275909423828, "min": -306.0540771484375, "p10": -28.342701148986816, "median": 73.64865112304688, "p90": 200.03189392089848, "max": 389.4761962890625, "pos_frac": 0.828125, "sample": [161.04061889648438, 167.76483154296875, 87.32076263427734, 192.2500762939453, 131.56710815429688, 10.322914123535156, 77.9969482421875, 147.61737060546875, 4.798896789550781, 152.39035034179688, 57.704837799072266, 212.62033081054688, 51.50694274902344, 91.11224365234375, 225.18206787109375, 203.36695861816406, 151.10757446289062, 107.7701416015625, 42.68257141113281, 161.97793579101562, 11.929359436035156, 39.00837707519531, 69.30035400390625, -29.01201629638672, 114.26910400390625, 251.96043395996094, -4.746040344238281, 31.886829376220703, -2.21282958984375, 115.67726135253906, -306.0540771484375, -40.500816345214844, 27.661331176757812, 52.39885711669922, 125.43904113769531, 82.07254791259766, 253.00936889648438, 129.119873046875, 108.00993347167969, -33.63114929199219, 29.037479400634766, 285.4210510253906, 34.46831512451172, 64.43445587158203, 8.323932647705078, 178.8129119873047, 107.31468200683594, -26.78096580505371, 171.80941772460938, 126.20561218261719, 188.37493896484375, 46.90437316894531, -58.98466110229492, 20.67901611328125, 187.11431884765625, 158.02484130859375, 389.4761962890625, 58.443809509277344, 46.33189392089844, -141.868408203125, 17.637969970703125, -63.518829345703125, 13.80514144897461, -21.165924072265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000157.npy"}
|
||||
{"epoch": 0.2305433186490455, "step": 158, "batch_size": 64, "mean": 87.40489959716797, "std": 133.1334228515625, "min": -180.47567749023438, "p10": -46.25967750549316, "median": 54.35513687133789, "p90": 279.46174011230477, "max": 486.6083068847656, "pos_frac": 0.71875, "sample": [8.39107894897461, 10.902673721313477, 43.38999938964844, 51.04541015625, 53.56141662597656, -30.52568817138672, 33.03133773803711, 87.01850128173828, 36.62632751464844, -50.686466217041016, 213.511962890625, -115.0401611328125, 251.72679138183594, 81.9052963256836, -12.367462158203125, -57.367523193359375, -14.73713493347168, 104.76788330078125, -12.077322006225586, 15.943199157714844, 232.5135498046875, -36.735382080078125, 288.6636962890625, -180.47567749023438, 306.41656494140625, 74.56583404541016, 144.30747985839844, 20.541414260864258, 36.91795349121094, 13.028629302978516, 79.12629699707031, 87.5238265991211, -20.73406982421875, 157.46653747558594, -55.36789321899414, -3.3957977294921875, 343.01885986328125, 103.6924819946289, -120.28150177001953, 334.7313232421875, 419.67852783203125, -3.3037872314453125, 67.98184967041016, 486.6083068847656, 18.600494384765625, 12.428802490234375, -50.34151840209961, 121.63232421875, 237.52206420898438, 110.81958770751953, 379.04901123046875, 170.06649780273438, 101.66166687011719, 174.2176055908203, -14.961027145385742, 178.517333984375, 104.11421203613281, -8.455825805664062, 26.72960662841797, 101.5287857055664, 257.9905090332031, 55.14885711669922, 161.9903564453125, -19.85470962524414], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000158.npy"}
|
||||
{"epoch": 0.23201174743024963, "step": 159, "batch_size": 64, "mean": 59.80369567871094, "std": 106.1305160522461, "min": -209.13551330566406, "p10": -57.67332382202149, "median": 59.89497375488281, "p90": 196.59541625976564, "max": 319.7441101074219, "pos_frac": 0.75, "sample": [11.798843383789062, 41.29871368408203, 142.6573028564453, -49.45042419433594, 250.99441528320312, 136.82968139648438, -209.13551330566406, -39.48856735229492, 125.97526550292969, 166.83270263671875, 91.48747253417969, -57.84953308105469, 14.787797927856445, -22.430084228515625, 90.91651916503906, -71.25882720947266, -117.20559692382812, 52.81682205200195, 186.75372314453125, 145.07257080078125, 319.7441101074219, 9.207141876220703, -47.702789306640625, 18.9053955078125, 29.622718811035156, -74.74482727050781, 20.48670196533203, -54.75140380859375, 88.30764770507812, 54.08677673339844, -21.97130584716797, 199.02456665039062, 89.09809112548828, 14.549671173095703, 117.55518341064453, -23.13899040222168, 60.423919677734375, -100.07623291015625, -4.432807922363281, 225.03787231445312, 142.7142333984375, 223.28109741210938, 62.672882080078125, 71.93079376220703, 1.3066635131835938, 274.4764709472656, 13.12852668762207, 1.6833667755126953, 72.98451232910156, 59.36602783203125, 134.78475952148438, 29.68445587158203, -57.262168884277344, -167.2371826171875, 71.0155029296875, 99.22161865234375, 97.51932525634766, 95.6416015625, 29.280466079711914, 305.8613586425781, 190.92739868164062, 68.63027954101562, 69.84251403808594, 125.34733581542969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000159.npy"}
|
||||
{"epoch": 0.23348017621145375, "step": 160, "batch_size": 64, "mean": 80.51261901855469, "std": 131.21035766601562, "min": -152.8600311279297, "p10": -69.3838020324707, "median": 65.3844985961914, "p90": 248.41092224121098, "max": 466.6248779296875, "pos_frac": 0.671875, "sample": [105.20232391357422, -11.262327194213867, 189.70867919921875, 159.1498260498047, -13.48819351196289, -70.99128723144531, 194.95556640625, 186.0095672607422, 170.89065551757812, 13.636215209960938, -80.62036895751953, -5.849090576171875, 143.4285125732422, 240.29489135742188, 121.66114044189453, -72.81153869628906, 38.81503677368164, -37.71788024902344, -145.81216430664062, 230.4324493408203, 282.510498046875, -119.38990783691406, 251.88922119140625, -63.44342803955078, 145.38424682617188, 44.121673583984375, 5.390892028808594, -12.631149291992188, 466.6248779296875, 64.89567565917969, 14.382654190063477, 65.87332153320312, -30.669769287109375, 395.14190673828125, -1.67510986328125, 74.28221130371094, 107.92784881591797, 164.1731719970703, 380.344970703125, 305.41412353515625, 31.44710922241211, 220.04443359375, -152.8600311279297, 207.03128051757812, 93.65630340576172, 14.891326904296875, -31.007158279418945, 17.610605239868164, 115.84957122802734, 111.6920166015625, -38.94928741455078, 34.09966278076172, 112.44563293457031, -65.63300323486328, 93.6822509765625, 78.94389343261719, -11.210458755493164, -0.4527759552001953, -96.56273651123047, 160.4774169921875, 31.758712768554688, 258.07806396484375, -43.14970397949219, 114.74461364746094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000160.npy"}
|
||||
{"epoch": 0.23494860499265785, "step": 161, "batch_size": 64, "mean": 70.55889129638672, "std": 105.04612731933594, "min": -134.43753051757812, "p10": -48.2220474243164, "median": 48.69301414489746, "p90": 235.35553436279304, "max": 325.6353759765625, "pos_frac": 0.765625, "sample": [77.4606704711914, 202.02749633789062, -49.53868103027344, 134.5590057373047, 303.1046447753906, -134.43753051757812, -53.287322998046875, -67.49325561523438, 99.38511657714844, -44.22106170654297, 43.10150146484375, 98.46245574951172, 79.58433532714844, -72.02006530761719, 3.797435760498047, 122.76863098144531, 325.6353759765625, 85.38706970214844, 139.62860107421875, 268.18096923828125, 15.442298889160156, 55.55350875854492, -7.22186279296875, -35.7234992980957, 34.02827453613281, -9.372516632080078, 128.772705078125, 38.41901397705078, 279.9692077636719, 302.7119445800781, 125.78291320800781, -32.17433166503906, 0.3219108581542969, -29.066959381103516, 243.7313232421875, 111.6175765991211, 36.91743469238281, -45.14990234375, 17.150833129882812, 82.75723266601562, -13.966323852539062, 26.336753845214844, 11.691902160644531, 30.254592895507812, 14.501914978027344, 25.904754638671875, 209.86074829101562, 76.26143646240234, 15.554641723632812, 148.59317016601562, 215.81202697753906, -106.99296569824219, 76.61784362792969, 168.95501708984375, 37.61662292480469, 41.16474914550781, 33.02340316772461, 128.56072998046875, 91.50495910644531, -89.52037811279297, 54.28452682495117, 255.27484130859375, 116.14910888671875, 71.77237701416016], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000161.npy"}
|
||||
{"epoch": 0.23641703377386197, "step": 162, "batch_size": 64, "mean": 69.96736145019531, "std": 112.02008056640625, "min": -177.0685272216797, "p10": -44.371298217773436, "median": 61.53647232055664, "p90": 203.45760498046877, "max": 454.6783752441406, "pos_frac": 0.75, "sample": [-0.34050750732421875, 51.68807601928711, 105.09695434570312, 248.69764709472656, 61.60475158691406, 221.4638671875, 31.2498836517334, 136.92225646972656, 46.33892059326172, -7.686971664428711, -21.661203384399414, 198.84005737304688, 70.36396789550781, -111.01229095458984, -40.47212219238281, -18.681861877441406, 161.70901489257812, 166.58547973632812, -4.5296630859375, 122.15840148925781, 76.19719696044922, 28.212425231933594, 56.67805480957031, 7.4232025146484375, 21.230051040649414, 205.43655395507812, 130.75318908691406, -37.00016784667969, -177.0685272216797, 147.3805389404297, 102.40422821044922, -113.1112060546875, -15.668107986450195, 18.73868751525879, 150.82266235351562, 69.35835266113281, 233.38571166992188, 454.6783752441406, 64.19624328613281, 119.11327362060547, 85.35677337646484, 96.30400085449219, 152.04957580566406, 166.29762268066406, 145.9080047607422, 2.8889007568359375, -46.04237365722656, 0.05641937255859375, 94.97244262695312, 43.24053192138672, -84.75050354003906, 166.97589111328125, 4.6196441650390625, -94.15645599365234, 394.4644775390625, 22.885841369628906, -115.3205337524414, 30.221176147460938, 78.75163269042969, 87.50385284423828, 61.46819305419922, -32.553741455078125, 38.92195129394531, 216.35244750976562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000162.npy"}
|
||||
{"epoch": 0.23788546255506607, "step": 163, "batch_size": 64, "mean": 68.33741760253906, "std": 110.75975799560547, "min": -161.59222412109375, "p10": -50.1995361328125, "median": 60.60329818725586, "p90": 258.93974609375005, "max": 353.47332763671875, "pos_frac": 0.671875, "sample": [-7.141273498535156, -8.734115600585938, 265.11138916015625, 353.47332763671875, -27.369285583496094, -124.58625793457031, 93.9194564819336, -10.068695068359375, -9.500801086425781, 80.10557556152344, 134.28375244140625, -35.84563446044922, 104.22669219970703, 321.89990234375, 107.7421646118164, 43.41698455810547, 100.63874053955078, -17.576889038085938, 14.833274841308594, -62.16051483154297, 56.953758239746094, 155.80496215820312, 109.87646484375, -34.208351135253906, 99.14558410644531, 0.7299098968505859, 122.33930969238281, -161.59222412109375, -69.22197723388672, 110.3117904663086, 268.4188232421875, 14.288105010986328, 139.54696655273438, 12.91218376159668, 67.63882446289062, -10.557008743286133, -52.40226745605469, 299.1934814453125, 128.49948120117188, 65.97769165039062, -69.71798706054688, 33.300071716308594, -45.05982971191406, 244.53924560546875, 43.52758026123047, 104.9181137084961, -32.288063049316406, 298.05340576171875, 148.5526123046875, 31.52629280090332, 60.71631622314453, 108.98295593261719, 60.49028015136719, 181.41427612304688, 76.1912612915039, -22.84160614013672, 81.9156265258789, 12.499130249023438, -97.59927368164062, -1.3405628204345703, 268.87060546875, 91.5682373046875, 165.88092041015625, -10.828346252441406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000163.npy"}
|
||||
{"epoch": 0.2393538913362702, "step": 164, "batch_size": 64, "mean": 83.9617919921875, "std": 105.3608627319336, "min": -152.96243286132812, "p10": -22.498446464538574, "median": 70.7585220336914, "p90": 210.04867553710943, "max": 427.15570068359375, "pos_frac": 0.84375, "sample": [-145.79869079589844, 165.735595703125, 168.21871948242188, 164.44277954101562, 227.21255493164062, 51.53064727783203, 81.88818359375, 126.21014404296875, 71.17594909667969, 75.19034576416016, 245.90225219726562, 49.48252868652344, 20.963945388793945, 51.11522674560547, -66.38925170898438, 132.951416015625, 64.205078125, 66.96712493896484, -22.862268447875977, 70.39354705810547, 4.643720626831055, 131.73245239257812, 71.12349700927734, 198.24578857421875, 31.146608352661133, 154.2598419189453, -26.27291488647461, -90.22249603271484, 146.8785400390625, 16.2722110748291, 45.15989685058594, -17.900436401367188, 153.88978576660156, 178.49575805664062, 141.60635375976562, 113.58042907714844, 49.80689239501953, 102.55961608886719, 16.6402587890625, 170.1090545654297, 38.399200439453125, 89.58656311035156, 67.37547302246094, 31.711669921875, 45.442832946777344, 9.471649169921875, 71.61297607421875, 35.52030944824219, -152.96243286132812, 138.26058959960938, 75.6522445678711, 48.66304016113281, 339.0492858886719, 7.25909423828125, 343.2734680175781, 244.88751220703125, 0.73260498046875, 215.1070556640625, 85.29676055908203, -55.82969665527344, -21.64952850341797, 104.52316284179688, 427.15570068359375, -5.275611877441406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000164.npy"}
|
||||
{"epoch": 0.24082232011747431, "step": 165, "batch_size": 64, "mean": 53.86854553222656, "std": 100.76016998291016, "min": -151.38934326171875, "p10": -80.43705673217774, "median": 38.56016159057617, "p90": 205.93166198730478, "max": 280.81622314453125, "pos_frac": 0.703125, "sample": [93.32916259765625, 90.90364837646484, -84.39227294921875, 19.198741912841797, -92.9344482421875, -2.6824684143066406, 76.85248565673828, 117.97055053710938, -16.151077270507812, -82.46891021728516, 128.62860107421875, -151.38934326171875, -81.36885070800781, 35.48545837402344, 34.40718078613281, -96.32714080810547, -10.463184356689453, -49.3284912109375, 160.62893676757812, 148.552490234375, 83.30101013183594, 26.51708984375, 16.514957427978516, 280.81622314453125, -146.0260772705078, 36.294952392578125, -19.410017013549805, -47.34156036376953, -4.440582275390625, 142.38858032226562, -78.26287078857422, 17.35700035095215, 214.09030151367188, 184.46035766601562, 48.452117919921875, -25.37464141845703, 81.45311737060547, 61.56785583496094, 78.2337417602539, 4.295042037963867, 40.82537078857422, 61.06262969970703, 156.6898651123047, 76.6985855102539, 216.90682983398438, -18.517013549804688, 98.69482421875, 60.464599609375, 4.595924377441406, -72.81117248535156, 30.317047119140625, 225.39938354492188, 27.09217071533203, -45.572364807128906, 60.27833938598633, 270.12823486328125, 27.548309326171875, 266.940673828125, 220.134033203125, 121.23771667480469, 114.70964050292969, 12.221473693847656, 112.30926513671875, 186.89483642578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000165.npy"}
|
||||
{"epoch": 0.2422907488986784, "step": 166, "batch_size": 64, "mean": 99.76808166503906, "std": 140.90550231933594, "min": -93.13206481933594, "p10": -36.639979553222645, "median": 67.35939979553223, "p90": 275.68581848144544, "max": 567.310546875, "pos_frac": 0.796875, "sample": [458.46575927734375, 318.0750732421875, 31.326580047607422, 497.30902099609375, -40.59046173095703, 3.867115020751953, 20.100505828857422, 71.93490600585938, 174.06369018554688, 44.723724365234375, 130.7084197998047, -10.531227111816406, 11.748512268066406, 161.0897674560547, -67.25267028808594, 79.22901916503906, 161.17843627929688, 20.087203979492188, -2.084972381591797, 193.1771240234375, 4.512796401977539, 567.310546875, 41.07619094848633, -51.0707893371582, -55.919837951660156, -52.60776138305664, 39.36595916748047, 147.46041870117188, -93.13206481933594, 240.00057983398438, 290.9794921875, 471.7228088378906, 113.00089263916016, 71.36122131347656, 27.757614135742188, 366.47625732421875, 13.782981872558594, 204.3936767578125, 28.416900634765625, 156.2817840576172, -15.935565948486328, 86.63619995117188, 18.745834350585938, 11.994247436523438, 176.81024169921875, 129.13015747070312, 213.9091033935547, -5.634967803955078, 75.27725219726562, 172.14947509765625, 32.057918548583984, 86.63078308105469, -63.28961181640625, 63.35757827758789, -5.199989318847656, -27.42218780517578, 119.4989242553711, 1.7649993896484375, 118.52595520019531, 5.7782440185546875, 91.37542724609375, 17.796125411987305, 206.24954223632812, 87.1561508178711], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000166.npy"}
|
||||
{"epoch": 0.24375917767988253, "step": 167, "batch_size": 64, "mean": 105.04924011230469, "std": 117.84534454345703, "min": -137.65362548828125, "p10": -19.9156379699707, "median": 88.15605926513672, "p90": 255.92240447998057, "max": 435.33892822265625, "pos_frac": 0.828125, "sample": [105.78132629394531, 63.739131927490234, 147.19619750976562, 215.6769561767578, 115.3182373046875, 124.14129638671875, 91.50396728515625, 70.31278228759766, 96.58576965332031, 48.48805618286133, 435.33892822265625, 192.00936889648438, 73.75558471679688, -15.8115234375, 27.3853759765625, 62.380680084228516, 82.96517181396484, -59.51848602294922, 131.17916870117188, 187.43914794921875, 22.376718521118164, 42.005615234375, 78.95674133300781, 57.22931671142578, 107.46083068847656, -137.65362548828125, 279.86260986328125, 229.34805297851562, -21.320709228515625, 173.8180694580078, -12.358848571777344, 105.36256408691406, 77.68516540527344, 60.208831787109375, 376.66534423828125, 180.859619140625, 268.5859680175781, 84.80815124511719, 208.55409240722656, -127.7354736328125, 229.9967803955078, 267.03338623046875, 331.7766418457031, 147.89193725585938, 410.94561767578125, -16.63713836669922, 179.32003784179688, 146.50692749023438, 220.429443359375, 177.38047790527344, 99.37727355957031, 11.221237182617188, -4.405759811401367, 32.068824768066406, 63.47703552246094, 134.00579833984375, 98.6823501586914, 58.752296447753906, 22.050216674804688, -101.3241195678711, 24.292964935302734, 13.549081802368164, -52.30348205566406, -21.522705078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000167.npy"}
|
||||
{"epoch": 0.24522760646108663, "step": 168, "batch_size": 64, "mean": 77.36841583251953, "std": 105.36457824707031, "min": -87.0561752319336, "p10": -49.16412849426269, "median": 54.86359214782715, "p90": 201.2569549560547, "max": 425.1435546875, "pos_frac": 0.796875, "sample": [20.2237548828125, 122.12883758544922, 27.342479705810547, 22.236186981201172, 4.788488388061523, 32.547550201416016, 140.4140625, -46.81093978881836, 23.197010040283203, 12.325538635253906, -85.4559326171875, 187.11732482910156, 425.1435546875, 91.76275634765625, -50.172637939453125, 197.091796875, 179.92584228515625, 233.2145233154297, 203.04202270507812, 49.52240753173828, 220.98455810546875, 271.1435546875, 181.18234252929688, 170.21566772460938, -46.520729064941406, -55.374786376953125, -14.802513122558594, 132.05686950683594, 150.6558837890625, 134.72781372070312, 92.78382873535156, 32.70756530761719, -63.592140197753906, 98.02510070800781, 36.32963562011719, 118.36019897460938, -71.11531066894531, -10.905147552490234, -28.459110260009766, 33.32655334472656, 28.240327835083008, 92.18912506103516, 64.5655517578125, 95.05375671386719, 115.79887390136719, 67.60635375976562, 28.919279098510742, -61.24773406982422, 278.63494873046875, 58.679588317871094, 156.9958038330078, 22.278379440307617, 27.636049270629883, 27.063507080078125, 129.07833862304688, 56.90037536621094, 28.05988121032715, -87.0561752319336, 52.82680892944336, -35.35614013671875, 376.5641174316406, 19.74732208251953, 95.81056213378906, 141.27532958984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000168.npy"}
|
||||
{"epoch": 0.24669603524229075, "step": 169, "batch_size": 64, "mean": 67.25150299072266, "std": 130.41305541992188, "min": -278.51068115234375, "p10": -65.56857528686523, "median": 44.435529708862305, "p90": 221.2905197143555, "max": 559.8470458984375, "pos_frac": 0.75, "sample": [77.12918853759766, 87.72942352294922, 2.6770362854003906, 17.179237365722656, 215.95896911621094, 112.43114471435547, 17.072525024414062, 159.64308166503906, 91.15335845947266, 35.6613883972168, -1.2204170227050781, 47.246238708496094, -278.51068115234375, 3.0577430725097656, 59.73982620239258, -25.207565307617188, 25.305931091308594, -83.70726013183594, 87.26791381835938, 170.46713256835938, 156.96847534179688, -53.26298522949219, 66.85647583007812, -89.35009765625, 208.29263305664062, 223.57546997070312, 63.3774299621582, 4.50823974609375, -15.904159545898438, 31.43706512451172, 209.09609985351562, -6.744117736816406, 19.5330753326416, 72.1963119506836, 92.671875, 128.6515655517578, 254.00851440429688, 25.73212242126465, 69.59909057617188, 234.3160858154297, 6.369474411010742, 386.58306884765625, -123.7557601928711, 88.98876953125, 6.238670349121094, 58.794212341308594, 29.34112548828125, -70.84239959716797, 341.67633056640625, -16.250411987304688, 559.8470458984375, -4.5849761962890625, 96.2872085571289, 14.546791076660156, 292.9928283691406, -9.090911865234375, 83.39244079589844, 17.0482177734375, 41.624820709228516, -31.71662139892578, -127.39253234863281, 92.44425964355469, 198.89439392089844, -143.973388671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000169.npy"}
|
||||
{"epoch": 0.24816446402349487, "step": 170, "batch_size": 64, "mean": 79.6080093383789, "std": 107.50672912597656, "min": -230.70791625976562, "p10": -31.328610229492185, "median": 65.97804260253906, "p90": 217.2148651123047, "max": 331.7049255371094, "pos_frac": 0.828125, "sample": [189.07269287109375, -137.26397705078125, 135.63946533203125, 85.95503997802734, 216.690185546875, 82.13070678710938, 133.07638549804688, 13.48101806640625, 143.60379028320312, 4.0812225341796875, 90.2801513671875, -4.905719757080078, 76.209228515625, -36.837310791015625, 9.114522933959961, -50.20855712890625, 115.29468536376953, 237.07162475585938, 16.80401611328125, 304.0160827636719, 4.687892913818359, -22.038442611694336, 173.62806701660156, 47.85878372192383, 2.3967819213867188, 21.771766662597656, -230.70791625976562, -28.225318908691406, 73.01200866699219, 197.97360229492188, 70.1759033203125, 127.27313232421875, 57.32497787475586, 79.90666198730469, 179.75540161132812, -52.02320861816406, 312.7408752441406, 5.8968048095703125, 9.042264938354492, 331.7049255371094, 74.38697814941406, -98.11317443847656, 57.841373443603516, 118.06364440917969, 200.56494140625, -32.658592224121094, 27.865015029907227, 4.022775650024414, 38.55005645751953, 61.780181884765625, 91.82234954833984, 47.71570587158203, 238.35067749023438, 40.985713958740234, 136.00802612304688, 57.89878845214844, 187.49891662597656, 217.43972778320312, -17.234554290771484, 196.811767578125, 237.293701171875, 10.764116287231445, 194.20510864257812, 17.58913803100586], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000170.npy"}
|
||||
{"epoch": 0.24963289280469897, "step": 171, "batch_size": 64, "mean": 64.2116470336914, "std": 134.4303436279297, "min": -262.27056884765625, "p10": -89.86251602172851, "median": 58.528282165527344, "p90": 212.06755676269532, "max": 585.4961547851562, "pos_frac": 0.6875, "sample": [-79.81629943847656, 219.65162658691406, 19.664710998535156, 93.28742980957031, -102.3691177368164, -49.242835998535156, 7.4203033447265625, 107.730224609375, -77.05718994140625, 138.3459014892578, 12.248374938964844, 156.39698791503906, 84.34341430664062, 109.14848327636719, 50.46794128417969, -77.64810943603516, 101.97938537597656, -98.17070007324219, 111.24620056152344, -8.112266540527344, 208.82696533203125, 106.37733459472656, 52.29148864746094, 87.005615234375, 335.5057067871094, 72.47222900390625, -98.93183135986328, 585.4961547851562, -94.16803741455078, 48.070892333984375, 64.76507568359375, -262.27056884765625, 431.319580078125, 138.25137329101562, 91.64227294921875, -114.37673950195312, 212.488525390625, 105.71611022949219, 8.115142822265625, 306.568115234375, -0.021484375, 67.07201385498047, 211.08529663085938, -23.524656295776367, -95.30079650878906, 11.9384765625, 65.01283264160156, 85.51454162597656, -60.1773796081543, 106.70347595214844, -25.73554229736328, -61.146240234375, 139.73764038085938, 40.96106719970703, 82.95095825195312, -19.564722061157227, 33.727691650390625, 159.38082885742188, -19.633766174316406, -13.43902587890625, 88.0879898071289, 17.24045181274414, 264.00823974609375, 49.98775863647461], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000171.npy"}
|
||||
{"epoch": 0.2511013215859031, "step": 172, "batch_size": 64, "mean": 68.72427368164062, "std": 113.76294708251953, "min": -197.5997314453125, "p10": -71.0709014892578, "median": 74.08094787597656, "p90": 196.8456756591797, "max": 477.4154052734375, "pos_frac": 0.703125, "sample": [-61.56288146972656, 104.24256896972656, -100.6605224609375, 88.93963623046875, 37.68231201171875, -47.367454528808594, 146.36465454101562, 101.58680725097656, 74.01374053955078, -75.14576721191406, -126.10638427734375, 53.19636154174805, 51.991798400878906, -76.9537353515625, 154.29696655273438, 88.22663879394531, 20.352245330810547, 71.32305908203125, -50.05154800415039, 72.81856536865234, -33.00212860107422, 93.91018676757812, 190.58453369140625, 144.19329833984375, 20.841651916503906, 74.14815521240234, 229.30667114257812, 199.52902221679688, -76.53001403808594, 263.427734375, -28.242538452148438, 111.38176727294922, 104.91558074951172, 90.87045288085938, 119.62722778320312, 307.69805908203125, 477.4154052734375, 44.92195129394531, 127.13372039794922, 31.499454498291016, -20.26360321044922, -43.849037170410156, 77.9085693359375, 79.4219970703125, 185.96240234375, -25.67803955078125, 13.01698112487793, 24.710126876831055, -30.837677001953125, 172.86187744140625, 158.31040954589844, -4.1725921630859375, -56.422332763671875, 101.2314453125, 107.4374771118164, -197.5997314453125, 72.81813049316406, 105.1297607421875, 228.00900268554688, 98.47184753417969, -16.663291931152344, -81.3362045288086, 248.2403564453125, 180.82867431640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000172.npy"}
|
||||
{"epoch": 0.2525697503671072, "step": 173, "batch_size": 64, "mean": 67.58930969238281, "std": 96.3588638305664, "min": -186.70962524414062, "p10": -27.002701950073238, "median": 50.85917091369629, "p90": 208.99430236816414, "max": 350.13751220703125, "pos_frac": 0.796875, "sample": [44.691192626953125, 78.45745086669922, 28.44734001159668, -3.1277389526367188, 0.5102996826171875, 52.15159606933594, 2.102825164794922, 139.38131713867188, 119.05183410644531, 9.458892822265625, -16.989501953125, -43.2431640625, 16.28758430480957, 0.7807693481445312, 104.52381896972656, 263.02178955078125, 70.0145492553711, 58.996158599853516, 70.55950164794922, 176.71405029296875, 115.33955383300781, 87.17535400390625, 83.80960845947266, -48.22039031982422, 53.02849578857422, 30.587547302246094, 350.13751220703125, 28.041522979736328, 163.1488037109375, 216.995361328125, 65.35507202148438, 18.08870506286621, -60.063995361328125, 116.00298309326172, 101.793701171875, -22.655929565429688, 118.55529022216797, 3.61297607421875, -2.2142276763916016, 230.6927490234375, 61.5976448059082, 190.32516479492188, -28.865604400634766, 39.77771759033203, -6.783821105957031, 17.041454315185547, 229.38754272460938, 116.54559326171875, -45.42639923095703, -1.0452880859375, 94.99334716796875, 49.56674575805664, 128.82313537597656, -68.66201782226562, -186.70962524414062, 336.168701171875, 37.03575897216797, 5.491565704345703, 93.10604858398438, 20.121295928955078, 270.5169372558594, 61.04972839355469, 46.622501373291016, 44.036224365234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000173.npy"}
|
||||
{"epoch": 0.2540381791483113, "step": 174, "batch_size": 64, "mean": 79.87288665771484, "std": 118.65287017822266, "min": -153.55213928222656, "p10": -52.55796585083007, "median": 59.16422653198242, "p90": 257.6055648803712, "max": 454.4465637207031, "pos_frac": 0.765625, "sample": [23.22722625732422, -6.7552642822265625, -54.79027557373047, -87.57698059082031, -4.3173828125, 14.527351379394531, -14.430980682373047, 13.069509506225586, 113.7577896118164, 203.1428985595703, 223.61131286621094, -1.4463577270507812, 63.837432861328125, 19.021146774291992, 58.65656280517578, -47.3492431640625, 113.6920166015625, 162.05613708496094, 150.59445190429688, 272.1745300292969, 58.35704040527344, 35.27022933959961, 59.67189025878906, -16.10898208618164, 330.2198181152344, -83.58013916015625, 133.78045654296875, 298.9754638671875, 29.422714233398438, 93.51612854003906, 13.12989616394043, -14.322031021118164, 61.012481689453125, -81.65950012207031, 141.08282470703125, -46.982032775878906, 166.00790405273438, 109.27477264404297, -119.42251586914062, 288.429931640625, 311.06890869140625, 328.2022399902344, 32.20647430419922, 123.47421264648438, 12.594400405883789, -153.55213928222656, 3.369901657104492, 142.16305541992188, 119.11677551269531, 145.91061401367188, 78.74639892578125, 50.08561706542969, 24.23230743408203, 141.10699462890625, -95.22782135009766, 181.59632873535156, 152.7963409423828, 95.102294921875, 47.49010467529297, 13.635128021240234, 454.4465637207031, 131.25950622558594, 70.47879028320312, 30.78354835510254], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000174.npy"}
|
||||
{"epoch": 0.2555066079295154, "step": 175, "batch_size": 64, "mean": 109.91596221923828, "std": 109.30248260498047, "min": -86.94302368164062, "p10": -8.455738067626951, "median": 87.88285827636719, "p90": 241.4574737548828, "max": 435.2728271484375, "pos_frac": 0.828125, "sample": [21.749181747436523, 37.54454803466797, 57.82658386230469, -86.94302368164062, 87.09626770019531, 153.65634155273438, 228.62307739257812, 85.42479705810547, 23.989282608032227, 146.6823272705078, 322.950927734375, -72.92796325683594, -9.409866333007812, 312.8867492675781, 118.56472778320312, 35.4345703125, 221.67947387695312, 218.5330047607422, 238.05563354492188, 163.49069213867188, 181.06817626953125, -61.276710510253906, 201.81805419921875, 435.2728271484375, 39.53607177734375, 133.648193359375, 324.0606689453125, 49.899818420410156, 133.95343017578125, 83.34037017822266, -6.229438781738281, 54.263648986816406, 310.1455993652344, 18.47903060913086, 4.019233703613281, 38.68457794189453, -1.7557563781738281, 265.6088562011719, 8.386978149414062, 84.09288787841797, 123.7314453125, 117.34820556640625, 125.0355453491211, 222.1778564453125, -1.7021961212158203, 221.455322265625, 242.9154052734375, 183.3749237060547, -25.769702911376953, 101.75157928466797, 174.22616577148438, 94.44316864013672, 88.66944885253906, 153.64822387695312, 44.521583557128906, 72.02793884277344, 141.8016815185547, -9.769403457641602, 43.19641876220703, 235.34555053710938, -40.10504913330078, 70.69065856933594, -0.6289825439453125, 54.31170654296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000175.npy"}
|
||||
{"epoch": 0.25697503671071953, "step": 176, "batch_size": 64, "mean": 91.51618957519531, "std": 121.04995727539062, "min": -63.20860290527344, "p10": -35.57849044799804, "median": 64.62747192382812, "p90": 225.26477203369143, "max": 507.38775634765625, "pos_frac": 0.78125, "sample": [6.040130615234375, 30.37567138671875, 98.85832214355469, 206.5806121826172, 68.14401245117188, 28.672077178955078, 10.887199401855469, 324.2493591308594, 17.821426391601562, -26.18365478515625, -37.170814514160156, 472.4325256347656, -15.021116256713867, -63.20860290527344, 236.32958984375, 71.27005004882812, 26.250823974609375, 219.3449249267578, 179.72323608398438, -7.346534729003906, -3.3500213623046875, 70.13260650634766, 148.4080352783203, -48.813209533691406, 227.80184936523438, 3.3029747009277344, 29.63020133972168, 194.5563507080078, 178.93096923828125, 10.296791076660156, 116.38294982910156, -60.84893798828125, 149.30291748046875, -48.981056213378906, 41.06834411621094, 171.22421264648438, -43.381587982177734, 82.22611236572266, 182.33526611328125, 139.5215606689453, 190.0406494140625, 0.1099700927734375, -41.24256896972656, -0.5920600891113281, 156.41876220703125, 140.01565551757812, 128.71481323242188, 72.88025665283203, 140.54342651367188, 285.91485595703125, 35.46128845214844, -31.863067626953125, 25.200347900390625, 13.664373397827148, 138.0795440673828, 61.110931396484375, -15.393600463867188, 12.98678207397461, 130.62338256835938, 1.3003196716308594, 167.81503295898438, 507.38775634765625, 7.854713439941406, 342.20880126953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000176.npy"}
|
||||
{"epoch": 0.25844346549192365, "step": 177, "batch_size": 64, "mean": 91.91362762451172, "std": 119.79065704345703, "min": -107.15956115722656, "p10": -21.937765502929683, "median": 69.43682479858398, "p90": 245.03480834960948, "max": 587.4664306640625, "pos_frac": 0.75, "sample": [166.9478759765625, 49.52874755859375, 220.0040740966797, 122.82691192626953, 64.1234130859375, 128.19351196289062, -107.15956115722656, 90.08621215820312, 210.12832641601562, 168.23904418945312, 255.67820739746094, 45.749473571777344, 92.47411346435547, 260.2010192871094, 130.5270233154297, 84.36561584472656, 20.22356414794922, 363.6938781738281, 29.62420654296875, 169.35992431640625, 71.39874267578125, 178.0516357421875, -75.89056396484375, 150.94940185546875, -3.0220985412597656, 43.855255126953125, 201.53912353515625, -23.6158447265625, -7.7573089599609375, 5.74969482421875, 67.47490692138672, 289.12957763671875, 104.53968811035156, -16.764007568359375, 27.08538818359375, 81.67216491699219, -18.022247314453125, 258.23443603515625, 0.4106616973876953, -27.985082626342773, -77.70570373535156, 151.3128662109375, -4.214656829833984, 50.56725311279297, -80.4508056640625, 154.8612823486328, 127.03181457519531, 16.29263687133789, -11.497596740722656, -4.52239990234375, 220.20021057128906, 10.087724685668945, 82.16818237304688, -14.82476806640625, 190.6954345703125, 37.33858871459961, 37.63511657714844, -26.85261344909668, 165.18618774414062, 40.925750732421875, 587.4664306640625, 289.16632080078125, 86.4887924194336, -16.732833862304688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000177.npy"}
|
||||
{"epoch": 0.2599118942731278, "step": 178, "batch_size": 64, "mean": 115.63043975830078, "std": 144.10374450683594, "min": -139.31419372558594, "p10": -36.25558395385741, "median": 96.29513549804688, "p90": 322.2686462402345, "max": 572.281005859375, "pos_frac": 0.765625, "sample": [54.10666275024414, 18.934814453125, 165.58749389648438, -12.821746826171875, 142.08099365234375, 91.3922348022461, 112.6785659790039, 38.838470458984375, 21.83417510986328, 294.10736083984375, -0.9043045043945312, 118.75372314453125, 287.0229187011719, 111.77191162109375, 51.37501525878906, 158.6597900390625, 403.3098449707031, 116.19105529785156, 572.281005859375, -55.44630432128906, 209.82371520996094, 61.22126007080078, 13.732391357421875, 66.1924819946289, -4.493280410766602, 182.98663330078125, 386.9621276855469, -50.572166442871094, 58.330230712890625, -3.5035629272460938, -78.44651794433594, 99.66055297851562, 222.30160522460938, 22.767078399658203, 130.44647216796875, 192.6047821044922, 134.77127075195312, -94.301513671875, 65.42481231689453, 260.58795166015625, 31.398571014404297, 117.27452087402344, -139.31419372558594, 446.2888488769531, -10.657159805297852, 158.33041381835938, 120.02189636230469, 160.98495483398438, -41.09436798095703, 458.1703796386719, 36.801048278808594, 142.06515502929688, 347.7118225097656, -4.885873794555664, 16.255859375, 222.8315887451172, 92.92971801757812, -24.965087890625, 64.18760681152344, 219.98468017578125, 334.3377685546875, -90.82888793945312, 186.89907836914062, -10.630134582519531], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000178.npy"}
|
||||
{"epoch": 0.26138032305433184, "step": 179, "batch_size": 64, "mean": 80.14815521240234, "std": 140.57762145996094, "min": -288.9915771484375, "p10": -78.82952270507812, "median": 79.91036224365234, "p90": 296.60810852050787, "max": 386.7801208496094, "pos_frac": 0.6875, "sample": [118.1596908569336, 161.3231658935547, 177.95120239257812, 386.7801208496094, 61.317481994628906, -95.35015869140625, 187.7766876220703, 23.988140106201172, -21.084213256835938, 13.683378219604492, 83.5926513671875, 82.20540618896484, 46.80413818359375, 93.59711456298828, -14.432929992675781, 97.77616882324219, -79.9471435546875, 317.9468078613281, -59.06380844116211, -50.64154052734375, 74.37906646728516, -76.22174072265625, 356.8216247558594, 25.36960792541504, 230.51670837402344, 126.0455322265625, 159.9047393798828, 300.8836669921875, 170.48171997070312, 37.94551086425781, 81.61322021484375, 286.6318054199219, 343.53753662109375, 94.244140625, 80.07225036621094, 130.20936584472656, -36.8311653137207, -32.68855667114258, -1.0793838500976562, 89.7564468383789, 73.21729278564453, -36.828582763671875, 79.74847412109375, -92.40583801269531, 128.49623107910156, 332.6499938964844, -91.79967498779297, 242.5204620361328, 115.4428939819336, -119.16204071044922, -54.02680206298828, 242.1188507080078, 31.255386352539062, 245.02679443359375, 307.9029846191406, 217.5767059326172, -171.8388671875, 138.9603271484375, -58.87049102783203, -70.19285583496094, -288.9915771484375, 15.7867431640625, 23.726308822631836, -54.80530548095703], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000179.npy"}
|
||||
{"epoch": 0.26284875183553597, "step": 180, "batch_size": 64, "mean": 103.84219360351562, "std": 127.082275390625, "min": -209.7353973388672, "p10": -24.787814903259278, "median": 88.10892105102539, "p90": 267.4300903320313, "max": 500.0198974609375, "pos_frac": 0.796875, "sample": [103.5105972290039, 163.63021850585938, -84.51838684082031, 3.247478485107422, -69.53398895263672, 500.0198974609375, -24.39803695678711, 68.71945190429688, 105.59915161132812, 60.317535400390625, 5.113574981689453, 231.10031127929688, -209.7353973388672, 139.25213623046875, 185.17738342285156, 120.90267944335938, 86.96788024902344, 86.49710083007812, 109.04695892333984, 65.01506042480469, 168.81521606445312, -149.03712463378906, 59.26237869262695, 295.3663024902344, 70.77997589111328, 233.68121337890625, 166.22409057617188, -3.6300125122070312, 39.044654846191406, 55.01454544067383, 282.76690673828125, 3.193490982055664, 257.917724609375, 160.015380859375, 77.48433685302734, -4.94837760925293, 69.71073150634766, 208.54598999023438, -20.03636932373047, 183.58187866210938, 89.24996185302734, 19.22724151611328, 244.1429443359375, 66.30436706542969, -132.67535400390625, 85.93852233886719, -0.713470458984375, 262.30450439453125, 245.38380432128906, 91.53162384033203, 152.3446044921875, 271.8307800292969, -24.954862594604492, 340.6273193359375, 91.33463287353516, -23.703445434570312, 197.22073364257812, 154.8586883544922, 28.32170867919922, -76.2656478881836, 269.62677001953125, 282.40399169921875, 29.75151824951172, 182.12496948242188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000180.npy"}
|
||||
{"epoch": 0.2643171806167401, "step": 181, "batch_size": 64, "mean": 64.73393249511719, "std": 117.56401062011719, "min": -212.371337890625, "p10": -60.10828399658203, "median": 45.760847091674805, "p90": 212.33860473632825, "max": 396.2471923828125, "pos_frac": 0.765625, "sample": [137.7688446044922, -61.423255920410156, 15.926233291625977, 63.84396743774414, 112.1402816772461, -3.442323684692383, 33.601497650146484, 27.38005828857422, 86.9144058227539, 129.00819396972656, -6.026096343994141, 16.833955764770508, 4.662223815917969, 18.30596160888672, -161.8089599609375, 224.58270263671875, 115.76100158691406, -29.32396697998047, 82.13848876953125, -18.65555191040039, 38.00368118286133, -59.73834228515625, -16.4208984375, 227.1697540283203, -82.32810974121094, 88.66976928710938, 135.93453979492188, 287.251953125, 47.18073272705078, 56.38688659667969, 137.7940673828125, 81.41515350341797, -206.08995056152344, 47.251792907714844, 23.034332275390625, 84.69497680664062, 12.457597732543945, 183.57936096191406, 130.019775390625, -60.26683044433594, 15.82244873046875, -212.371337890625, 227.873046875, -18.763538360595703, 353.99359130859375, 170.6468963623047, -159.0384063720703, 52.06696319580078, -31.055198669433594, 137.34609985351562, 176.76138305664062, 396.2471923828125, 156.53179931640625, 40.27798080444336, 135.17274475097656, 7.381877899169922, 5.031976699829102, 2.3440780639648438, 289.2879638671875, 44.34096145629883, 29.684181213378906, 183.76904296875, 152.37890625, 43.05298614501953], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000181.npy"}
|
||||
{"epoch": 0.2657856093979442, "step": 182, "batch_size": 64, "mean": 114.658935546875, "std": 139.10092163085938, "min": -248.81594848632812, "p10": -54.9115077972412, "median": 105.5335464477539, "p90": 288.75898437500007, "max": 398.6893310546875, "pos_frac": 0.828125, "sample": [292.9827880859375, 78.73104858398438, 326.6080322265625, 201.73385620117188, 87.989013671875, 225.1903533935547, 240.15728759765625, 278.9034423828125, 354.04241943359375, 73.68341064453125, 128.51365661621094, 46.49259567260742, 28.160402297973633, -16.302783966064453, 275.89990234375, -248.81594848632812, 237.01107788085938, 65.49613952636719, 5.108518600463867, 48.68595886230469, 398.6893310546875, -3.441099166870117, 262.7229919433594, 121.03266906738281, 156.55426025390625, -61.099510192871094, 254.70620727539062, 90.034423828125, 14.744319915771484, 67.95519256591797, 148.85568237304688, 277.6973876953125, -116.69710540771484, -127.59160614013672, 166.84402465820312, 244.15283203125, 48.513336181640625, 147.10690307617188, 226.83071899414062, 235.33018493652344, -65.97342681884766, 76.14275360107422, 33.51329040527344, 136.52085876464844, 162.77391052246094, 179.38742065429688, 185.80023193359375, 128.1341094970703, -133.6134796142578, 5.196403503417969, 387.1581726074219, 146.9771728515625, 51.25251770019531, 9.823732376098633, -31.514373779296875, 6.141674041748047, -129.88302612304688, 71.10472106933594, 296.6849060058594, 19.179725646972656, 356.4549560546875, -40.472835540771484, 45.735023498535156, 158.43479919433594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000182.npy"}
|
||||
{"epoch": 0.26725403817914833, "step": 183, "batch_size": 64, "mean": 61.56635665893555, "std": 112.04437255859375, "min": -159.7234344482422, "p10": -81.91487197875976, "median": 58.70620155334473, "p90": 196.79857788085943, "max": 355.31103515625, "pos_frac": 0.765625, "sample": [19.454299926757812, 76.65479278564453, -7.325223922729492, 56.27925109863281, 15.772151947021484, 175.9486846923828, 151.2696075439453, 141.46157836914062, 42.18890380859375, 52.75267791748047, -83.25495910644531, 141.9830322265625, 65.01518249511719, 17.051721572875977, 29.07501220703125, 88.03447723388672, 338.68585205078125, 247.5704345703125, 143.1065216064453, 28.69093132019043, 65.8365249633789, 118.71992492675781, 301.43524169921875, 98.10586547851562, -15.365486145019531, 5.241554260253906, 69.48931884765625, 61.13315200805664, 0.835174560546875, -159.7234344482422, 180.9149169921875, 85.20333099365234, -54.515106201171875, 13.48521614074707, 182.73739624023438, 70.33218383789062, 44.88228988647461, 100.4394760131836, 133.9619140625, 238.48306274414062, -135.4040985107422, -43.18439483642578, 355.31103515625, -78.78800201416016, 26.94713592529297, -36.99665832519531, 82.67940521240234, -100.27810668945312, -128.23419189453125, -110.62982177734375, 159.76519775390625, 112.28253173828125, 205.97874450683594, 202.82479858398438, 90.79096984863281, 52.847068786621094, 27.918128967285156, -73.12812042236328, 153.98886108398438, 86.64990234375, -154.58685302734375, -62.390079498291016, 23.1707763671875, 0.6651153564453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000183.npy"}
|
||||
{"epoch": 0.2687224669603524, "step": 184, "batch_size": 64, "mean": 65.5010986328125, "std": 114.14273834228516, "min": -155.44125366210938, "p10": -49.804735565185545, "median": 33.50018501281738, "p90": 211.72426757812502, "max": 347.9111633300781, "pos_frac": 0.75, "sample": [29.442108154296875, -108.43221282958984, 65.34984588623047, 81.61365509033203, 204.3796844482422, 195.02020263671875, 2.971405029296875, 14.4266357421875, 161.39151000976562, -17.231712341308594, 319.4879150390625, 347.9111633300781, 63.646728515625, 219.51119995117188, -46.31969451904297, 204.11639404296875, 10.449264526367188, 214.2528076171875, 96.26113891601562, 287.9521484375, -23.61920166015625, 311.17230224609375, -88.293212890625, 16.800155639648438, 41.35942459106445, 37.210357666015625, 43.09368896484375, 5.700447082519531, 89.11306762695312, 3.908620834350586, 37.10383605957031, 98.09640502929688, 152.1560516357422, 202.01905822753906, -92.62928771972656, 205.8243408203125, 17.106971740722656, -23.985923767089844, -41.0647087097168, 24.624874114990234, 74.65047454833984, -19.713821411132812, 31.800369262695312, 6.222208023071289, 14.590141296386719, 200.0603485107422, 72.76458740234375, 55.61098861694336, -53.21612548828125, -132.3682403564453, -51.29832458496094, 24.8355770111084, -5.213230133056641, 3.4405059814453125, -155.44125366210938, 34.08213806152344, -41.423065185546875, 177.19369506835938, 32.91823196411133, 109.0103988647461, 17.498497009277344, -1.082672119140625, 99.39936828613281, 335.8515930175781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000184.npy"}
|
||||
{"epoch": 0.2701908957415565, "step": 185, "batch_size": 64, "mean": 51.37147521972656, "std": 123.07954406738281, "min": -195.67514038085938, "p10": -65.13702812194823, "median": 27.264354705810547, "p90": 205.4223220825196, "max": 429.9853820800781, "pos_frac": 0.609375, "sample": [-162.80783081054688, -67.66688537597656, -21.135393142700195, 85.56881713867188, 81.32414245605469, 17.33884620666504, -5.581220626831055, 76.71903991699219, 410.0514831542969, -0.10239410400390625, 17.424510955810547, 110.7847671508789, 107.59173583984375, -7.208225250244141, 264.133056640625, 35.06982421875, -9.872882843017578, 35.075416564941406, 142.6636505126953, -116.99212646484375, -9.968955993652344, 38.61940002441406, 26.35598373413086, 77.04429626464844, 80.73941040039062, -6.052619934082031, -118.81037902832031, 22.70264434814453, 87.27124786376953, -137.21530151367188, 38.501922607421875, 21.230220794677734, 93.74945068359375, 97.06199645996094, 26.530426025390625, 368.2930908203125, -53.04481506347656, 29.940040588378906, 190.44415283203125, -44.61690902709961, 34.59629440307617, 0.3412761688232422, -19.398048400878906, -9.192245483398438, -24.3110294342041, -1.2578182220458984, 324.0140075683594, 140.56005859375, 152.43984985351562, -0.11380577087402344, 429.9853820800781, -33.029563903808594, 27.99828338623047, 67.45146179199219, -14.964859008789062, 211.84153747558594, 34.94781494140625, 126.92523193359375, -195.67514038085938, 271.0198669433594, -78.75127410888672, 90.42090606689453, -9.993240356445312, -59.23402786254883], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000185.npy"}
|
||||
{"epoch": 0.27165932452276065, "step": 186, "batch_size": 64, "mean": 97.591064453125, "std": 119.95823669433594, "min": -197.9208984375, "p10": -24.304633712768556, "median": 79.59574127197266, "p90": 237.80258789062506, "max": 463.24365234375, "pos_frac": 0.8125, "sample": [-197.9208984375, 125.27824401855469, 317.3908386230469, 149.8588409423828, 150.91146850585938, 2.5003662109375, 80.56857299804688, 44.960079193115234, 214.43377685546875, 157.0322265625, 68.91973876953125, 221.3173828125, 1.1148605346679688, 83.561767578125, 38.70840835571289, -24.404926300048828, 94.6572265625, -117.32179260253906, 41.98378372192383, 49.149749755859375, 313.1305236816406, 45.926273345947266, -5.631217956542969, 174.65806579589844, 40.612281799316406, 78.62290954589844, 102.49497985839844, 148.815185546875, 330.039306640625, 104.85832214355469, 179.77749633789062, -28.47464942932129, 154.115234375, 69.30937194824219, 28.38435173034668, 125.62791442871094, 396.67791748046875, 244.86767578125, 49.12312698364258, 4.630329132080078, 158.7857666015625, 29.226091384887695, 76.60235595703125, 62.257354736328125, -73.37734985351562, 403.8427734375, -20.463241577148438, 73.44806671142578, -24.07061767578125, -44.94629669189453, 463.24365234375, 158.96200561523438, 70.46829223632812, 144.1658172607422, -0.109161376953125, 143.4207763671875, 94.09178161621094, -26.429588317871094, 5.0985870361328125, 122.63691711425781, 145.3740234375, 134.36036682128906, -5.628509521484375, 94.60321044921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000186.npy"}
|
||||
{"epoch": 0.27312775330396477, "step": 187, "batch_size": 64, "mean": 80.84567260742188, "std": 107.22187805175781, "min": -151.29153442382812, "p10": -36.967428588867186, "median": 61.01144790649414, "p90": 215.2120788574219, "max": 391.4598388671875, "pos_frac": 0.78125, "sample": [-151.29153442382812, 210.62228393554688, -22.471471786499023, 58.923274993896484, -77.01469421386719, 234.81985473632812, 82.64594268798828, 157.84576416015625, 53.595680236816406, 41.36354446411133, 83.28648376464844, 63.045204162597656, 337.5389404296875, 78.81194305419922, 124.44697570800781, 212.44000244140625, 162.08035278320312, 65.6378173828125, 113.44790649414062, -46.118831634521484, 57.51863098144531, 36.23063659667969, 391.4598388671875, -32.1004638671875, 161.17137145996094, 57.37837219238281, 232.61184692382812, -37.28829574584961, 189.92384338378906, 43.17295837402344, -45.705841064453125, 78.4260482788086, 19.058792114257812, -55.99598693847656, -36.2187385559082, 155.75128173828125, -34.57048797607422, 9.611181259155273, -6.902549743652344, 215.33541870117188, 149.5516815185547, 348.96051025390625, 93.95647430419922, 43.515037536621094, 65.60675048828125, 34.433712005615234, 57.75136184692383, 40.200897216796875, -119.37720489501953, 41.79035186767578, 104.56928253173828, -13.847587585449219, 156.4573516845703, 87.82362365722656, 76.04244995117188, 199.2375946044922, 86.28164672851562, 2.1301345825195312, 58.977691650390625, 228.05699157714844, 214.92428588867188, 38.74524688720703, -22.869565963745117, 18.681467056274414], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000187.npy"}
|
||||
{"epoch": 0.2745961820851689, "step": 188, "batch_size": 64, "mean": 97.13883209228516, "std": 119.12627410888672, "min": -127.86939239501953, "p10": -17.246684265136714, "median": 74.27271270751953, "p90": 285.40931396484376, "max": 496.2091064453125, "pos_frac": 0.8125, "sample": [290.2723083496094, 123.96797943115234, 243.93577575683594, 63.77437210083008, 75.2160873413086, -2.8163604736328125, -29.729171752929688, -120.1723403930664, 290.8521728515625, 80.96211242675781, 53.370330810546875, -31.296730041503906, 112.74907684326172, 81.02186584472656, 282.52386474609375, 246.92079162597656, 94.46343231201172, -19.84845733642578, 61.642486572265625, 9.300674438476562, 6.8990020751953125, 29.336669921875, 36.81242370605469, 142.58206176757812, 72.79125213623047, 25.433250427246094, 11.581863403320312, -4.812534332275391, 318.6917419433594, 47.745574951171875, 354.86822509765625, -127.86939239501953, 286.64593505859375, 112.34742736816406, 159.6602783203125, 112.97247314453125, 64.9224853515625, 188.31666564941406, -9.992233276367188, 189.45004272460938, 166.6415557861328, 13.684890747070312, -83.53652954101562, -21.31757164001465, 193.97364807128906, 41.46690368652344, 213.07098388671875, 25.864116668701172, 12.233627319335938, 324.31329345703125, 87.32511138916016, 167.12136840820312, 53.46279525756836, -11.026042938232422, 167.197509765625, 81.17190551757812, 99.86173248291016, 496.2091064453125, 94.66293334960938, 73.32933807373047, 76.2674560546875, -11.175880432128906, 13.632026672363281, 16.957517623901367], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000188.npy"}
|
||||
{"epoch": 0.27606461086637296, "step": 189, "batch_size": 64, "mean": 80.89956665039062, "std": 133.24851989746094, "min": -170.42987060546875, "p10": -64.2357925415039, "median": 58.43536949157715, "p90": 200.66311645507815, "max": 576.6710205078125, "pos_frac": 0.78125, "sample": [41.32386016845703, 178.7076416015625, -118.47453308105469, 350.2637939453125, -68.23917388916016, -54.894569396972656, 43.35022735595703, 4.322807312011719, 1.4209346771240234, 40.35174560546875, 148.8533935546875, 153.38150024414062, 149.88751220703125, -7.117279052734375, 158.20913696289062, 46.378265380859375, 193.52874755859375, 45.31422805786133, -170.42987060546875, 5.808633804321289, -81.0353775024414, 29.476579666137695, 78.95573425292969, 172.0858154296875, 145.64114379882812, 81.72482299804688, -43.65266418457031, 55.89651107788086, 33.453495025634766, 190.59100341796875, 5.662120819091797, 21.699880599975586, 95.11495971679688, 482.040283203125, 61.02204895019531, -23.26372528076172, 24.34520721435547, 221.15126037597656, 85.03257751464844, 12.992149353027344, -161.31997680664062, 121.53812408447266, -50.61260986328125, 121.7003402709961, 58.739070892333984, 331.4154052734375, 168.55751037597656, 576.6710205078125, -21.79486846923828, 128.50894165039062, 173.1173095703125, 29.380168914794922, 79.92527770996094, 157.7870330810547, 109.21273803710938, 203.720703125, 244.15121459960938, 95.43302917480469, -0.9066619873046875, -150.06402587890625, 58.13166809082031, -75.11103820800781, 23.690231323242188, 164.82078552246094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000189.npy"}
|
||||
{"epoch": 0.2775330396475771, "step": 190, "batch_size": 64, "mean": 95.25032043457031, "std": 138.31202697753906, "min": -198.79246520996094, "p10": -63.017374420166014, "median": 67.86361312866211, "p90": 307.9510498046875, "max": 420.7372741699219, "pos_frac": 0.796875, "sample": [-3.6136474609375, 114.65856170654297, 184.90927124023438, 157.78964233398438, 19.29041290283203, -81.81708526611328, 252.85855102539062, 43.64189910888672, 12.856464385986328, 356.8375244140625, 82.97314453125, 69.33780670166016, 134.86061096191406, 20.097007751464844, 358.85638427734375, 142.62298583984375, 103.66789245605469, 76.05201721191406, 193.75283813476562, 18.40631103515625, 2.7015552520751953, 265.0103759765625, -198.79246520996094, 65.61922454833984, 262.1053161621094, -23.449996948242188, 61.044036865234375, 321.3260498046875, 154.43247985839844, 54.6015625, -17.514328002929688, 306.4403076171875, 20.667373657226562, -4.658349990844727, 128.3443603515625, 79.87776184082031, -94.02729034423828, -64.2806396484375, -150.20806884765625, 16.480026245117188, 116.37032318115234, -60.06975555419922, 100.24895477294922, 54.05702209472656, -26.696748733520508, -87.98382568359375, 97.3602294921875, 308.5985107421875, 52.91033935546875, 59.546897888183594, -188.8704376220703, 390.06744384765625, 119.35333251953125, 25.87711524963379, 206.83543395996094, 106.28458404541016, 66.38941955566406, 381.2120361328125, 4.822486877441406, 420.7372741699219, 206.5726318359375, 221.81922912597656, 34.97187423706055, 41.847503662109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000190.npy"}
|
||||
{"epoch": 0.2790014684287812, "step": 191, "batch_size": 64, "mean": 136.34649658203125, "std": 148.77737426757812, "min": -142.0181884765625, "p10": -7.230722808837889, "median": 113.85742950439453, "p90": 318.81248474121094, "max": 557.7564697265625, "pos_frac": 0.859375, "sample": [465.1551208496094, 109.53019714355469, 557.7564697265625, 32.07133483886719, 297.1974182128906, 72.05302429199219, 135.91441345214844, 268.7774353027344, 15.762395858764648, 355.41180419921875, 99.47569274902344, 162.39828491210938, -7.647926330566406, 263.71319580078125, 282.0091857910156, 38.40186309814453, 37.61028289794922, 80.10523223876953, 30.933853149414062, 81.31778717041016, 263.6033935546875, 60.359832763671875, 43.14996337890625, 301.701171875, 123.39925384521484, 4.1375274658203125, 8.52629280090332, 530.9663696289062, 176.57521057128906, -23.17523956298828, 217.98724365234375, -72.80975341796875, 17.020172119140625, -49.47098159790039, 140.96730041503906, 177.385498046875, 172.07296752929688, 143.99652099609375, 320.88592529296875, 29.935575485229492, 8.26637077331543, 245.93008422851562, 217.98294067382812, -142.0181884765625, 73.22334289550781, 504.7397766113281, 32.11531066894531, -56.35105895996094, 118.18466186523438, 102.06297302246094, 182.48228454589844, 313.9744567871094, 30.723960876464844, -1.8846855163574219, 126.02976989746094, 175.09947204589844, -62.496009826660156, 122.89871215820312, -6.2572479248046875, 126.5656509399414, 357.0443115234375, 35.57379913330078, 86.28129577636719, 170.84317016601562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000191.npy"}
|
||||
{"epoch": 0.28046989720998533, "step": 192, "batch_size": 64, "mean": 91.21162414550781, "std": 145.2598114013672, "min": -246.18869018554688, "p10": -54.44992065429687, "median": 73.8321418762207, "p90": 311.84131469726566, "max": 526.2483520507812, "pos_frac": 0.734375, "sample": [-57.137611389160156, -137.5897674560547, 40.77069091796875, 166.63092041015625, 132.63027954101562, -32.11711883544922, -48.17864227294922, -3.3007373809814453, 32.3861083984375, -246.18869018554688, 30.792694091796875, 97.80740356445312, -36.92445373535156, 217.6219940185547, 18.595382690429688, 40.95652770996094, -76.76213073730469, 106.05494689941406, -44.311187744140625, 169.0894775390625, -117.18419647216797, 403.8091125488281, -42.96608352661133, 75.5770492553711, -17.655532836914062, 317.1234130859375, 354.43304443359375, 377.3632507324219, 160.23141479492188, 359.20208740234375, 96.38920593261719, 78.84541320800781, 526.2483520507812, 125.15524291992188, 172.65289306640625, 25.25758934020996, 115.11044311523438, 213.87379455566406, 270.0832214355469, 57.86105728149414, 44.79008483886719, 299.51641845703125, 7.457178115844727, 37.35224914550781, 132.16456604003906, 57.997398376464844, 60.84192657470703, -164.3693084716797, 203.65487670898438, 74.40772247314453, 82.51175689697266, -11.16058349609375, -44.401206970214844, -62.82025909423828, 16.783533096313477, 229.03482055664062, 73.25656127929688, -28.947006225585938, 39.27618408203125, 210.32772827148438, 139.67822265625, 104.11811828613281, 79.23588562011719, 334.6003112792969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000192.npy"}
|
||||
{"epoch": 0.28193832599118945, "step": 193, "batch_size": 64, "mean": 53.221702575683594, "std": 139.67852783203125, "min": -414.28167724609375, "p10": -82.29847106933592, "median": 54.093040466308594, "p90": 225.68218536376958, "max": 361.3994140625, "pos_frac": 0.65625, "sample": [115.52641296386719, -16.70764923095703, 29.171119689941406, 189.62185668945312, -414.28167724609375, 95.9330825805664, 65.65380096435547, 57.23219299316406, -403.68035888671875, 304.5050048828125, 65.51436614990234, -4.878225326538086, 187.11090087890625, -35.51499557495117, -19.79193115234375, 91.36997985839844, 94.27257537841797, 125.60302734375, -0.25078582763671875, 229.086669921875, 3.3283119201660156, -17.63404083251953, 14.24551773071289, 153.417724609375, 23.413175582885742, 126.99151611328125, -47.22829055786133, -33.369056701660156, 1.9430694580078125, -59.97917175292969, -25.620553970336914, 217.73838806152344, -89.40281677246094, -94.8261489868164, 243.46267700195312, -125.45197296142578, 186.30654907226562, -99.0498275756836, 14.300233840942383, 50.953887939453125, 10.318984985351562, 140.84835815429688, 267.7999572753906, 166.75146484375, 111.73695373535156, -135.94973754882812, 316.3897705078125, 91.90767669677734, 123.02814483642578, 124.13631439208984, -57.98255157470703, 96.95939636230469, -18.818462371826172, -65.72166442871094, -44.61616516113281, 141.58279418945312, 361.3994140625, 131.45358276367188, -53.79117965698242, 21.018035888671875, 276.407470703125, 81.7709732055664, 19.776611328125, 100.74811553955078], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000193.npy"}
|
||||
{"epoch": 0.2834067547723935, "step": 194, "batch_size": 64, "mean": 86.59368133544922, "std": 100.32608032226562, "min": -98.33474731445312, "p10": -51.169614410400385, "median": 66.06909561157227, "p90": 244.2615966796875, "max": 300.4742126464844, "pos_frac": 0.8125, "sample": [140.03421020507812, 20.19281768798828, 33.09564208984375, 12.800125122070312, 120.76046752929688, 260.1180419921875, 105.70558166503906, 156.3927459716797, -26.75299072265625, 19.692394256591797, 29.09063148498535, 250.33560180664062, -69.08206176757812, -58.35516357421875, 44.49176788330078, 62.291259765625, 210.0174560546875, 60.24555206298828, 62.48280715942383, 132.55105590820312, 31.172794342041016, 78.22789001464844, 14.68350601196289, 7.056621551513672, 119.43238067626953, -75.014892578125, -3.724172592163086, 214.99343872070312, 66.3163070678711, 77.83385467529297, 254.60414123535156, 133.90269470214844, 241.9353485107422, 177.10592651367188, 29.39706802368164, 285.28924560546875, 63.90064239501953, 32.842742919921875, 144.71974182128906, 47.8609619140625, 189.29893493652344, -84.80752563476562, 44.04087829589844, -14.652565002441406, 104.85430908203125, 168.29092407226562, -3.429584503173828, -98.33474731445312, 40.90430450439453, -55.221282958984375, 109.92164611816406, -64.71967315673828, 63.602203369140625, 245.25856018066406, 79.70785522460938, 300.4742126464844, 131.02809143066406, 65.82188415527344, 292.31298828125, 87.1045913696289, 178.26519775390625, 180.41787719726562, 114.92793273925781, -41.715721130371094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000194.npy"}
|
||||
{"epoch": 0.28487518355359764, "step": 195, "batch_size": 64, "mean": 93.7316665649414, "std": 116.76734924316406, "min": -96.92083740234375, "p10": -44.9694034576416, "median": 79.96736145019531, "p90": 243.4078491210938, "max": 394.6134948730469, "pos_frac": 0.796875, "sample": [82.76483917236328, -26.420989990234375, 221.55516052246094, 46.13014602661133, 71.85902404785156, 394.6134948730469, 72.9573745727539, 311.96026611328125, 223.3995361328125, -9.172346115112305, -6.670257568359375, 49.961395263671875, 3.2907791137695312, -93.45954895019531, 248.1053466796875, 117.60935974121094, 55.972900390625, 217.80738830566406, -42.68903350830078, 330.34991455078125, -43.5693473815918, 103.35182189941406, -96.92083740234375, 38.415374755859375, 105.4461669921875, 101.97879791259766, 221.79940795898438, 314.9622802734375, 147.62649536132812, 124.6088638305664, 147.05841064453125, -79.39969635009766, 73.07042694091797, 179.09805297851562, 13.47027587890625, 153.0439910888672, -29.896177291870117, 1.4317245483398438, 172.03921508789062, 205.8827362060547, -95.81145477294922, 256.57049560546875, 43.49553298950195, 80.46819305419922, 389.27020263671875, 145.1142578125, 79.4665298461914, 60.45713806152344, -56.342994689941406, 12.75459098815918, 159.5299530029297, -72.04566955566406, 232.447021484375, 111.12763977050781, 20.884963989257812, 86.83279418945312, 31.077354431152344, 22.89759635925293, 18.3452205657959, 85.69930267333984, -45.569427490234375, 51.86738967895508, 147.1588134765625, 109.70831298828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000195.npy"}
|
||||
{"epoch": 0.28634361233480177, "step": 196, "batch_size": 64, "mean": 106.43045043945312, "std": 142.9573974609375, "min": -218.5308837890625, "p10": -49.92964172363281, "median": 80.9216194152832, "p90": 307.9012451171875, "max": 444.94635009765625, "pos_frac": 0.765625, "sample": [195.91085815429688, 205.08221435546875, 312.79827880859375, 218.32005310058594, -46.75910186767578, 289.98565673828125, 301.927490234375, 437.60784912109375, 232.06686401367188, 73.92831420898438, 9.931865692138672, 48.609291076660156, 276.94970703125, 196.42483520507812, 48.194854736328125, 11.75370979309082, -28.12875747680664, 90.74562072753906, -6.3095245361328125, 49.35328674316406, 427.35772705078125, -6.034168243408203, 52.74949645996094, 35.772151947021484, 131.37002563476562, 133.59530639648438, 124.67033386230469, 146.56948852539062, 48.04328918457031, -11.829381942749023, 113.70805358886719, 77.1927261352539, 438.9598388671875, 444.94635009765625, -20.152244567871094, 177.5365447998047, 84.6505126953125, 64.41368103027344, 109.75102233886719, 89.01718139648438, 66.54898071289062, 62.087547302246094, 220.61376953125, -2.5924072265625, 140.70758056640625, 31.694355010986328, 84.98680114746094, -151.9547119140625, 69.27911376953125, -218.5308837890625, -68.31256103515625, 154.10043334960938, 93.23782348632812, -58.510929107666016, 311.891357421875, -51.28844451904297, -140.95510864257812, 66.05641174316406, -2.1135711669921875, 126.97350311279297, 262.03546142578125, 310.46142578125, -80.173828125, 4.625080108642578], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000196.npy"}
|
||||
{"epoch": 0.2878120411160059, "step": 197, "batch_size": 64, "mean": 82.47078704833984, "std": 116.71819305419922, "min": -120.32475280761719, "p10": -39.616673278808584, "median": 53.25123405456543, "p90": 239.30527191162113, "max": 423.7657470703125, "pos_frac": 0.8125, "sample": [61.141700744628906, 334.37384033203125, 140.17575073242188, -24.825225830078125, 159.514892578125, 7.4412689208984375, 12.999481201171875, -62.760738372802734, 65.93265533447266, 423.7657470703125, 355.3138732910156, 243.5802764892578, 66.0057601928711, 128.2298583984375, -104.51178741455078, 2.8931732177734375, 126.14661407470703, 283.67523193359375, 32.802406311035156, 39.698978424072266, 103.66775512695312, 218.0025177001953, 42.39496994018555, -7.111480712890625, 58.305213928222656, 6.330711364746094, -104.57799530029297, 129.64019775390625, 74.3625259399414, 97.76919555664062, 255.12994384765625, 147.91680908203125, 34.12779235839844, 229.33026123046875, 2.369293212890625, 194.00868225097656, -42.99725341796875, 0.3301239013671875, -78.99557495117188, -120.32475280761719, 33.849082946777344, 127.1346664428711, -1.432535171508789, 5.760902404785156, 17.237112045288086, 77.51719665527344, 338.073486328125, 208.27520751953125, 26.497024536132812, 197.13282775878906, -15.714380264282227, 48.1972541809082, 81.32769012451172, 16.217079162597656, 17.110191345214844, 128.6712646484375, 172.41085815429688, 71.14642333984375, 209.68521118164062, -31.728652954101562, -65.63827514648438, 19.12049102783203, 19.6744384765625, 46.332855224609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000197.npy"}
|
||||
{"epoch": 0.28928046989721, "step": 198, "batch_size": 64, "mean": 105.940673828125, "std": 145.472412109375, "min": -244.53759765625, "p10": -29.44637622833251, "median": 89.70967483520508, "p90": 230.52896575927738, "max": 595.9739990234375, "pos_frac": 0.84375, "sample": [-128.18896484375, 106.16111755371094, -42.988182067871094, 538.579345703125, 124.48857116699219, 196.96383666992188, 342.57562255859375, 56.76620864868164, 151.42198181152344, 49.165504455566406, 110.09954833984375, 167.01104736328125, -139.07791137695312, 5.630943298339844, 0.534210205078125, 235.1439971923828, 145.16134643554688, 115.97525024414062, 86.87582397460938, 86.38105010986328, -22.3429012298584, -32.49072265625, 32.128379821777344, 166.4476776123047, 13.491178512573242, 70.85680389404297, -65.44412994384766, 201.23709106445312, 97.07339477539062, 79.23872375488281, 91.95972442626953, 115.15410614013672, 135.3131103515625, 72.44383239746094, 133.41380310058594, 14.385562896728516, 81.34587097167969, 161.59326171875, 155.27142333984375, 219.76055908203125, -244.53759765625, 542.0923461914062, 99.46222686767578, 65.45709991455078, 1.6118621826171875, 16.546920776367188, 249.87631225585938, -43.35792541503906, 2.2859363555908203, 76.05521392822266, 40.92525863647461, 94.98849487304688, 87.45962524414062, 140.04615783691406, -9.051401138305664, 142.99293518066406, 160.2446746826172, 208.0191650390625, 14.45939826965332, 424.9229736328125, -6.2322235107421875, 595.9739990234375, 48.04668426513672, 142.39817810058594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000198.npy"}
|
||||
{"epoch": 0.2907488986784141, "step": 199, "batch_size": 64, "mean": 104.91361999511719, "std": 141.22674560546875, "min": -170.7671661376953, "p10": -55.02019882202147, "median": 88.84841537475586, "p90": 281.1950927734375, "max": 459.794677734375, "pos_frac": 0.796875, "sample": [217.65762329101562, 168.74073791503906, -138.10897827148438, -9.278167724609375, 274.7083435058594, 76.06478118896484, -59.759552001953125, 208.4912567138672, -166.059814453125, 125.39055633544922, 2.5557022094726562, -81.671875, -168.00393676757812, 205.05223083496094, 209.4993896484375, 61.64942169189453, 15.014350891113281, 136.84356689453125, 399.1781005859375, 42.81806182861328, 89.58706665039062, 177.05543518066406, 372.36602783203125, 313.93145751953125, -97.15083312988281, 35.65235900878906, -25.576759338378906, 126.86345672607422, 210.70428466796875, 23.074249267578125, 39.213523864746094, 253.08456420898438, 459.794677734375, 82.32073974609375, 73.79491424560547, 185.6319580078125, -26.969573974609375, 116.43658447265625, 38.2275390625, 99.6492919921875, 63.424896240234375, 272.6875305175781, 124.8260726928711, 33.6004524230957, 283.9751281738281, 130.14263916015625, 58.26363754272461, -43.961708068847656, -41.88643264770508, -170.7671661376953, 269.4073791503906, 84.53211212158203, 111.76913452148438, 88.1097640991211, 356.92120361328125, 171.35682678222656, 6.102739334106445, 92.13031768798828, 58.924705505371094, 92.93099975585938, 49.097503662109375, 149.53585815429688, 424.030517578125, -19.155136108398438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000199.npy"}
|
||||
{"epoch": 0.2922173274596182, "step": 200, "batch_size": 64, "mean": 103.79698181152344, "std": 158.9437713623047, "min": -288.65985107421875, "p10": -49.35160484313963, "median": 83.80240631103516, "p90": 338.51315917968753, "max": 536.0819091796875, "pos_frac": 0.765625, "sample": [232.25759887695312, 107.68302917480469, 59.00124740600586, 408.7912292480469, 36.09303665161133, 147.342529296875, 12.812736511230469, -3.580718994140625, 437.58709716796875, -61.9498176574707, 326.06024169921875, 245.50372314453125, 98.98458099365234, 6.125514984130859, 83.50404357910156, 327.4212951660156, 220.7760467529297, 394.53704833984375, 0.6065521240234375, 343.2668151855469, 67.1584243774414, -0.43192481994628906, 158.17306518554688, 225.8898162841797, -28.296550750732422, 70.48690795898438, 166.58399963378906, -211.230224609375, 536.0819091796875, 94.36022186279297, 100.57704162597656, -33.86141586303711, -0.6390380859375, 75.19039916992188, 52.22999572753906, -29.734174728393555, -288.65985107421875, 30.22357177734375, 97.48590087890625, 352.7225646972656, 0.7404384613037109, 85.8653335571289, 55.945892333984375, -16.20899200439453, 7.390705108642578, -157.86639404296875, 84.10076904296875, -55.990257263183594, 300.2573547363281, -33.021095275878906, 5.3977203369140625, 425.63818359375, 44.661712646484375, 18.744773864746094, 103.70903015136719, 259.8455810546875, 152.53167724609375, 164.4581298828125, 103.77923583984375, 170.2305908203125, -120.99433898925781, 139.87588500976562, -78.71687316894531, 125.49677276611328], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000200.npy"}
|
||||
{"epoch": 0.2936857562408223, "step": 201, "batch_size": 64, "mean": 102.45198059082031, "std": 146.0702362060547, "min": -291.5274963378906, "p10": -53.54555053710936, "median": 81.72940444946289, "p90": 285.98340148925786, "max": 481.3332824707031, "pos_frac": 0.8125, "sample": [253.06854248046875, 16.00981330871582, 227.81935119628906, -220.1278076171875, 46.21173858642578, -93.49711608886719, 8.53018569946289, 14.901311874389648, -27.458267211914062, 85.04730224609375, 3.020477294921875, 27.745513916015625, 35.807373046875, -18.815872192382812, 481.3332824707031, 106.66861724853516, 63.29881286621094, 102.73171997070312, 24.141204833984375, 238.10806274414062, 337.09130859375, 124.32079315185547, 11.497726440429688, 85.21424865722656, 224.28292846679688, 290.88946533203125, -85.63442993164062, 232.3788604736328, 202.25180053710938, -8.559438705444336, 70.327880859375, 122.74530029296875, 136.14761352539062, 119.87126159667969, -33.78620910644531, -75.6292724609375, 177.03158569335938, 73.33818817138672, 56.93351745605469, 468.46282958984375, 106.1561279296875, -24.860008239746094, 169.69155883789062, 77.87515258789062, 48.93701171875, 169.86717224121094, 161.78067016601562, 246.03724670410156, 400.6033630371094, 73.49331665039062, 91.9976577758789, -71.93350219726562, 148.15049743652344, 78.41150665283203, -291.5274963378906, 299.092041015625, 423.4743957519531, 141.76747131347656, 6.779352188110352, -62.01383972167969, 274.5359191894531, 48.773094177246094, 12.527481079101562, 123.59056854248047], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000201.npy"}
|
||||
{"epoch": 0.29515418502202645, "step": 202, "batch_size": 64, "mean": 73.0613021850586, "std": 144.16807556152344, "min": -219.86581420898438, "p10": -107.13042755126949, "median": 66.129150390625, "p90": 258.3034912109375, "max": 535.3772583007812, "pos_frac": 0.703125, "sample": [535.3772583007812, -52.10062789916992, 118.62454986572266, 322.021240234375, 2.5854129791259766, -60.787330627441406, 199.52752685546875, -58.19976043701172, 124.62033081054688, 94.87437438964844, 58.13652801513672, -186.70936584472656, 173.59146118164062, 242.83828735351562, 203.17990112304688, 50.28376770019531, -199.16070556640625, 48.760772705078125, 106.85539245605469, 348.71282958984375, 118.22762298583984, 286.45660400390625, -133.60916137695312, 196.976806640625, 28.559288024902344, 28.33910369873047, 390.752197265625, -17.182966232299805, 106.07785034179688, 132.27688598632812, 107.70732116699219, 126.48747253417969, 261.20343017578125, -28.439849853515625, -39.27081298828125, -124.10520935058594, 84.72218322753906, 63.821624755859375, 7.189933776855469, 235.57769775390625, 104.03111267089844, -7.2718963623046875, 22.164440155029297, 258.4341735839844, 166.5704345703125, 114.37623596191406, 82.22783660888672, -219.86581420898438, 68.43667602539062, 105.81570434570312, 16.638912200927734, 45.67668151855469, -43.58253479003906, -74.30620574951172, 44.3941650390625, -120.94432067871094, 83.62283325195312, -74.89801025390625, -45.769657135009766, 55.363868713378906, 257.9985656738281, -142.94102478027344, 95.52751159667969, -20.57644271850586], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000202.npy"}
|
||||
{"epoch": 0.2966226138032305, "step": 203, "batch_size": 64, "mean": 90.73899841308594, "std": 125.97115325927734, "min": -185.37420654296875, "p10": -74.4354507446289, "median": 75.8685073852539, "p90": 261.6606369018555, "max": 463.81072998046875, "pos_frac": 0.78125, "sample": [19.024433135986328, 137.49044799804688, 164.79580688476562, 63.426536560058594, -29.888973236083984, 285.30084228515625, -79.21707153320312, 148.43515014648438, 24.46582794189453, 130.57003784179688, -0.380218505859375, 108.8886489868164, 106.71875762939453, 164.90676879882812, -112.59663391113281, 195.38645935058594, 141.42295837402344, -31.337783813476562, 192.86038208007812, 139.6648712158203, 272.0342102050781, 417.47784423828125, -80.04470825195312, 108.95506286621094, 126.55535125732422, 40.37724304199219, 463.81072998046875, 11.532844543457031, -90.4300537109375, 153.7486114501953, 11.474021911621094, 176.85214233398438, 191.89114379882812, 269.6022644042969, 44.24322509765625, 186.7405548095703, 87.21369934082031, 110.321044921875, 80.45632934570312, 131.5811767578125, 29.861663818359375, 67.7464370727539, 45.881248474121094, 238.97927856445312, -60.831764221191406, -185.37420654296875, -66.73625183105469, 71.28068542480469, -26.397768020629883, 7.282199859619141, 173.526123046875, 14.843605041503906, 25.41146469116211, 47.14164733886719, -82.82977294921875, -77.735107421875, 145.8377685546875, 58.17169189453125, 286.9734802246094, 324.7407531738281, -10.96385383605957, 243.1301727294922, 30.56743812561035, 22.45922088623047], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000203.npy"}
|
||||
{"epoch": 0.29809104258443464, "step": 204, "batch_size": 64, "mean": 82.12925720214844, "std": 146.71878051757812, "min": -239.21917724609375, "p10": -77.05341339111325, "median": 65.44140243530273, "p90": 309.27540283203143, "max": 487.60345458984375, "pos_frac": 0.703125, "sample": [218.2355194091797, 76.08462524414062, 40.38705062866211, 44.49109649658203, 124.2634048461914, -16.184837341308594, 215.94500732421875, -104.09838104248047, 95.00109100341797, 22.96709632873535, -42.250553131103516, 21.47902488708496, 13.645217895507812, 85.8820571899414, 55.60888671875, -42.74962615966797, -239.21917724609375, 172.43905639648438, 50.99833679199219, 8.952150344848633, 266.81884765625, -128.9794921875, -21.817062377929688, -5.566226959228516, 71.75094604492188, -16.312843322753906, 73.76564025878906, 148.8251190185547, 156.18695068359375, -88.72874450683594, 397.91851806640625, -21.80725860595703, 63.74769592285156, 31.269750595092773, -46.49200439453125, 156.61300659179688, 58.32505798339844, 150.92379760742188, 424.30596923828125, -209.50904846191406, 327.4710693359375, 172.45242309570312, 171.047119140625, 74.53022766113281, 92.21038818359375, -12.179903030395508, 42.57005310058594, 231.3274688720703, 334.0347595214844, -9.697296142578125, 82.8319091796875, 170.03179931640625, -129.84881591796875, 67.1351089477539, 487.60345458984375, 374.9876708984375, -38.78990936279297, 150.1005859375, 23.971086502075195, 356.2106018066406, -120.2734375, 81.06704711914062, 114.1746826171875, -49.81097412109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000204.npy"}
|
||||
{"epoch": 0.29955947136563876, "step": 205, "batch_size": 64, "mean": 110.42106628417969, "std": 142.15138244628906, "min": -250.49148559570312, "p10": -68.68245162963866, "median": 116.51107406616211, "p90": 273.96396179199223, "max": 406.1178894042969, "pos_frac": 0.78125, "sample": [39.870418548583984, 140.43704223632812, 224.40127563476562, 376.6222839355469, 120.69709014892578, 12.209909439086914, -8.250391006469727, 35.45306396484375, 9.333160400390625, 233.50827026367188, 264.67236328125, -95.25018310546875, -104.11943054199219, 95.39449310302734, -16.831802368164062, 76.02474975585938, 16.72046661376953, 378.7012023925781, 239.36207580566406, 28.94808578491211, 111.56995391845703, 101.74676513671875, -130.52340698242188, 173.1090850830078, 307.83160400390625, 246.94903564453125, 196.92071533203125, -0.4632453918457031, -66.58863067626953, 213.34176635742188, -42.981788635253906, 406.1178894042969, 132.12405395507812, 82.06157684326172, 36.199501037597656, 132.35064697265625, 398.4334411621094, 261.15948486328125, 120.54423522949219, 138.22779846191406, -218.74435424804688, 135.2164764404297, 120.62300872802734, 225.1663360595703, 156.93701171875, -69.57980346679688, 151.8563690185547, 86.18952178955078, 277.9460754394531, 86.61547088623047, 245.0998077392578, 192.15834045410156, 177.25157165527344, 68.9072494506836, 152.7228546142578, 366.9892272949219, 112.47791290283203, -13.383598327636719, 20.496490478515625, -250.49148559570312, -73.00552368164062, 216.3035888671875, -27.59063720703125, 40.75151062011719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000205.npy"}
|
||||
{"epoch": 0.3010279001468429, "step": 206, "batch_size": 64, "mean": 118.73365783691406, "std": 129.02308654785156, "min": -233.7353515625, "p10": -29.334462928771956, "median": 103.69136428833008, "p90": 298.5227508544923, "max": 406.1219482421875, "pos_frac": 0.84375, "sample": [53.019073486328125, 189.36529541015625, 83.74800109863281, 123.56716918945312, 406.1219482421875, 311.8946228027344, 78.72918701171875, 55.167320251464844, 214.4070281982422, 261.80792236328125, 54.65202331542969, 136.35423278808594, 25.216611862182617, 104.74884796142578, -233.7353515625, 59.554813385009766, 80.08863830566406, 168.46922302246094, 61.639862060546875, -13.969228744506836, 227.87515258789062, -35.91956329345703, 240.0263671875, 353.3362731933594, 123.66505432128906, 199.94293212890625, 23.09380340576172, 204.99879455566406, -107.68122863769531, -108.50305938720703, 182.00880432128906, 312.3082275390625, 218.99118041992188, 232.29637145996094, 6.114030838012695, 258.924560546875, 76.61103057861328, -69.103759765625, 159.96749877929688, -148.02915954589844, 49.52599334716797, 211.96145629882812, 6.723529815673828, 166.992919921875, 91.36295318603516, -12.984725952148438, 125.08647918701172, 82.9749526977539, 196.30918884277344, 98.07917785644531, -58.624427795410156, 12.115867614746094, 92.8233413696289, 184.76708984375, 365.30828857421875, 267.32171630859375, -4.377832412719727, 122.34077453613281, 102.63388061523438, 312.4188537597656, 70.30188751220703, 23.40375328063965, 314.86181640625, 205.85650634765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000206.npy"}
|
||||
{"epoch": 0.302496328928047, "step": 207, "batch_size": 64, "mean": 134.29856872558594, "std": 157.00250244140625, "min": -142.69869995117188, "p10": -31.282675170898433, "median": 90.75166320800781, "p90": 307.95274047851564, "max": 591.1517944335938, "pos_frac": 0.84375, "sample": [13.796077728271484, -142.69869995117188, 63.40319061279297, 107.30810546875, 181.67770385742188, 46.622764587402344, 306.700439453125, -27.84764862060547, -18.511032104492188, 9.36578369140625, 85.38824462890625, 76.18465423583984, 304.3537902832031, -7.5493927001953125, 132.57647705078125, 151.8542938232422, 46.57023620605469, 91.11798095703125, 37.021427154541016, 33.10900115966797, 47.39097213745117, 169.80264282226562, 30.82085418701172, 264.91864013671875, 35.42594909667969, -67.98104858398438, 308.48944091796875, 64.37136840820312, 29.049245834350586, -52.70058059692383, 284.0674743652344, 258.61614990234375, -58.54521179199219, 0.23253631591796875, 30.040794372558594, 257.5781555175781, 35.39067077636719, 240.2443389892578, 152.30795288085938, 257.083251953125, 258.39410400390625, 127.2259521484375, 112.100830078125, 134.55650329589844, 260.8211669921875, 246.84637451171875, 90.38534545898438, 385.80218505859375, 517.3919677734375, 277.11962890625, 48.45380401611328, 437.8953857421875, -60.11436462402344, 35.52611541748047, 591.1517944335938, 43.900115966796875, 92.94902038574219, 99.43598937988281, 87.88687133789062, 521.0414428710938, -87.55359649658203, 193.67080688476562, 435.9289855957031, -32.75482940673828], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000207.npy"}
|
||||
{"epoch": 0.3039647577092511, "step": 208, "batch_size": 64, "mean": 105.24873352050781, "std": 155.9709930419922, "min": -332.22088623046875, "p10": -43.0328025817871, "median": 81.1707534790039, "p90": 311.29040222167976, "max": 442.8934326171875, "pos_frac": 0.765625, "sample": [-200.9269561767578, 148.71302795410156, -160.68072509765625, 286.7371826171875, 26.413650512695312, -18.147342681884766, 389.3555908203125, 23.694623947143555, 116.45954132080078, 247.4610595703125, 242.8999786376953, 56.759376525878906, 84.87335205078125, 161.07069396972656, 219.33441162109375, 57.388099670410156, 442.8934326171875, 16.256561279296875, 436.80816650390625, 139.7373046875, -10.34771728515625, 35.493568420410156, 253.69561767578125, 177.5811767578125, 77.46815490722656, 27.77686309814453, 54.826690673828125, -9.772247314453125, -132.9364471435547, 274.7597961425781, -145.92019653320312, -9.92934799194336, 51.26030349731445, 108.1568374633789, 131.95416259765625, 294.8179931640625, 256.9632873535156, 38.641658782958984, -14.347160339355469, 228.27511596679688, -45.94879150390625, 72.49755859375, 27.40955352783203, 349.61029052734375, 136.10122680664062, 318.3500061035156, 86.5141830444336, -332.22088623046875, -36.22882843017578, 147.83099365234375, 15.742630004882812, 125.55415344238281, 327.63275146484375, 99.10188293457031, 54.23310852050781, -134.47369384765625, 25.179779052734375, 383.2433166503906, 183.95010375976562, 285.61053466796875, 200.61038208007812, 67.907470703125, -8.633819580078125, -19.17437744140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000208.npy"}
|
||||
{"epoch": 0.3054331864904552, "step": 209, "batch_size": 64, "mean": 93.79731750488281, "std": 152.31752014160156, "min": -344.812255859375, "p10": -81.12847900390625, "median": 82.56779098510742, "p90": 315.53398742675785, "max": 431.5146484375, "pos_frac": 0.71875, "sample": [333.60150146484375, 82.71200561523438, 162.0950469970703, -47.466026306152344, 372.85931396484375, 406.6541748046875, 321.8120422363281, 113.32902526855469, 130.15631103515625, 92.40240478515625, -66.282470703125, 93.79869842529297, 170.08200073242188, 82.42357635498047, 166.80763244628906, 135.7464599609375, 165.07069396972656, -154.81886291503906, -81.97172546386719, 65.62004852294922, 260.12359619140625, 237.08172607421875, -2.292348861694336, -84.81820678710938, 44.82643127441406, -50.219879150390625, -15.245386123657227, 1.0721664428710938, 97.95942687988281, 156.61228942871094, 123.25651550292969, 85.25514221191406, -131.01998901367188, -47.98947525024414, 62.698692321777344, 68.88616943359375, -79.16090393066406, -24.544918060302734, 364.8768310546875, 292.945068359375, 52.71253967285156, 431.5146484375, 7.674598693847656, -44.968345642089844, -26.822509765625, 43.000648498535156, 57.58882141113281, 74.91416931152344, 253.41407775878906, 136.95884704589844, 149.02523803710938, -33.152183532714844, -113.3236083984375, -344.812255859375, 180.7476806640625, 76.29290771484375, 44.18503189086914, 48.91722869873047, 131.092041015625, -157.28033447265625, 335.96240234375, 300.88519287109375, 229.041259765625, 264.5257873535156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000209.npy"}
|
||||
{"epoch": 0.3069016152716593, "step": 210, "batch_size": 64, "mean": 108.67684173583984, "std": 148.003662109375, "min": -244.9134521484375, "p10": -27.657365608215333, "median": 80.71089553833008, "p90": 315.6121643066407, "max": 473.16015625, "pos_frac": 0.765625, "sample": [-5.640533447265625, 33.89916229248047, 133.53829956054688, 71.64907836914062, -15.261611938476562, -83.18377685546875, 181.9913330078125, -27.433385848999023, 135.52484130859375, -30.83721160888672, 194.58612060546875, 13.050315856933594, 25.696086883544922, 96.76110076904297, 271.028564453125, 149.12054443359375, -16.817489624023438, 68.60801696777344, 196.78253173828125, 10.63840103149414, 296.65093994140625, 89.77271270751953, 30.665002822875977, 237.606689453125, 273.98919677734375, 3.014341354370117, 10.815437316894531, 322.145751953125, 191.27337646484375, 361.5818786621094, 117.12853240966797, 245.5277862548828, 117.56751251220703, 300.36712646484375, -141.58932495117188, -27.75335693359375, -78.4293212890625, -13.547409057617188, 32.95454406738281, 18.209728240966797, 142.9185028076172, 167.93856811523438, 365.08905029296875, 110.66661071777344, 0.5939807891845703, -19.058197021484375, 434.2348327636719, -11.037612915039062, 130.73985290527344, 159.8555908203125, 281.06207275390625, 473.16015625, 286.6082763671875, 52.56830596923828, -60.723480224609375, 89.947021484375, -6.612510681152344, 9.357120513916016, 12.09486198425293, 367.6357421875, 5.006584167480469, 18.85655975341797, -244.9134521484375, 397.6778564453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000210.npy"}
|
||||
{"epoch": 0.30837004405286345, "step": 211, "batch_size": 64, "mean": 92.21821594238281, "std": 165.3912353515625, "min": -222.24288940429688, "p10": -65.56961746215819, "median": 59.7443962097168, "p90": 265.26828308105473, "max": 566.3486328125, "pos_frac": 0.703125, "sample": [133.10214233398438, -55.21302795410156, 254.1651611328125, 104.82701873779297, 70.55703735351562, 155.224365234375, -48.711944580078125, 112.89942169189453, 533.4092407226562, -222.24288940429688, -40.94646453857422, 41.51115417480469, 26.43912124633789, -0.7652587890625, 70.95358276367188, 223.96095275878906, 152.39796447753906, 192.89202880859375, 163.9072265625, -17.368576049804688, -5.433685302734375, -43.32147216796875, 191.72341918945312, 108.47941589355469, -70.0081558227539, 3.2061920166015625, -46.828224182128906, 521.598388671875, 48.436161041259766, 566.3486328125, 453.974853515625, -39.31006622314453, -132.77218627929688, 143.2978973388672, 249.0118408203125, 329.3950500488281, -155.08111572265625, 138.44422912597656, 44.52043151855469, -3.6011505126953125, 123.78108215332031, 431.27197265625, -40.569740295410156, 78.1820297241211, 4.713056564331055, 52.11769104003906, 63.03333282470703, -84.88560485839844, 16.93899917602539, 199.477294921875, -157.8511199951172, 56.45545959472656, 207.62953186035156, 50.15215301513672, 160.9496612548828, 270.0267639160156, 95.95043182373047, 240.07858276367188, 9.078201293945312, 127.12641906738281, 39.4901008605957, 17.010345458984375, -14.822006225585938, -196.44766235351562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000211.npy"}
|
||||
{"epoch": 0.30983847283406757, "step": 212, "batch_size": 64, "mean": 132.64306640625, "std": 171.94717407226562, "min": -254.05787658691406, "p10": -57.40341491699217, "median": 132.4440574645996, "p90": 317.30950012207035, "max": 602.46630859375, "pos_frac": 0.75, "sample": [-254.05787658691406, -88.01900482177734, 318.4476013183594, 264.67620849609375, 59.681907653808594, 250.41909790039062, 106.49932861328125, -64.41500854492188, -37.5531005859375, -2.5286026000976562, 231.6271514892578, 171.1409912109375, -210.0536346435547, -41.04302978515625, -28.718387603759766, 7.177330017089844, 296.9002685546875, 217.22604370117188, 434.41717529296875, 249.03123474121094, 115.78515625, -110.78514862060547, 36.69033432006836, 106.82991027832031, 142.20635986328125, 166.55337524414062, 232.29653930664062, -81.87417602539062, 308.13128662109375, -12.223894119262695, 121.61994171142578, 169.8173828125, 602.46630859375, 177.96910095214844, 54.364227294921875, -227.47401428222656, 321.45928955078125, -24.253097534179688, 126.9819564819336, 572.6273803710938, 301.2974853515625, 137.90615844726562, 37.49523162841797, 240.55120849609375, 14.9560546875, 124.4302978515625, -32.19563674926758, 54.31496810913086, 314.6539306640625, 59.93979263305664, 148.39730834960938, 363.7523193359375, 411.631591796875, 252.11642456054688, 288.3952331542969, -8.902477264404297, 230.4401092529297, 63.46711349487305, 160.048828125, 143.4510955810547, 65.3846435546875, 301.366943359375, -38.8472900390625, 175.06109619140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000212.npy"}
|
||||
{"epoch": 0.31130690161527164, "step": 213, "batch_size": 64, "mean": 127.49537658691406, "std": 154.1443328857422, "min": -220.19677734375, "p10": -56.45011901855467, "median": 114.82050704956055, "p90": 334.0070343017578, "max": 437.17547607421875, "pos_frac": 0.8125, "sample": [298.3770751953125, -90.03044128417969, 35.664817810058594, -105.56806182861328, 212.72703552246094, 15.019515991210938, 65.55990600585938, 106.85406494140625, -220.19677734375, -85.66238403320312, 9.91547966003418, 308.39727783203125, 340.65496826171875, -36.74962615966797, 84.51775360107422, 177.02940368652344, 10.38560676574707, 62.19172668457031, 158.40818786621094, 323.36083984375, 437.17547607421875, 326.5646667480469, 57.58863067626953, 416.01702880859375, 173.60169982910156, 283.16241455078125, 391.63861083984375, -19.81783676147461, -4.834236145019531, 13.31844711303711, 188.38385009765625, 125.10991668701172, 206.235595703125, 122.78694915771484, -43.0357666015625, 8.838134765625, 169.65240478515625, 330.2445983886719, 288.704345703125, 148.23580932617188, -12.982921600341797, 198.5026092529297, 335.6195068359375, -62.199127197265625, 216.5608673095703, 64.82322692871094, 185.4609832763672, 429.0738830566406, -140.9158935546875, 216.68026733398438, 249.86410522460938, 306.70001220703125, 352.20050048828125, 41.73091506958008, 15.538589477539062, 5.872159957885742, -126.21843719482422, 70.2559814453125, 100.58341217041016, 65.3078384399414, 36.352455139160156, 21.705963134765625, 168.0009002685547, 130.75894165039062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000213.npy"}
|
||||
{"epoch": 0.31277533039647576, "step": 214, "batch_size": 64, "mean": 114.51547241210938, "std": 148.62936401367188, "min": -173.6182861328125, "p10": -50.41845474243164, "median": 85.505859375, "p90": 297.7182067871094, "max": 521.81689453125, "pos_frac": 0.765625, "sample": [74.33052825927734, 415.6233825683594, 17.060604095458984, 196.58616638183594, 1.9168872833251953, -14.394319534301758, -85.19032287597656, -173.6182861328125, 277.40875244140625, -79.03759765625, 171.72927856445312, 42.53052520751953, 96.68119049072266, -0.8738842010498047, 33.27362060546875, 108.41928100585938, 226.99456787109375, -27.0263614654541, 50.17674255371094, 190.4141845703125, 64.04646301269531, 71.6361312866211, 112.32073211669922, 264.4101257324219, 252.13046264648438, 70.7772445678711, 16.807228088378906, -0.07056999206542969, 128.93067932128906, -44.72908020019531, 107.0411376953125, 64.08576202392578, -0.44367218017578125, 333.48541259765625, -79.77558135986328, 521.81689453125, 350.0155334472656, 11.596145629882812, 27.499359130859375, 404.4286193847656, 203.2631072998047, 205.33316040039062, 269.15447998046875, 301.9373779296875, -149.18746948242188, 514.5845947265625, 30.36432456970215, 189.92678833007812, 216.93878173828125, 105.87972259521484, -50.86054229736328, 173.7879180908203, -48.602264404296875, 127.57806396484375, -49.38691711425781, 245.14938354492188, 62.49257278442383, 287.87347412109375, 71.47914123535156, 154.30252075195312, 121.45848083496094, 135.53636169433594, -52.080322265625, 63.05411911010742], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000214.npy"}
|
||||
{"epoch": 0.3142437591776799, "step": 215, "batch_size": 64, "mean": 120.56410217285156, "std": 138.86724853515625, "min": -145.90821838378906, "p10": -64.28058013916015, "median": 99.3742904663086, "p90": 295.42076416015624, "max": 508.66497802734375, "pos_frac": 0.828125, "sample": [69.77198791503906, 325.1129150390625, 120.27053833007812, 267.7493896484375, -61.89117431640625, -79.5411376953125, 42.54015350341797, 120.11298370361328, 137.25604248046875, 102.47517395019531, 141.2256622314453, 50.40895080566406, 2.9151554107666016, -34.27849578857422, 434.78948974609375, 260.5967712402344, 258.06585693359375, 295.64276123046875, 508.66497802734375, 175.50120544433594, 96.27340698242188, 399.2252197265625, 111.05213165283203, -91.30503845214844, 73.35060119628906, 201.223876953125, -66.88702392578125, 72.89624786376953, 191.929931640625, 257.0840759277344, 30.42049217224121, 81.32595825195312, 80.63130187988281, -13.585836410522461, 173.34042358398438, 94.4883804321289, -131.44992065429688, 149.0444793701172, 197.13290405273438, 274.2125549316406, 339.12103271484375, 94.39273834228516, 206.28717041015625, -3.51116943359375, 151.0570068359375, 51.64453125, 42.39423370361328, -65.30461120605469, 48.78739547729492, 218.96839904785156, -145.90821838378906, 137.608642578125, 407.65447998046875, 31.47058868408203, 92.84175109863281, 89.47265625, 294.90277099609375, 7.4226837158203125, 196.88673400878906, 39.57530212402344, 107.1328353881836, 40.433349609375, -93.1029052734375, 108.08177947998047], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000215.npy"}
|
||||
{"epoch": 0.315712187958884, "step": 216, "batch_size": 64, "mean": 119.37692260742188, "std": 171.15626525878906, "min": -287.81964111328125, "p10": -73.27203598022462, "median": 101.08648681640625, "p90": 375.97046203613286, "max": 545.8858642578125, "pos_frac": 0.75, "sample": [217.5894775390625, 411.5614013671875, -73.62174224853516, 140.32168579101562, 210.44424438476562, 31.24641990661621, -184.85504150390625, 160.79263305664062, 218.2715606689453, 30.51325225830078, 545.8858642578125, 95.63432312011719, 114.81072998046875, -72.4560546875, -31.643970489501953, -8.049407958984375, -101.78433990478516, 210.25991821289062, 41.215824127197266, -185.0753173828125, 13.038070678710938, 324.8736572265625, -17.43401336669922, -17.813446044921875, 106.53865051269531, 152.24525451660156, 45.634002685546875, 192.1237030029297, 115.39743041992188, 0.9384288787841797, 108.15158081054688, 196.25531005859375, 17.440284729003906, -52.40705108642578, 402.7347717285156, 379.5426025390625, -67.24150085449219, 300.5319519042969, 50.07342529296875, 80.71160125732422, 59.266212463378906, 271.0034484863281, -2.263235092163086, -287.81964111328125, 69.39093017578125, 186.4931640625, 244.61764526367188, 398.3502197265625, 50.927978515625, -75.73157501220703, 279.088134765625, 47.22993469238281, 116.06197357177734, 172.2982177734375, -67.90044403076172, 340.148193359375, 239.41134643554688, 347.22479248046875, 412.1685791015625, 396.18524169921875, 20.464187622070312, -79.279052734375, 32.75518035888672, 367.6354675292969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000216.npy"}
|
||||
{"epoch": 0.31718061674008813, "step": 217, "batch_size": 64, "mean": 140.3418731689453, "std": 160.13815307617188, "min": -179.71376037597656, "p10": -21.40581588745117, "median": 96.39009475708008, "p90": 372.1501647949219, "max": 548.4759521484375, "pos_frac": 0.84375, "sample": [136.15335083007812, 139.72027587890625, 95.9944839477539, 140.40072631835938, 61.55290222167969, 58.038780212402344, 71.64720153808594, 236.70921325683594, 78.5298080444336, 92.14666748046875, 48.01953125, 367.71881103515625, 145.72015380859375, 374.04931640625, -119.40674591064453, -1.2156295776367188, 40.77074432373047, 2.053802490234375, 203.88677978515625, 548.4759521484375, -3.443889617919922, 60.280799865722656, 293.37322998046875, 436.9808654785156, 98.3645248413086, 212.25863647460938, -32.27515411376953, 70.13555908203125, -179.71376037597656, 122.93338012695312, 505.5458679199219, 199.74993896484375, 6.679014205932617, 60.41828918457031, 267.6580810546875, 2.1343536376953125, 256.78460693359375, 290.95611572265625, -19.118484497070312, 503.942626953125, 439.94781494140625, -94.41675567626953, 351.00225830078125, 276.97149658203125, 138.43028259277344, 474.0376892089844, 20.446975708007812, 96.78570556640625, 69.624267578125, 71.14757537841797, 63.77159118652344, 178.82748413085938, 254.97119140625, 6.158052444458008, 148.2363739013672, 74.34062957763672, -45.76189422607422, -22.38610076904297, 28.644126892089844, -52.81559753417969, 137.71669006347656, 213.46141052246094, 253.83009338378906, 24.297555923461914], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000217.npy"}
|
||||
{"epoch": 0.3186490455212922, "step": 218, "batch_size": 64, "mean": 108.9049301147461, "std": 165.6693878173828, "min": -274.9143371582031, "p10": -75.04668884277343, "median": 90.21390151977539, "p90": 333.2731048583985, "max": 582.289794921875, "pos_frac": 0.734375, "sample": [94.13813781738281, 62.54029083251953, 171.613525390625, 267.6275634765625, 149.76321411132812, 29.965030670166016, 335.8528137207031, 72.27986145019531, 14.1854248046875, 52.72924041748047, 152.11692810058594, 48.580902099609375, -16.15660858154297, 121.69444274902344, 454.772705078125, 181.45741271972656, 126.66897583007812, -21.976043701171875, 142.8729248046875, 195.15420532226562, 43.382110595703125, 58.243743896484375, -3.566059112548828, 235.30374145507812, -65.38652038574219, -274.9143371582031, 327.2537841796875, 99.0594482421875, 194.57119750976562, 203.1854248046875, 32.87928009033203, 424.74029541015625, 441.30938720703125, 257.4024353027344, 203.4501953125, 65.98629760742188, -20.31329917907715, -22.54557991027832, 93.62789154052734, -20.66519546508789, -29.239166259765625, -101.20576477050781, 149.32070922851562, -109.34506225585938, -74.24945831298828, -53.24256134033203, 582.289794921875, 347.0062255859375, 202.1929473876953, -75.38835906982422, -145.23062133789062, 240.09925842285156, 458.6287841796875, 11.637826919555664, 51.05746841430664, 86.79991149902344, 166.9441680908203, 99.59188842773438, 43.60729217529297, -175.23507690429688, 80.2419204711914, -124.44566345214844, 323.3822021484375, 105.81155395507812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000218.npy"}
|
||||
{"epoch": 0.3201174743024963, "step": 219, "batch_size": 64, "mean": 91.93537139892578, "std": 177.6659393310547, "min": -452.5419616699219, "p10": -114.98924636840817, "median": 78.78208541870117, "p90": 354.6544158935548, "max": 485.6072998046875, "pos_frac": 0.734375, "sample": [460.898681640625, 147.96725463867188, 3.4668407440185547, 327.320556640625, 3.980356216430664, 428.61444091796875, -452.5419616699219, 10.941953659057617, 38.309844970703125, 303.6020812988281, -15.153358459472656, 96.38605499267578, 204.26296997070312, 445.92816162109375, 485.6072998046875, 183.81861877441406, 57.635406494140625, 254.04574584960938, 107.89599609375, 139.32003784179688, 279.91082763671875, 26.121719360351562, -130.71270751953125, -6.25439453125, -68.3294448852539, -68.92733001708984, 367.7365417480469, -4.251556396484375, 366.3689270019531, 397.8607177734375, 38.95764923095703, 270.5912780761719, -0.0704498291015625, 48.09291076660156, 17.113418579101562, 175.3725128173828, 118.1836929321289, 16.38025665283203, 113.66162109375, 37.10284423828125, 290.7876281738281, 134.1299591064453, 90.25247192382812, -176.82925415039062, 86.62388610839844, 161.94248962402344, -198.19869995117188, 6.684568405151367, 6.200037002563477, 245.73178100585938, 29.7393798828125, -134.40301513671875, -0.9109878540039062, -48.97929000854492, -226.6623992919922, 91.21768951416016, 164.1097412109375, 83.17391204833984, -90.27992248535156, -35.48945617675781, 141.84823608398438, 74.3902587890625, 87.1474380493164, -125.5789566040039], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000219.npy"}
|
||||
{"epoch": 0.32158590308370044, "step": 220, "batch_size": 64, "mean": 78.98377990722656, "std": 129.20367431640625, "min": -261.66680908203125, "p10": -71.09269332885742, "median": 80.76587677001953, "p90": 206.6965484619141, "max": 446.4186096191406, "pos_frac": 0.75, "sample": [189.26089477539062, 53.006629943847656, 63.485809326171875, -22.525596618652344, 45.689537048339844, 318.50823974609375, 366.0219421386719, 9.959354400634766, 151.98158264160156, 160.4783477783203, 123.71045684814453, 271.99798583984375, 137.114990234375, 149.069580078125, -225.5634765625, 111.11056518554688, 177.50660705566406, 33.518646240234375, 137.12448120117188, 208.86524963378906, 85.80781555175781, 23.55415916442871, 1.2015228271484375, -261.66680908203125, -71.6962661743164, 201.63624572753906, -26.124719619750977, 51.854644775390625, 209.4052734375, 69.3643798828125, -69.68435668945312, 156.74659729003906, 133.63255310058594, -21.857223510742188, 216.55747985839844, 118.00127410888672, 446.4186096191406, 173.45164489746094, -17.249794006347656, 108.61132049560547, 16.77656364440918, 52.37704086303711, 0.7118320465087891, 110.05656433105469, -114.69268035888672, 159.51333618164062, 61.07940673828125, 135.22410583496094, -141.14080810546875, 75.72393798828125, 71.43183898925781, -2.2449722290039062, 139.6038818359375, -42.84632873535156, 43.07107925415039, 157.81680297851562, 184.65731811523438, -61.629783630371094, 176.79925537109375, 189.60794067382812, -17.622997283935547, 156.9139404296875, -137.42724609375, -147.08425903320312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000220.npy"}
|
||||
{"epoch": 0.32305433186490456, "step": 221, "batch_size": 64, "mean": 171.51882934570312, "std": 163.33065795898438, "min": -186.412109375, "p10": -21.65741424560545, "median": 185.17830657958984, "p90": 386.34936828613286, "max": 551.36181640625, "pos_frac": 0.84375, "sample": [79.90889739990234, 107.49627685546875, 205.31752014160156, -6.467491149902344, 105.08843231201172, 551.36181640625, 6.127662658691406, 390.44940185546875, 200.57723999023438, 293.072021484375, 293.4239501953125, 10.736209869384766, 290.8353271484375, 240.18991088867188, 145.4691925048828, 271.09796142578125, -4.359130859375, 450.7160949707031, -5.729677200317383, 93.66388702392578, 211.84515380859375, 305.95794677734375, 403.48956298828125, 476.7333984375, 466.119873046875, 254.8262939453125, 223.8694610595703, -186.412109375, 56.186622619628906, 68.43599700927734, 173.58148193359375, 197.33102416992188, -31.354888916015625, 64.25176239013672, 66.50936889648438, 180.55032348632812, 113.52033996582031, -136.8297119140625, 259.5307312011719, 371.78045654296875, -28.167381286621094, 287.04962158203125, 255.2700958251953, 327.851806640625, 121.83984375, 233.416015625, 34.82417297363281, 376.7826232910156, 320.85284423828125, 231.29638671875, -46.207759857177734, -104.47075653076172, 15.690177917480469, 259.70904541015625, 41.75636291503906, -77.03863525390625, 17.728113174438477, 120.25049591064453, 46.03274917602539, 203.52713012695312, 311.074462890625, 93.31126403808594, 486.1201171875, 189.80628967285156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000221.npy"}
|
||||
{"epoch": 0.3245227606461087, "step": 222, "batch_size": 64, "mean": 128.40843200683594, "std": 162.7919921875, "min": -232.00411987304688, "p10": -46.432830810546875, "median": 116.58121871948242, "p90": 360.4207580566407, "max": 559.7192993164062, "pos_frac": 0.78125, "sample": [-43.03681182861328, 446.0048522949219, -7.9540252685546875, -104.01502990722656, -5.108421325683594, -86.70646667480469, -20.582183837890625, 116.124267578125, 0.467498779296875, 226.52130126953125, 98.4561996459961, 123.49153900146484, 421.0897216796875, 90.88510131835938, 121.52275848388672, 242.86715698242188, -232.00411987304688, 151.68435668945312, 258.0392761230469, 176.29090881347656, 152.29698181152344, 108.94233703613281, -205.8719482421875, 325.4255065917969, -127.00594329833984, 269.345458984375, 492.72955322265625, 93.98335266113281, 17.726661682128906, 17.719772338867188, 160.69778442382812, 23.319976806640625, 210.83387756347656, -47.888267517089844, 205.7796173095703, 200.0562286376953, 47.696563720703125, 153.42721557617188, -15.304264068603516, 221.73101806640625, -30.195167541503906, 99.83979797363281, 81.81082916259766, 68.83699035644531, 338.2991943359375, 240.64071655273438, 117.03816986083984, 238.55889892578125, 147.8906707763672, 47.906097412109375, -28.427810668945312, 163.6356201171875, 369.90142822265625, 62.53252029418945, 423.0995178222656, 190.12496948242188, 133.95896911621094, -106.51031494140625, 222.25314331054688, 18.86376190185547, 559.7192993164062, 56.083648681640625, 416.6297607421875, 105.96987915039062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000222.npy"}
|
||||
{"epoch": 0.32599118942731276, "step": 223, "batch_size": 64, "mean": 107.97784423828125, "std": 181.55764770507812, "min": -337.70062255859375, "p10": -117.41091156005858, "median": 108.73971939086914, "p90": 310.8174224853516, "max": 630.5066528320312, "pos_frac": 0.734375, "sample": [71.96232604980469, -66.60579681396484, 222.12100219726562, 147.5714111328125, 141.65322875976562, 355.76556396484375, 196.97100830078125, 103.09610748291016, 215.52301025390625, 166.98196411132812, -173.5109100341797, 56.78852844238281, 80.35505676269531, 236.80252075195312, 294.46307373046875, 289.529052734375, 134.44412231445312, 91.59873962402344, 554.0777587890625, -13.298274993896484, 137.5894317626953, -55.318870544433594, 91.18156433105469, -66.17039489746094, -59.42662811279297, 305.6462707519531, -158.1753692626953, 377.57598876953125, 103.40213775634766, -122.32093048095703, 5.555816650390625, -2.1978302001953125, 265.9913330078125, -337.70062255859375, 114.07730102539062, 146.30422973632812, -212.63475036621094, 197.263427734375, 74.13848876953125, 630.5066528320312, 248.6856689453125, -105.9542007446289, 178.68771362304688, 270.529541015625, -181.14688110351562, -60.90251541137695, 285.481689453125, 348.1455078125, 163.55487060546875, -11.572029113769531, 51.03773498535156, 30.043426513671875, 89.24478149414062, 413.45477294921875, 26.10517120361328, 12.263313293457031, -17.941640853881836, 12.726163864135742, 144.20445251464844, 137.41970825195312, 120.20645904541016, 161.08975219726562, -259.3917541503906, 313.03363037109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000223.npy"}
|
||||
{"epoch": 0.3274596182085169, "step": 224, "batch_size": 64, "mean": 123.69600677490234, "std": 182.18006896972656, "min": -335.1512451171875, "p10": -76.44371643066407, "median": 67.18981170654297, "p90": 363.4639434814454, "max": 665.755859375, "pos_frac": 0.828125, "sample": [249.8376007080078, 3.944061279296875, 143.77452087402344, 16.413475036621094, -76.7000503540039, 552.7578125, 240.7713165283203, 49.172767639160156, 62.864280700683594, -335.1512451171875, 53.59784698486328, 172.6528778076172, 158.29238891601562, 146.3477783203125, 440.130859375, 88.29759216308594, 54.505332946777344, 195.3517608642578, 179.25765991210938, 665.755859375, 279.2227783203125, 110.056884765625, 310.79730224609375, 249.81179809570312, -134.89332580566406, -79.02719116210938, 271.78924560546875, 68.24714660644531, 234.53085327148438, -12.794692993164062, 33.10127258300781, 46.106964111328125, 36.548728942871094, 634.0343017578125, 260.97650146484375, 243.7452392578125, 113.12805938720703, -78.75406646728516, 22.514631271362305, 43.471248626708984, 225.05313110351562, 14.50311279296875, 48.885169982910156, -29.47064971923828, 104.07247924804688, -75.8456039428711, -61.54364013671875, 2.0126495361328125, -102.60307312011719, 30.675426483154297, 173.90316772460938, 145.14602661132812, 375.60003662109375, 19.801347732543945, 7.815456390380859, 66.13247680664062, 4.158935546875, 335.1463928222656, 37.440467834472656, -81.303955078125, 92.67045593261719, 425.76776123046875, 422.1202697753906, 21.918651580810547], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000224.npy"}
|
||||
{"epoch": 0.328928046989721, "step": 225, "batch_size": 64, "mean": 144.7752227783203, "std": 180.4758758544922, "min": -247.81985473632812, "p10": -85.29101943969727, "median": 148.50826263427734, "p90": 391.7450988769532, "max": 706.3538818359375, "pos_frac": 0.765625, "sample": [236.08026123046875, 169.7027587890625, 408.18804931640625, 366.2476806640625, 35.173927307128906, 96.2138442993164, 33.515106201171875, -92.57647705078125, -139.15829467773438, 420.8067626953125, 374.1573791503906, 288.7033386230469, -145.2324676513672, -80.46514129638672, 268.65863037109375, -26.764812469482422, -102.02052307128906, 3.422323226928711, -20.635005950927734, 75.98597717285156, 114.26492309570312, 122.35556030273438, 229.86935424804688, 160.17706298828125, -10.078851699829102, 148.580322265625, 47.360260009765625, 165.8131561279297, 2.556171417236328, 238.42276000976562, 303.2877197265625, 167.51739501953125, 220.2473907470703, 109.18982696533203, -43.79216384887695, 480.94329833984375, 430.8546142578125, 7.492515563964844, 243.90640258789062, 398.9818115234375, 101.75390625, 331.2802734375, 186.34451293945312, -40.13762664794922, 162.34426879882812, 135.37637329101562, 706.3538818359375, 283.7306823730469, 153.24404907226562, 81.91682434082031, 416.54339599609375, 7.71270751953125, 242.4810333251953, -100.02685546875, -87.3592529296875, 374.85943603515625, 303.525634765625, 172.10809326171875, 168.75498962402344, -247.81985473632812, 113.4870834350586, 148.4362030029297, -49.129310607910156, -8.11883544921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000225.npy"}
|
||||
{"epoch": 0.3303964757709251, "step": 226, "batch_size": 64, "mean": 113.28193664550781, "std": 171.46205139160156, "min": -226.19210815429688, "p10": -39.67925643920897, "median": 77.83613967895508, "p90": 333.5439331054688, "max": 589.9083251953125, "pos_frac": 0.71875, "sample": [589.9083251953125, -160.73788452148438, -44.718135833740234, 16.368804931640625, -20.26910400390625, 169.50025939941406, -7.4678955078125, 65.92857360839844, 176.462890625, -27.921871185302734, 110.04888153076172, -0.7729034423828125, -26.73438262939453, 27.793792724609375, -170.60240173339844, 96.09172058105469, -120.84037780761719, 327.2294921875, 288.1945495605469, 26.28734588623047, 336.2501220703125, 361.517578125, 79.85187530517578, 127.25889587402344, 187.7667999267578, 288.8187255859375, 19.294761657714844, -23.817123413085938, 128.13360595703125, 454.1589660644531, 295.06964111328125, -79.24669647216797, 189.29466247558594, 76.84947204589844, 42.71783447265625, 533.793701171875, 255.75839233398438, 489.25341796875, 284.5962829589844, -11.001792907714844, -5.47174072265625, 148.8287353515625, -226.19210815429688, 154.52783203125, 42.136287689208984, 104.98237609863281, 10.137031555175781, -9.222198486328125, 9.280832290649414, 78.82280731201172, -3.627532958984375, 253.5681610107422, 109.44940185546875, 197.32394409179688, 41.682716369628906, 124.60841369628906, -93.31005859375, 5.946128845214844, 506.5186767578125, 59.267494201660156, 34.781410217285156, 150.07347106933594, -8.48846435546875, 214.3511962890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000226.npy"}
|
||||
{"epoch": 0.33186490455212925, "step": 227, "batch_size": 64, "mean": 144.4818878173828, "std": 172.9486083984375, "min": -364.01153564453125, "p10": -14.627217864990227, "median": 117.03218460083008, "p90": 368.58688659667973, "max": 627.6432495117188, "pos_frac": 0.859375, "sample": [319.3691101074219, 105.04501342773438, 162.24948120117188, 381.21673583984375, 220.92138671875, 45.83330535888672, 50.60633850097656, 114.41802215576172, 311.349609375, 8.663833618164062, 117.66187286376953, 124.00363159179688, 179.68807983398438, 27.504995346069336, -17.515125274658203, 28.847396850585938, 164.90298461914062, 98.40834045410156, 200.5847625732422, 627.6432495117188, 156.47691345214844, 106.5072021484375, 322.2635192871094, 204.46786499023438, 59.99872589111328, 291.447021484375, 162.66522216796875, 244.65719604492188, 116.40249633789062, 91.89297485351562, 150.6806640625, 65.31315612792969, 332.15765380859375, 4.910785675048828, -82.77366638183594, -112.31852722167969, 196.94189453125, 212.94186401367188, 507.73828125, 111.45710754394531, 181.39398193359375, 441.756103515625, -100.09335327148438, 580.0377197265625, 0.09414482116699219, 20.490455627441406, 178.3286590576172, 42.567787170410156, 219.72842407226562, 38.66710662841797, 127.99020385742188, 375.86260986328125, 103.7835693359375, -3.9004974365234375, 7.117387771606445, -40.905426025390625, 351.6101989746094, -364.01153564453125, -7.888767242431641, 84.90982055664062, -78.42609405517578, 23.694948196411133, 138.59115600585938, 510.2109680175781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000227.npy"}
|
||||
{"epoch": 0.3333333333333333, "step": 228, "batch_size": 64, "mean": 101.45246887207031, "std": 163.22012329101562, "min": -279.23333740234375, "p10": -103.75468597412109, "median": 105.03263092041016, "p90": 312.44248352050784, "max": 499.25787353515625, "pos_frac": 0.75, "sample": [-176.57730102539062, -238.177001953125, -279.23333740234375, 222.22573852539062, 136.34390258789062, 328.86468505859375, -185.78793334960938, 175.81671142578125, 182.68069458007812, -151.88827514648438, 9.70367431640625, 283.2176208496094, 499.25787353515625, 315.0797424316406, 437.38916015625, 323.0419921875, 351.8504638671875, 167.96142578125, 122.56837463378906, -30.574630737304688, -50.31599426269531, 288.703125, 108.41114044189453, 306.28887939453125, 179.3455810546875, 160.6101837158203, 132.24705505371094, 21.85150146484375, -104.34020233154297, 118.17106628417969, 30.216781616210938, 193.45968627929688, 6.491401672363281, 198.195068359375, 223.35464477539062, 88.49284362792969, 120.48136138916016, 80.83331298828125, 33.47328186035156, 47.63250732421875, 98.0411605834961, 237.73602294921875, -75.20166015625, -14.412216186523438, 93.38337707519531, 101.74605560302734, 433.2698059082031, 12.076118469238281, 79.91343688964844, 53.375038146972656, 261.79840087890625, 0.058132171630859375, -24.325355529785156, 175.31707763671875, 226.7051544189453, 165.12713623046875, 200.4992218017578, 108.31920623779297, -102.38848114013672, -30.012786865234375, -20.07630157470703, 29.151397705078125, -148.03909301757812, -46.469947814941406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000228.npy"}
|
||||
{"epoch": 0.33480176211453744, "step": 229, "batch_size": 64, "mean": 108.77888488769531, "std": 161.3918914794922, "min": -387.73236083984375, "p10": -54.27392349243163, "median": 95.43351364135742, "p90": 338.7661926269531, "max": 549.0750732421875, "pos_frac": 0.75, "sample": [-26.332443237304688, 14.122241973876953, -117.21099853515625, 403.0106201171875, 54.92599105834961, -195.10226440429688, 108.76737213134766, 190.45538330078125, 64.63548278808594, 218.61557006835938, 249.43365478515625, 340.1416320800781, 100.95175170898438, 363.5093688964844, 91.11814880371094, 99.7488784790039, 254.72503662109375, 106.2666244506836, 40.84596633911133, 336.9079284667969, -88.7003173828125, 66.92666625976562, -3.022502899169922, 219.68971252441406, 145.2178955078125, 40.94464874267578, 549.0750732421875, -160.8388671875, 48.18043518066406, 243.1539306640625, 146.40707397460938, 246.18206787109375, 183.69070434570312, 364.6563720703125, 339.5625915527344, 39.935768127441406, 316.58441162109375, 29.10291290283203, 84.12652587890625, -58.49432373046875, 71.6957778930664, 112.16291809082031, 44.13896942138672, 190.94224548339844, -44.42632293701172, 156.71795654296875, 168.94024658203125, 225.60911560058594, -30.984329223632812, -140.70492553710938, 107.57612609863281, 427.87542724609375, 30.281038284301758, 82.75848388671875, 85.81249237060547, 132.7230987548828, -8.201774597167969, 181.13482666015625, -15.025833129882812, 176.90904235839844, -387.73236083984375, -0.6114273071289062, -18.440170288085938, -39.21875762939453], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000229.npy"}
|
||||
{"epoch": 0.33627019089574156, "step": 230, "batch_size": 64, "mean": 165.08201599121094, "std": 161.2410125732422, "min": -251.34365844726562, "p10": -31.807445907592772, "median": 155.02716064453125, "p90": 407.29953308105473, "max": 510.2415771484375, "pos_frac": 0.84375, "sample": [77.03004455566406, 366.47674560546875, -22.506574630737305, 306.9488525390625, 209.72845458984375, 79.41631317138672, -41.234527587890625, 255.60801696777344, 95.09637451171875, 133.27291870117188, 213.5634307861328, 170.28073120117188, 18.923606872558594, 233.03207397460938, 18.225540161132812, 114.26058959960938, -57.946510314941406, 85.80928039550781, 99.08578491210938, 145.8090362548828, 41.83349609375, -96.49958801269531, -32.85291290283203, 202.1253662109375, 145.06631469726562, 179.27980041503906, 213.96192932128906, 340.0065612792969, 327.2413635253906, 156.18826293945312, 80.85322570800781, 31.002822875976562, 332.0582275390625, 452.5187683105469, 173.28089904785156, -122.69454193115234, 136.90509033203125, 173.21554565429688, 230.805908203125, 440.57794189453125, -29.368022918701172, -12.960189819335938, -251.34365844726562, -46.373313903808594, 46.18052673339844, 47.68995666503906, 411.5004577636719, 140.71051025390625, 375.3013610839844, 450.60137939453125, 172.69371032714844, 109.52290344238281, 103.78749084472656, 15.309366226196289, 415.32220458984375, 291.58245849609375, 484.2251281738281, 205.3268585205078, 397.49737548828125, 220.12635803222656, 230.86622619628906, 153.86605834960938, 217.18763732910156, 510.2415771484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000230.npy"}
|
||||
{"epoch": 0.3377386196769457, "step": 231, "batch_size": 64, "mean": 115.93913269042969, "std": 153.9716339111328, "min": -140.86212158203125, "p10": -45.92092666625976, "median": 92.36008834838867, "p90": 335.0958465576172, "max": 650.7132568359375, "pos_frac": 0.8125, "sample": [476.539794921875, 94.22636413574219, -42.08831024169922, 284.82147216796875, 15.066368103027344, 80.28785705566406, 340.6325988769531, 5.648193359375, 71.31570434570312, 95.34638214111328, 103.52798461914062, 351.99761962890625, -64.28712463378906, 88.56484985351562, 465.9037780761719, 145.2830352783203, 206.70924377441406, 92.15786743164062, 139.62051391601562, 206.73703002929688, 384.342529296875, 63.210147857666016, 107.48365020751953, 1.7834930419921875, 59.30104064941406, -79.54373931884766, 322.1767578125, -54.9500617980957, 213.55999755859375, 650.7132568359375, 461.8163757324219, 247.68582153320312, 127.39334106445312, 34.48096466064453, -140.86212158203125, 66.85185241699219, 107.34132385253906, 170.96990966796875, 83.06027221679688, -47.5634765625, -3.137847900390625, 34.50518798828125, 234.954345703125, 67.80465698242188, -23.91533660888672, 130.4464111328125, -23.112716674804688, -105.67623138427734, 57.40732955932617, 117.23013305664062, 305.92431640625, 165.44888305664062, 92.56230926513672, 7.879375457763672, -40.1703987121582, 116.77474975585938, -99.49122619628906, 108.17788696289062, 26.706283569335938, 18.98858642578125, 9.954910278320312, 145.29002380371094, 105.83206176757812, 32.45814514160156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000231.npy"}
|
||||
{"epoch": 0.3392070484581498, "step": 232, "batch_size": 64, "mean": 94.21968078613281, "std": 164.78802490234375, "min": -158.1475372314453, "p10": -84.68783416748046, "median": 59.586923599243164, "p90": 319.0439208984375, "max": 610.6156005859375, "pos_frac": 0.71875, "sample": [180.44973754882812, 97.40325164794922, 9.811561584472656, 57.93775177001953, 18.346847534179688, -141.74562072753906, 345.9702453613281, 253.14199829101562, -29.13201904296875, 250.21359252929688, 49.83589172363281, 465.9688720703125, 76.1180191040039, -68.75843048095703, -24.4791259765625, 64.08673858642578, -31.328033447265625, 185.79541015625, -110.53073120117188, 69.96158599853516, 155.44029235839844, 580.0111694335938, 62.488311767578125, 86.30111694335938, 136.4071044921875, -32.54263687133789, 610.6156005859375, -3.2093658447265625, 27.662567138671875, 256.6514892578125, 154.15432739257812, 206.86746215820312, -78.088623046875, 76.259521484375, -101.1923828125, 132.63290405273438, -134.68203735351562, 23.226585388183594, -87.51606750488281, 441.9376220703125, 24.655120849609375, -70.51949310302734, 1.9940643310546875, -103.51283264160156, 27.009326934814453, 85.56263732910156, 14.915771484375, -158.1475372314453, 266.58721923828125, 118.00167846679688, 350.8446044921875, 22.826976776123047, -7.85235595703125, 318.86968994140625, 223.08489990234375, -31.932512283325195, 123.94231414794922, 319.11859130859375, -24.676847457885742, 54.971229553222656, 61.2360954284668, 51.11563491821289, 72.90492248535156, 56.56798553466797], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000232.npy"}
|
||||
{"epoch": 0.3406754772393539, "step": 233, "batch_size": 64, "mean": 148.33291625976562, "std": 215.9307403564453, "min": -241.99899291992188, "p10": -72.1384292602539, "median": 100.21219635009766, "p90": 476.42311096191406, "max": 687.350341796875, "pos_frac": 0.703125, "sample": [14.72703742980957, 13.36385726928711, 499.6051025390625, -33.70594787597656, 193.0578155517578, 188.41131591796875, -171.90101623535156, -113.95934295654297, -5.4619598388671875, -187.08766174316406, 113.41487121582031, 34.961090087890625, 173.74085998535156, 266.6180114746094, 258.46832275390625, 687.350341796875, 125.0441665649414, 77.59432983398438, 526.2391967773438, 324.7850341796875, 47.11793518066406, 165.79190063476562, 87.009521484375, 557.7679443359375, 25.35979461669922, 274.05194091796875, -0.0843048095703125, -72.65576171875, -201.17822265625, -52.50041580200195, 148.2392120361328, 489.0712585449219, 206.95257568359375, 170.95399475097656, 304.39337158203125, 478.7880554199219, 33.166908264160156, 435.8603515625, -70.93132019042969, 41.16169738769531, 115.63622283935547, 414.14630126953125, -30.062877655029297, 341.6947937011719, 71.14602661132812, 372.3653564453125, -66.89881896972656, -241.99899291992188, -138.0094451904297, -24.939117431640625, 47.017066955566406, -29.525840759277344, 337.906982421875, 84.32902526855469, -31.559814453125, 212.4193878173828, -25.627422332763672, 470.9049072265625, -35.786624908447266, 619.859619140625, 388.56158447265625, 62.48298645019531, 318.11285400390625, 207.53036499023438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000233.npy"}
|
||||
{"epoch": 0.342143906020558, "step": 234, "batch_size": 64, "mean": 155.75323486328125, "std": 176.22540283203125, "min": -158.68519592285156, "p10": -62.04456939697264, "median": 129.79589080810547, "p90": 383.16207580566413, "max": 668.1612548828125, "pos_frac": 0.796875, "sample": [-72.71743774414062, 290.07427978515625, 271.7322998046875, 244.9324951171875, 365.60894775390625, 90.0645980834961, 166.70028686523438, 188.2913055419922, 431.6802062988281, -46.391265869140625, 286.6858825683594, 150.34571838378906, -158.68519592285156, -2.5822105407714844, 87.89730072021484, 263.7159729003906, 78.54443359375, -76.3824234008789, 75.10546875, 301.5477294921875, 668.1612548828125, 135.16165161132812, -9.392801284790039, -31.707984924316406, 75.7470932006836, -32.78633117675781, 178.785888671875, 349.7697448730469, 10.4371337890625, 346.190673828125, -14.048032760620117, 153.78524780273438, 442.61181640625, 112.84284973144531, 525.6820068359375, 124.1641845703125, 41.77709197998047, 178.79782104492188, 63.13932418823242, 284.13299560546875, 279.2776184082031, 136.459228515625, 262.2406311035156, 86.82654571533203, 18.65894317626953, 86.72306823730469, -97.46946716308594, 2.4723281860351562, 253.79190063476562, 104.5467529296875, 89.79450225830078, 445.015625, 603.2271728515625, -77.4292984008789, 300.60931396484375, 177.07455444335938, -68.75312805175781, 1.1903209686279297, 183.85731506347656, 53.55976867675781, 124.43013000488281, -109.788818359375, 390.6848449707031, 181.78717041015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000234.npy"}
|
||||
{"epoch": 0.3436123348017621, "step": 235, "batch_size": 64, "mean": 98.2317123413086, "std": 174.693603515625, "min": -449.4805908203125, "p10": -78.90911407470703, "median": 110.44636535644531, "p90": 305.5226104736329, "max": 512.7127075195312, "pos_frac": 0.734375, "sample": [34.401397705078125, 228.05484008789062, -449.4805908203125, 41.723907470703125, 150.4622039794922, 497.571044921875, 162.93914794921875, 319.4661865234375, -71.01235961914062, 196.01011657714844, -163.5419464111328, -27.649749755859375, 141.16012573242188, 291.7548828125, 33.094268798828125, 37.38362121582031, -76.55604553222656, 26.309059143066406, 107.10951232910156, -275.6221008300781, 23.337154388427734, 22.074493408203125, -15.549491882324219, 68.3724136352539, 354.82391357421875, 231.0092010498047, -47.94451141357422, -59.72123718261719, 176.35626220703125, 201.90809631347656, 119.7152099609375, 118.68431091308594, 311.4230651855469, -79.91757202148438, 113.78321838378906, 194.9775390625, -2.3973560333251953, 255.98037719726562, 21.079696655273438, 163.76266479492188, 28.200130462646484, 53.43479919433594, 508.7948913574219, 247.1175537109375, 183.26406860351562, 188.9575958251953, 167.43499755859375, -41.940391540527344, -183.118896484375, 155.8644561767578, 512.7127075195312, 262.17462158203125, -150.1109619140625, 3.8017120361328125, 116.43452453613281, -11.610540390014648, -82.09957122802734, 190.7126922607422, 102.48068237304688, 17.088821411132812, 192.95950317382812, 144.40025329589844, 372.7319030761719, -68.19102478027344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000235.npy"}
|
||||
{"epoch": 0.34508076358296624, "step": 236, "batch_size": 64, "mean": 159.12815856933594, "std": 144.53160095214844, "min": -94.7965087890625, "p10": -14.72569351196288, "median": 130.34526824951172, "p90": 372.09518737792973, "max": 486.8313293457031, "pos_frac": 0.875, "sample": [379.3598937988281, 63.586875915527344, 21.384201049804688, 159.84951782226562, 202.0568389892578, 135.7543182373047, 438.6578063964844, 23.240379333496094, 77.01878356933594, 132.77012634277344, 201.145751953125, 364.3888854980469, 103.05833435058594, 234.4566650390625, 311.9425964355469, 318.5939636230469, 50.077911376953125, 252.2601776123047, 12.483329772949219, 375.39788818359375, 134.84844970703125, -34.269317626953125, 77.1644515991211, 293.8656921386719, 237.92855834960938, 27.337520599365234, 144.93539428710938, 380.43804931640625, 245.89797973632812, 37.716819763183594, 461.1965026855469, 106.4653091430664, -41.06806945800781, 248.3561248779297, -4.673191070556641, 247.0482177734375, 119.8873062133789, 457.0302734375, 97.62084197998047, -19.03390884399414, 125.31397247314453, 118.24469757080078, 29.76045799255371, 45.38178253173828, 12.507801055908203, 211.31800842285156, 108.22608947753906, 113.26946258544922, -69.77285766601562, -94.7965087890625, 144.32107543945312, 486.8313293457031, 354.24322509765625, 127.92041015625, 205.17227172851562, 121.94010925292969, 323.3774108886719, 102.8932113647461, 4.6552734375, 192.1196746826172, -63.94232177734375, 337.02667236328125, -31.468341827392578, 203.48219299316406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000236.npy"}
|
||||
{"epoch": 0.3465491923641703, "step": 237, "batch_size": 64, "mean": 119.61394500732422, "std": 174.1492156982422, "min": -335.4542541503906, "p10": -73.18541259765624, "median": 111.21974563598633, "p90": 376.6554138183596, "max": 662.3936767578125, "pos_frac": 0.765625, "sample": [173.4807586669922, 201.84222412109375, 194.333984375, 154.29556274414062, -0.8100318908691406, 121.33728790283203, 95.60669708251953, 147.25360107421875, 32.69504928588867, 177.01275634765625, 55.68505096435547, 31.682998657226562, 418.38128662109375, 129.06539916992188, 226.18917846679688, -61.73834228515625, -104.92527770996094, 142.1345977783203, -76.34366607666016, 93.49360656738281, 138.5714111328125, -104.06349182128906, 399.9534912109375, 662.3936767578125, -67.47691345214844, 399.2975769042969, 323.8236999511719, 4.552947998046875, 6.5082244873046875, 17.763500213623047, 484.3402099609375, 102.73078918457031, 217.7996368408203, 230.4561309814453, 13.811317443847656, 204.66468811035156, 29.26551055908203, 146.46249389648438, 301.1125183105469, -42.27991485595703, 216.13003540039062, 84.00508880615234, 173.67758178710938, 255.13803100585938, 409.02825927734375, 44.67559814453125, -3.67132568359375, 19.84508514404297, -335.4542541503906, 285.1650695800781, -51.295936584472656, 72.04451751708984, -162.00584411621094, -46.34584045410156, 187.19073486328125, 279.4932861328125, -75.63191223144531, 445.87615966796875, 6.219474792480469, -122.15359497070312, -51.533447265625, 119.70870208740234, 70.8634033203125, 213.96322631835938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000237.npy"}
|
||||
{"epoch": 0.34801762114537443, "step": 238, "batch_size": 64, "mean": 119.60325622558594, "std": 171.10052490234375, "min": -173.07174682617188, "p10": -42.20869140624998, "median": 81.90255355834961, "p90": 284.00517578125005, "max": 767.708984375, "pos_frac": 0.765625, "sample": [-14.824783325195312, -24.153335571289062, 5.6010589599609375, 141.1007537841797, 19.221271514892578, -4.254852294921875, 134.69224548339844, 278.6541748046875, -111.15548706054688, 412.95989990234375, 76.91091918945312, 162.3740692138672, 75.64335632324219, 61.98349380493164, -11.301813125610352, 187.72117614746094, 286.2984619140625, -70.20500183105469, 53.452415466308594, 47.50715255737305, 6.735715866088867, 663.708984375, 33.12919616699219, 145.7330780029297, -49.94670104980469, 504.2480163574219, 100.51223754882812, 18.54553985595703, 143.00823974609375, -110.12657165527344, 134.38931274414062, 119.27902221679688, 161.92156982421875, 122.74456024169922, 357.1466064453125, 28.663196563720703, 62.14596176147461, 117.72552490234375, 767.708984375, 150.56155395507812, -10.595783233642578, 14.42718505859375, 75.8963623046875, -103.26412200927734, -15.461959838867188, 252.35302734375, 212.61419677734375, 189.99334716796875, 64.20645904541016, 425.9629211425781, 204.5967559814453, 13.397750854492188, 185.8740234375, 240.86251831054688, -173.07174682617188, 275.99420166015625, 263.9877624511719, 25.437623977661133, -11.833061218261719, -18.780303955078125, 172.41583251953125, 159.39068603515625, -62.74896240234375, 86.8941879272461], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000238.npy"}
|
||||
{"epoch": 0.34948604992657856, "step": 239, "batch_size": 64, "mean": 133.9548797607422, "std": 163.15536499023438, "min": -280.0307922363281, "p10": -41.054041290283195, "median": 124.28812789916992, "p90": 361.0255096435547, "max": 505.1696472167969, "pos_frac": 0.796875, "sample": [363.0605163574219, 175.11668395996094, -141.55418395996094, 356.27716064453125, -97.072509765625, 194.85125732421875, 67.2071533203125, 147.0050048828125, -43.486446380615234, -140.4312286376953, 50.04115676879883, 47.72308349609375, 77.42410278320312, 269.8314514160156, 76.10961151123047, 12.212635040283203, 505.1696472167969, 122.90653991699219, 180.25381469726562, 125.66971588134766, 207.33042907714844, 193.3580780029297, 243.5474395751953, -58.03895950317383, 0.09575653076171875, 46.27238464355469, 179.2210235595703, 394.9136657714844, 211.9215850830078, 107.16947937011719, -7.621282577514648, 21.335235595703125, 336.01483154296875, 407.76019287109375, -156.47239685058594, 63.56501007080078, -9.526817321777344, 365.0067138671875, -8.628768920898438, 337.1651916503906, 32.782108306884766, 348.6876220703125, 314.7690734863281, 151.13006591796875, -24.859603881835938, 42.721580505371094, -280.0307922363281, 142.00125122070312, -35.3784294128418, 150.03952026367188, 149.09808349609375, 287.80010986328125, 317.720947265625, 69.91520690917969, 468.9234619140625, 217.31675720214844, 377.45263671875, 84.11485290527344, 101.40605163574219, 126.5619888305664, 60.195404052734375, -25.50261688232422, 258.2388916015625, 15.303731918334961], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000239.npy"}
|
||||
{"epoch": 0.3509544787077827, "step": 240, "batch_size": 64, "mean": 112.62922668457031, "std": 186.5790252685547, "min": -237.2541961669922, "p10": -169.01607360839841, "median": 122.86301803588867, "p90": 359.1159820556641, "max": 572.9171752929688, "pos_frac": 0.765625, "sample": [142.4884033203125, 139.1178436279297, 164.793701171875, 99.71488189697266, 126.22151184082031, 350.14862060546875, 153.71975708007812, 414.42840576171875, -215.47998046875, -55.242042541503906, 199.46131896972656, 138.97239685058594, -225.71466064453125, 169.57623291015625, 362.9591369628906, 61.78453826904297, -185.11050415039062, 71.77864074707031, 38.73725128173828, 245.53372192382812, 172.42819213867188, 409.59210205078125, 167.39828491210938, 142.19775390625, 65.87237548828125, 3.466991424560547, -235.68191528320312, 123.0315170288086, 79.55963134765625, 497.70587158203125, 191.4110870361328, 122.69451904296875, 114.53089904785156, 21.831382751464844, -195.2496337890625, 344.79400634765625, 230.0205535888672, 176.91064453125, -211.31268310546875, 75.60680389404297, -76.50367736816406, 307.4955749511719, -131.46240234375, 37.7461051940918, 572.9171752929688, -130.63221740722656, 277.4398193359375, -29.827638626098633, 264.3836669921875, 178.3272247314453, 116.97857666015625, 432.8583984375, -39.87779235839844, 107.58025360107422, -119.66331481933594, 144.84161376953125, 102.14940643310547, -51.258880615234375, 111.986328125, 196.5825653076172, 48.09477233886719, 146.2193603515625, 484.4523010253906, -237.2541961669922], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000240.npy"}
|
||||
{"epoch": 0.3524229074889868, "step": 241, "batch_size": 64, "mean": 176.90191650390625, "std": 206.40151977539062, "min": -230.8702850341797, "p10": -33.95944099426269, "median": 160.7864761352539, "p90": 440.98028869628916, "max": 853.2113037109375, "pos_frac": 0.828125, "sample": [102.42900848388672, 91.26914978027344, -224.06787109375, -22.67547035217285, 31.798065185546875, 233.08746337890625, 230.75482177734375, 464.491943359375, 83.46621704101562, 180.98910522460938, 127.60598754882812, -62.72796630859375, -21.051288604736328, 522.9176635742188, 3.975996971130371, 169.82711791992188, -34.8950309753418, 359.7945251464844, 184.2871551513672, 164.35968017578125, 361.67816162109375, 166.03598022460938, 95.2237777709961, 138.67478942871094, 853.2113037109375, 418.2663269042969, 203.85928344726562, 145.06021118164062, 598.9522705078125, 42.89793395996094, 791.8960571289062, 450.71484375, 320.4619140625, 74.50543212890625, 90.26370239257812, 163.8817596435547, 241.49655151367188, 96.63656616210938, -31.776397705078125, 168.8589324951172, 113.70056915283203, 48.18946838378906, 109.54893493652344, -9.222137451171875, 238.03167724609375, 74.78460693359375, 325.1209716796875, -68.56228637695312, 229.29434204101562, 194.9404296875, 71.8394775390625, 42.429405212402344, 278.7948913574219, 501.5081481933594, 276.3839111328125, 157.69119262695312, -170.56488037109375, 291.29327392578125, 252.08474731445312, 254.21011352539062, -230.8702850341797, 389.9983215332031, 91.75491333007812, -117.09326171875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000241.npy"}
|
||||
{"epoch": 0.35389133627019087, "step": 242, "batch_size": 64, "mean": 126.48635864257812, "std": 182.338134765625, "min": -290.371826171875, "p10": -53.51850891113279, "median": 95.41917419433594, "p90": 313.29255676269537, "max": 740.7994995117188, "pos_frac": 0.796875, "sample": [-33.18549346923828, 3.8505210876464844, 170.51318359375, 269.91168212890625, 37.56515121459961, -290.371826171875, 12.628372192382812, -9.18756103515625, 319.2076416015625, 145.70220947265625, 28.214879989624023, 263.41265869140625, 650.2230224609375, -15.581361770629883, 4.256172180175781, 65.58759307861328, 132.2659912109375, 132.6822509765625, 60.997467041015625, 13.772726058959961, 227.12818908691406, 89.4012451171875, 259.129638671875, 228.66293334960938, 263.564208984375, -62.23265838623047, 91.66912078857422, 53.029502868652344, 124.65697479248047, -94.87374877929688, 158.40069580078125, 216.60498046875, 100.54425811767578, -147.72625732421875, 270.2345886230469, 108.77933502197266, 299.4906921386719, 473.74261474609375, 6.947914123535156, 87.01025390625, 95.42176818847656, 206.6499481201172, -5.866607666015625, 740.7994995117188, 27.063941955566406, 72.75687408447266, 473.11798095703125, 95.41658020019531, 11.677505493164062, -3.9102001190185547, -124.4846420288086, 447.67535400390625, 179.3648223876953, -31.15930938720703, 63.52391052246094, 126.87786865234375, 145.2564239501953, -146.6427459716797, 246.7331085205078, 240.18472290039062, 6.299112319946289, 466.5198059082031, 110.92495727539062, -65.70355987548828], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000242.npy"}
|
||||
{"epoch": 0.355359765051395, "step": 243, "batch_size": 64, "mean": 155.12164306640625, "std": 187.65859985351562, "min": -309.8066101074219, "p10": -57.78369674682616, "median": 131.4586410522461, "p90": 385.7689239501953, "max": 751.2059936523438, "pos_frac": 0.859375, "sample": [386.05523681640625, 32.08065414428711, 89.65978240966797, 480.91302490234375, 198.47808837890625, 89.5630111694336, 129.6392364501953, 177.25538635253906, -86.62118530273438, -104.0468978881836, -63.816314697265625, 80.69393920898438, 230.39556884765625, 12.088937759399414, -144.9358367919922, 20.094207763671875, -135.5350341796875, 331.51409912109375, 139.2547149658203, 165.06414794921875, -309.8066101074219, 37.70274353027344, 178.54595947265625, 16.017005920410156, 21.98111915588379, 349.1318359375, 51.779754638671875, 210.50460815429688, 87.68737030029297, 417.406005859375, 293.1231689453125, 258.39910888671875, -43.70758819580078, 150.7129364013672, -91.78778076171875, 361.87152099609375, 265.86529541015625, 565.292724609375, 357.2020263671875, 252.77734375, 311.0813903808594, 125.41004180908203, 370.570068359375, 751.2059936523438, 420.854248046875, 46.81007385253906, 133.27804565429688, 45.4687385559082, 385.1008605957031, -13.876953125, 164.43505859375, 210.55245971679688, 23.6824951171875, 120.61187744140625, 510.4537658691406, 42.61090850830078, 149.02767944335938, 78.72148132324219, 5.541572570800781, 252.36557006835938, 15.450576782226562, 49.621002197265625, 67.68540954589844, 202.6287841796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000243.npy"}
|
||||
{"epoch": 0.3568281938325991, "step": 244, "batch_size": 64, "mean": 159.97470092773438, "std": 167.3809814453125, "min": -160.62986755371094, "p10": -18.313524627685542, "median": 145.09481048583984, "p90": 407.21611633300785, "max": 639.5817260742188, "pos_frac": 0.828125, "sample": [66.21037292480469, -20.33415985107422, 119.40225219726562, 173.11111450195312, 27.725460052490234, 57.90742111206055, 452.5216979980469, 131.38516235351562, -58.666473388671875, -13.241180419921875, 182.30943298339844, 236.38204956054688, 14.176483154296875, 318.5107421875, 40.19110107421875, 146.37123107910156, -4.228130340576172, 454.8946533203125, 32.989627838134766, 105.00739288330078, -160.62986755371094, 122.277587890625, 297.123779296875, -67.99398803710938, 194.349609375, 34.45758819580078, 170.98492431640625, 236.39816284179688, 155.1299591064453, 40.624786376953125, 255.3324432373047, 114.08768463134766, 315.54193115234375, 312.8215637207031, 474.904052734375, 565.3275756835938, 184.10829162597656, 411.4911804199219, 52.90568161010742, 639.5817260742188, 242.93759155273438, -13.598709106445312, 397.240966796875, 184.33627319335938, 161.10797119140625, -76.49530792236328, 152.81748962402344, -36.001861572265625, -10.837127685546875, 255.70350646972656, 33.92890930175781, 20.343238830566406, 151.3513946533203, 90.17264556884766, 1.899871826171875, 113.85342407226562, 356.84735107421875, 215.0545654296875, -62.48311233520508, 143.81838989257812, 97.83346557617188, 482.78082275390625, 222.32640075683594, 301.9913330078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000244.npy"}
|
||||
{"epoch": 0.35829662261380324, "step": 245, "batch_size": 64, "mean": 124.18669128417969, "std": 179.62229919433594, "min": -421.6874694824219, "p10": -27.989055633544915, "median": 99.34983825683594, "p90": 329.10916748046884, "max": 623.2349243164062, "pos_frac": 0.796875, "sample": [120.18347930908203, 415.58447265625, 177.73272705078125, 132.86573791503906, 184.40289306640625, 161.5943145751953, 60.456642150878906, -7.6892242431640625, 217.8458251953125, 125.50323486328125, 88.13455200195312, 102.55046081542969, 159.5196990966797, 24.09958267211914, 144.29656982421875, 151.13262939453125, 213.437744140625, 286.2708740234375, 92.45883178710938, 17.675334930419922, 542.0169677734375, 126.25083923339844, -19.71255111694336, 341.6903076171875, 65.64842987060547, -4.2945404052734375, 244.00108337402344, -31.536128997802734, -209.23265075683594, -19.021690368652344, 299.753173828125, 151.99551391601562, 623.2349243164062, -184.6212921142578, 186.96356201171875, 21.156837463378906, 19.878440856933594, 96.14921569824219, 295.3259582519531, 83.89857482910156, -15.93741226196289, 212.1571044921875, 141.18406677246094, -78.33570861816406, 76.14665985107422, -421.6874694824219, 280.73297119140625, 297.40509033203125, 30.050270080566406, 221.3937225341797, 347.1995544433594, 36.84043884277344, 590.8084106445312, -81.17042541503906, -1.6633110046386719, 505.26947021484375, 78.02970886230469, 110.57044219970703, 12.067062377929688, -119.8463134765625, 75.56039428710938, 60.43659973144531, 78.45897674560547, 14.676551818847656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000245.npy"}
|
||||
{"epoch": 0.35976505139500736, "step": 246, "batch_size": 64, "mean": 126.03412628173828, "std": 183.54161071777344, "min": -267.17095947265625, "p10": -74.38367843627928, "median": 84.12926864624023, "p90": 354.9515716552735, "max": 744.6967163085938, "pos_frac": 0.796875, "sample": [201.6971435546875, 347.5599365234375, 149.82907104492188, 84.41141510009766, 375.6986083984375, 211.8839874267578, 744.6967163085938, -177.80792236328125, 42.884639739990234, -267.17095947265625, 57.453575134277344, 89.3032455444336, 73.735107421875, 0.3749103546142578, 166.89254760742188, 366.33282470703125, 302.2088623046875, 73.99384307861328, 45.71287536621094, 55.83837890625, 55.11981201171875, 375.8821716308594, 145.08029174804688, -78.37886810302734, 113.69392395019531, -95.47372436523438, 217.86929321289062, -63.92950439453125, 332.8463134765625, -114.25926971435547, 4.6976318359375, -27.917686462402344, 119.50502014160156, 106.65629577636719, 308.59197998046875, 50.36974334716797, 10.818498611450195, -21.979045867919922, 5.547943115234375, 236.85150146484375, 358.1194152832031, 129.71661376953125, 36.18073272705078, 190.07589721679688, 83.84712219238281, 153.31051635742188, 78.87886047363281, -65.06156921386719, 7.831249237060547, 223.7102508544922, -80.77323913574219, 91.10289001464844, 319.4315185546875, 205.55516052246094, 52.3173828125, 57.69569396972656, 10.791332244873047, 589.0223999023438, 299.4904479980469, 607.0714111328125, -34.52300262451172, 225.79379272460938, -95.59831237792969, -4.923854827880859], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000246.npy"}
|
||||
{"epoch": 0.36123348017621143, "step": 247, "batch_size": 64, "mean": 176.29876708984375, "std": 193.0809326171875, "min": -293.2818908691406, "p10": -28.191698455810528, "median": 155.4546127319336, "p90": 443.40687255859376, "max": 642.8453369140625, "pos_frac": 0.875, "sample": [-255.6888885498047, 101.04002380371094, -91.31251525878906, 392.7423095703125, 86.57682800292969, 456.093505859375, 163.35372924804688, 436.13275146484375, -49.99365997314453, 360.31414794921875, 373.4419250488281, 470.1671447753906, 203.22821044921875, 293.6929626464844, 67.86203002929688, 314.06793212890625, 478.71221923828125, 49.51123809814453, 345.9277648925781, 306.17767333984375, 170.04489135742188, 39.51963806152344, 32.72625732421875, 122.04576873779297, 22.437728881835938, 248.1430206298828, 30.020301818847656, 84.24284362792969, 99.06746673583984, 118.09423828125, 237.75723266601562, 342.77886962890625, 92.73395538330078, 151.22964477539062, 58.89326477050781, 392.960693359375, 515.8489990234375, 354.82891845703125, 103.72349548339844, -153.01394653320312, 16.87594223022461, 198.32545471191406, -9.74871826171875, 188.776123046875, 62.55902862548828, 17.296165466308594, 159.67958068847656, 374.66497802734375, 596.110107421875, 92.77334594726562, 39.23047637939453, -125.9256591796875, 92.84431457519531, 642.8453369140625, 213.00473022460938, -36.09583282470703, 177.50396728515625, 303.24884033203125, 182.4051055908203, -293.2818908691406, 446.52435302734375, 16.663009643554688, 140.0789337158203, 220.6326904296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000247.npy"}
|
||||
{"epoch": 0.36270190895741555, "step": 248, "batch_size": 64, "mean": 153.8965606689453, "std": 138.45750427246094, "min": -165.0022430419922, "p10": -1.5450597763061502, "median": 152.35977935791016, "p90": 332.7976135253906, "max": 456.97161865234375, "pos_frac": 0.890625, "sample": [317.73138427734375, 332.2038879394531, 96.33588409423828, 175.29241943359375, 201.3985137939453, 0.6964321136474609, 152.1393585205078, 347.8431091308594, 132.3270721435547, -97.68966674804688, 17.304367065429688, 247.97259521484375, 181.62548828125, 36.020599365234375, 215.98008728027344, 58.216949462890625, 27.187074661254883, 75.34364318847656, 167.21417236328125, -6.356586456298828, 152.5802001953125, 250.02035522460938, 259.45635986328125, 383.3395690917969, 19.1002197265625, -39.94068908691406, 120.7044906616211, 332.79193115234375, 126.27595520019531, 238.88088989257812, 169.9006805419922, -165.0022430419922, 149.54550170898438, 268.77838134765625, 157.1874237060547, 191.24851989746094, 85.19235229492188, 7.8091278076171875, 8.460617065429688, 52.6853141784668, 199.02284240722656, 274.0780944824219, 182.91436767578125, 429.42547607421875, 101.57969665527344, 97.62797546386719, 447.10113525390625, 302.3161315917969, 38.451873779296875, 456.97161865234375, 19.357093811035156, 187.69717407226562, 332.800048828125, 239.14439392089844, 247.50997924804688, 358.2533874511719, -2.5056991577148438, 58.57673645019531, 117.97157287597656, 295.4755859375, 119.31578826904297, 5.032337188720703, -4.272518157958984, -102.26734924316406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000248.npy"}
|
||||
{"epoch": 0.3641703377386197, "step": 249, "batch_size": 64, "mean": 84.9371109008789, "std": 188.2987518310547, "min": -362.67083740234375, "p10": -151.67853546142575, "median": 77.01847076416016, "p90": 293.06227416992186, "max": 695.1004028320312, "pos_frac": 0.671875, "sample": [-53.87730407714844, 293.13092041015625, -100.67509460449219, -99.99018859863281, 115.7939224243164, -129.11978149414062, 19.843765258789062, 129.95359802246094, 74.2965087890625, 358.5647888183594, 35.466522216796875, 58.76519775390625, 76.85044860839844, 61.96454620361328, 218.3444061279297, -362.67083740234375, -113.06132507324219, 375.816650390625, 150.8589324951172, -76.49711608886719, 159.98562622070312, 190.4327392578125, -165.05490112304688, 148.11300659179688, -33.75550842285156, 351.1961669921875, -161.34657287597656, 282.1697998046875, 150.69039916992188, 86.8541259765625, -29.475276947021484, -29.785762786865234, 236.30886840820312, 65.65513610839844, 129.97256469726562, -24.381072998046875, 183.55453491210938, -327.5758056640625, 125.68363952636719, 246.7127685546875, 210.97018432617188, -21.871856689453125, 695.1004028320312, -93.42967224121094, -205.92202758789062, -164.6812286376953, 59.696998596191406, 34.39390182495117, 112.07742309570312, 196.387939453125, 124.13684844970703, 260.6038818359375, -187.41677856445312, 267.7026672363281, 292.902099609375, 279.6849365234375, 77.18649291992188, -120.23440551757812, -33.340782165527344, 202.70863342285156, 34.248390197753906, 383.00531005859375, 410.91058349609375, 1.4419021606445312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000249.npy"}
|
||||
{"epoch": 0.3656387665198238, "step": 250, "batch_size": 64, "mean": 158.41148376464844, "std": 160.35458374023438, "min": -175.4219970703125, "p10": -33.96413478851318, "median": 151.82342529296875, "p90": 386.34230346679703, "max": 513.8613891601562, "pos_frac": 0.828125, "sample": [262.6365661621094, 20.271909713745117, 42.456573486328125, -7.255836486816406, -52.79229736328125, 185.1031036376953, 93.90067291259766, 468.5675048828125, 169.11239624023438, -98.51762390136719, 82.78600311279297, 507.7330627441406, 243.15740966796875, 148.5418243408203, 246.18466186523438, 151.5997772216797, 33.65392303466797, 140.7647705078125, 326.94317626953125, 347.95513916015625, 152.00335693359375, -84.68291473388672, -34.882728576660156, 96.2186508178711, 96.51856994628906, 212.234130859375, 89.10431671142578, -28.92122459411621, 72.97930908203125, -4.933349609375, -31.820749282836914, 42.65742492675781, 156.7063751220703, 280.0104675292969, -52.83544158935547, 498.82183837890625, 165.88909912109375, 180.83212280273438, -121.10430145263672, 11.934364318847656, 209.20460510253906, 402.7939453125, 290.27117919921875, 180.6436309814453, 151.64349365234375, 59.33129119873047, 261.49627685546875, 144.78744506835938, 291.572021484375, 142.7763671875, 68.35902404785156, 3.4475860595703125, 201.84912109375, 270.0278625488281, -175.4219970703125, 224.9923095703125, 412.54913330078125, 217.8777313232422, 165.66470336914062, 469.6133117675781, 325.58294677734375, 18.827287673950195, 513.8613891601562, 277.05242919921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000250.npy"}
|
||||
{"epoch": 0.3671071953010279, "step": 251, "batch_size": 64, "mean": 106.85773468017578, "std": 183.47549438476562, "min": -365.3336181640625, "p10": -59.44277343749998, "median": 90.65372085571289, "p90": 327.2239624023438, "max": 699.09326171875, "pos_frac": 0.703125, "sample": [368.1737365722656, 12.05984878540039, 326.12884521484375, -19.48339080810547, 222.65682983398438, -265.77142333984375, -365.3336181640625, 699.09326171875, -5.727165222167969, 189.1509246826172, 184.0084228515625, -131.44940185546875, 327.69329833984375, 54.528289794921875, 114.0950927734375, -35.841827392578125, -276.0381164550781, 475.1542053222656, -25.155019760131836, 170.64454650878906, 25.147991180419922, 281.9561767578125, -14.869789123535156, -25.30327606201172, 29.3187255859375, 153.0132293701172, -119.95025634765625, -67.06338500976562, 124.19113159179688, 234.6390380859375, 67.26910400390625, -129.60003662109375, 343.0290832519531, -38.55133819580078, 60.29882049560547, 220.95632934570312, 77.58235931396484, 133.91354370117188, 150.38829040527344, 141.94247436523438, 11.930160522460938, 237.90040588378906, -13.403253555297852, 286.26416015625, -4.1443634033203125, 355.3855285644531, 197.6637420654297, 185.24229431152344, 89.9727554321289, 121.82886505126953, -32.156150817871094, 16.60106658935547, 560.6196899414062, 79.81723022460938, 316.87811279296875, 175.70269775390625, -41.661346435546875, 194.173583984375, 91.33468627929688, -11.794971466064453, 29.126914978027344, 138.66104125976562, 56.19074249267578, 129.8658905029297], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000251.npy"}
|
||||
{"epoch": 0.368575624082232, "step": 252, "batch_size": 64, "mean": 144.34500122070312, "std": 149.48019409179688, "min": -495.4229736328125, "p10": -15.836193847656244, "median": 157.97798919677734, "p90": 334.6926696777344, "max": 451.053955078125, "pos_frac": 0.859375, "sample": [285.482666015625, 372.5810852050781, 54.0981330871582, 220.31292724609375, 251.38963317871094, 61.96990966796875, 145.42825317382812, 346.7507629394531, 81.89151000976562, 165.94320678710938, 217.5534210205078, 23.885238647460938, 168.525634765625, 58.84974670410156, 115.76914978027344, 165.1454315185547, 55.13779067993164, 135.27110290527344, 182.13348388671875, -57.889854431152344, 252.1761932373047, 171.8650665283203, 20.880168914794922, 409.32049560546875, 254.47854614257812, -123.17533874511719, -20.960298538208008, 114.8773193359375, -18.249954223632812, 150.92628479003906, -27.797626495361328, 184.5087890625, 8.26707649230957, 141.53311157226562, 86.302734375, 110.67828369140625, 110.63717651367188, 403.1044616699219, 368.24749755859375, 312.2826232910156, 32.13275909423828, 165.02969360351562, 178.9349822998047, 48.09245300292969, 193.20401000976562, 336.25146484375, 274.4161376953125, 294.3132629394531, 192.7052001953125, 166.8030548095703, -10.204086303710938, -495.4229736328125, 331.05548095703125, 267.25823974609375, 113.9434814453125, 107.33787536621094, -8.746744155883789, 172.52398681640625, 451.053955078125, -57.91239547729492, 79.67826843261719, 65.3355941772461, 186.97811889648438, 193.18685913085938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000252.npy"}
|
||||
{"epoch": 0.3700440528634361, "step": 253, "batch_size": 64, "mean": 140.40821838378906, "std": 173.90423583984375, "min": -381.0729064941406, "p10": -72.01529006958008, "median": 125.19615173339844, "p90": 370.7252319335939, "max": 575.4754638671875, "pos_frac": 0.8125, "sample": [-68.58133697509766, 26.96770477294922, 275.7210388183594, 49.07624816894531, 16.749267578125, 141.49917602539062, 226.39089965820312, 53.528404235839844, 185.89747619628906, 80.71186828613281, 97.74113464355469, 575.4754638671875, 240.5440673828125, 66.25048065185547, 212.65945434570312, 116.19064331054688, 396.5731506347656, 86.03352355957031, -82.21536254882812, 105.42381286621094, -26.36207389831543, -381.0729064941406, 235.47442626953125, 381.8414001464844, 344.7875061035156, 9.331056594848633, 442.487548828125, 33.78235626220703, -21.149505615234375, 50.70560073852539, 17.821449279785156, -74.186279296875, 134.8316192626953, 257.0285339355469, 293.99359130859375, 227.71878051757812, 277.37469482421875, -128.79051208496094, 287.23114013671875, 115.0114517211914, 251.4567413330078, 336.79571533203125, 163.56033325195312, 279.6545715332031, 407.969482421875, 115.46635437011719, 105.35389709472656, 412.3382263183594, 134.20166015625, -226.14410400390625, 343.27801513671875, 169.91458129882812, -90.5908203125, 48.359825134277344, -73.48698425292969, 110.84010314941406, 161.7591552734375, 229.43650817871094, 220.26185607910156, -32.65899658203125, -53.14407730102539, 42.860595703125, 465.9612731933594, 182.1849365234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000253.npy"}
|
||||
{"epoch": 0.37151248164464024, "step": 254, "batch_size": 64, "mean": 119.60063171386719, "std": 200.97354125976562, "min": -268.7315979003906, "p10": -116.69888763427734, "median": 92.71843338012695, "p90": 400.39639587402365, "max": 815.716064453125, "pos_frac": 0.75, "sample": [110.89276123046875, -37.415077209472656, 152.69198608398438, 85.51862335205078, 100.86666870117188, 137.0242462158203, 24.24188232421875, -82.78163146972656, 149.68255615234375, 38.947975158691406, 134.12957763671875, 160.68467712402344, 232.33651733398438, 55.97477722167969, -122.27574157714844, -268.7315979003906, 198.11944580078125, -50.55241012573242, 89.93743896484375, 219.94552612304688, 4.853183746337891, 219.821044921875, 506.4474792480469, 323.6724853515625, 345.0406188964844, -191.67019653320312, 426.985107421875, 302.4886169433594, 6.968467712402344, 18.841012954711914, 95.49942779541016, 198.4368438720703, 73.1377182006836, -195.5631103515625, 19.705326080322266, 192.47561645507812, -70.56328582763672, 424.12030029296875, -109.83828735351562, 61.19449234008789, -25.743980407714844, -206.55059814453125, 61.55845642089844, 473.4835205078125, -4.594457626342773, 219.8704376220703, 4.011072158813477, 342.66656494140625, 64.5568618774414, 185.1990966796875, 815.716064453125, 163.68934631347656, 85.892822265625, -17.409339904785156, -160.79776000976562, 559.2754516601562, 120.9442138671875, 233.91854858398438, 157.35890197753906, 67.10285949707031, -119.63914489746094, 537.9383544921875, 148.72250366210938, -34.02044677734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000254.npy"}
|
||||
{"epoch": 0.37298091042584436, "step": 255, "batch_size": 64, "mean": 128.23907470703125, "std": 180.14280700683594, "min": -425.31768798828125, "p10": -49.17146377563476, "median": 104.87778854370117, "p90": 349.645428466797, "max": 582.887451171875, "pos_frac": 0.78125, "sample": [582.887451171875, -109.3348388671875, 27.659881591796875, 360.21759033203125, 151.96096801757812, -49.407745361328125, -47.96520233154297, -32.81999206542969, -425.31768798828125, 61.2296257019043, 288.57623291015625, 92.901123046875, -9.153533935546875, 21.70351791381836, 226.46202087402344, 224.741943359375, -157.76229858398438, -217.30068969726562, 166.39785766601562, 288.3308410644531, 244.59075927734375, 45.88661193847656, 80.40892791748047, 18.121768951416016, 98.85018920898438, -48.620140075683594, -125.73532104492188, 378.0064392089844, 3.680927276611328, 177.97079467773438, 216.4662322998047, 236.82037353515625, 72.53787231445312, 314.5969543457031, 370.26751708984375, 313.2315368652344, 262.2248840332031, 361.8643493652344, 324.90948486328125, 538.853271484375, 39.584144592285156, -5.455318450927734, 279.11712646484375, 146.49835205078125, 21.943538665771484, 193.89529418945312, 64.99972534179688, 223.17257690429688, -117.50642395019531, 324.97705078125, 411.39569091796875, 53.17612838745117, 135.6492919921875, 33.695594787597656, -18.04766082763672, 133.52313232421875, 30.516273498535156, 110.90538787841797, 32.517478942871094, 147.3076171875, -42.83521270751953, 79.07649230957031, 318.30108642578125, 281.95263671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000255.npy"}
|
||||
{"epoch": 0.3744493392070485, "step": 256, "batch_size": 64, "mean": 136.01217651367188, "std": 173.10562133789062, "min": -305.7796630859375, "p10": -62.291674041748045, "median": 118.13191604614258, "p90": 351.99562377929686, "max": 585.5707397460938, "pos_frac": 0.765625, "sample": [95.79815673828125, 520.8592529296875, 94.22199249267578, 230.06288146972656, 193.21798706054688, 252.00997924804688, 31.016427993774414, 163.35174560546875, -33.026817321777344, 13.978744506835938, 296.80615234375, 217.07879638671875, 220.83395385742188, 351.13177490234375, 251.73239135742188, -134.5611572265625, 61.254024505615234, 200.3269500732422, -305.7796630859375, 31.244346618652344, 282.66986083984375, 115.94331359863281, -124.88900756835938, 291.4373474121094, 59.48859786987305, 261.9087829589844, -60.92705535888672, -62.87651062011719, -66.26052856445312, 183.5629119873047, 379.75506591796875, -4.7972869873046875, 212.069091796875, 86.95855712890625, 61.511199951171875, 128.20742797851562, 347.7374572753906, -69.13812255859375, 52.38465881347656, 42.70106506347656, -176.9988250732422, 38.723785400390625, 28.148590087890625, 16.077972412109375, 178.37106323242188, -22.92477798461914, 203.29141235351562, 120.32051849365234, -18.61505126953125, 268.25189208984375, -44.960235595703125, 105.06800842285156, -8.152626037597656, 199.58880615234375, 352.3658447265625, 388.05902099609375, 142.09510803222656, 65.84402465820312, 585.5707397460938, 526.339599609375, 222.25503540039062, 317.8383483886719, -6.589834213256836, 355.8064880371094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000256.npy"}
|
||||
{"epoch": 0.37591776798825255, "step": 257, "batch_size": 64, "mean": 143.2915802001953, "std": 182.66693115234375, "min": -267.51116943359375, "p10": -47.056467819213864, "median": 106.3177261352539, "p90": 427.5809020996094, "max": 633.9842529296875, "pos_frac": 0.78125, "sample": [464.05242919921875, -42.94729232788086, 9.217727661132812, 106.06843566894531, 357.7488098144531, -12.049240112304688, 227.99371337890625, 434.49737548828125, 229.48776245117188, 158.48867797851562, -16.734342575073242, -48.817543029785156, 103.85139465332031, 296.0711669921875, 147.50714111328125, 33.016204833984375, 226.91064453125, 210.64723205566406, 29.960941314697266, 428.3675231933594, 35.61726379394531, 409.3373107910156, 264.58074951171875, -147.7947540283203, 87.76981353759766, 110.44004821777344, -92.55220031738281, 14.887809753417969, -31.622982025146484, 453.9605407714844, 197.31504821777344, 231.33299255371094, 257.136474609375, 633.9842529296875, 92.6145248413086, -54.7711181640625, 106.5670166015625, 9.16285514831543, 125.18665313720703, -267.51116943359375, 79.36089324951172, 425.7454528808594, 41.1076545715332, 238.11785888671875, 196.4680938720703, 215.96652221679688, 62.8333740234375, 1.0254745483398438, 511.90814208984375, 236.00839233398438, 247.10601806640625, 306.58087158203125, -14.115699768066406, 104.71654510498047, 46.963348388671875, 516.385986328125, -91.84420776367188, -12.91295051574707, 168.67544555664062, -165.80555725097656, 33.41496276855469, 37.42498779296875, -12.06573486328125, 218.615234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000257.npy"}
|
||||
{"epoch": 0.37738619676945667, "step": 258, "batch_size": 64, "mean": 139.01031494140625, "std": 194.26425170898438, "min": -499.5169982910156, "p10": -50.48630294799804, "median": 134.64312744140625, "p90": 365.74736328125005, "max": 661.735595703125, "pos_frac": 0.765625, "sample": [285.470947265625, 35.989524841308594, -86.03663635253906, 264.5644226074219, 51.35700988769531, 133.80465698242188, 261.7614440917969, 252.67922973632812, 224.94821166992188, -8.021728515625, -35.887481689453125, -34.584815979003906, -54.413230895996094, 49.82255554199219, 102.10520935058594, -33.15907287597656, -41.189979553222656, 106.99618530273438, 153.66102600097656, -7.183509826660156, 56.640655517578125, 255.73269653320312, 520.4738159179688, 209.54173278808594, 467.7138366699219, 252.66751098632812, 191.61004638671875, 24.01203155517578, 169.93902587890625, 348.2260437011719, 201.5894012451172, 279.6517333984375, 310.231689453125, 181.4418487548828, -209.9552001953125, 92.46058654785156, 271.0711364746094, -353.57647705078125, 21.996849060058594, 119.02735137939453, 381.6627502441406, -3.9558067321777344, -92.41603088378906, 373.2565002441406, 33.63874435424805, -499.5169982910156, 661.735595703125, 135.13888549804688, -131.94924926757812, 113.83724212646484, 233.2142333984375, -41.32347106933594, 293.9345703125, 333.9938659667969, 378.1941223144531, 51.78331756591797, 287.08258056640625, 421.00372314453125, 149.056640625, 242.31057739257812, 74.51325988769531, 134.14736938476562, 251.673095703125, 82.46488189697266], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000258.npy"}
|
||||
{"epoch": 0.3788546255506608, "step": 259, "batch_size": 64, "mean": 119.85458374023438, "std": 182.16685485839844, "min": -241.02125549316406, "p10": -93.11334915161132, "median": 84.36827087402344, "p90": 359.64826965332037, "max": 542.901611328125, "pos_frac": 0.75, "sample": [215.90353393554688, -16.02271842956543, 147.38174438476562, 196.1188201904297, 7.493999481201172, -102.372802734375, 316.6200866699219, 417.1219482421875, 441.0936584472656, 212.62123107910156, 53.664466857910156, -53.11540985107422, 106.74351501464844, -71.26447296142578, 8.991241455078125, -97.47885131835938, 344.5633850097656, -53.69255065917969, 468.349609375, 184.26528930664062, 112.70797729492188, 151.64508056640625, 83.4429931640625, 55.12179946899414, 290.79119873046875, 20.425392150878906, 64.36463928222656, 50.05314636230469, 542.901611328125, -183.69789123535156, 58.942474365234375, 33.7039794921875, 512.2537841796875, 177.21011352539062, 21.309837341308594, -163.8253631591797, 136.58648681640625, 341.885986328125, -147.14321899414062, 314.84173583984375, -157.12399291992188, 16.30193328857422, 328.98846435546875, -79.46498107910156, 85.29354858398438, 50.647003173828125, 366.11322021484375, 314.9208984375, -241.02125549316406, 163.3788604736328, -38.568756103515625, 295.145751953125, 276.56488037109375, -5.26776123046875, 383.3857421875, -82.92717742919922, 127.05117797851562, 322.31512451171875, 133.12786865234375, -75.13977813720703, 55.55112838745117, 36.61892318725586, 142.22955322265625, 52.065528869628906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000259.npy"}
|
||||
{"epoch": 0.3803230543318649, "step": 260, "batch_size": 64, "mean": 160.49334716796875, "std": 180.99234008789062, "min": -183.06951904296875, "p10": -15.737824249267575, "median": 124.55017852783203, "p90": 389.5504455566407, "max": 974.7369384765625, "pos_frac": 0.875, "sample": [221.57586669921875, -65.94896697998047, 44.772254943847656, -105.85824584960938, 15.137655258178711, 164.6064453125, 118.37757110595703, 168.74925231933594, 370.91033935546875, 46.23704528808594, 67.595703125, 397.5390625, 118.94166564941406, 102.97859191894531, 154.4777069091797, 974.7369384765625, 168.14744567871094, 259.1183776855469, 59.97662353515625, 469.35150146484375, -183.06951904296875, 157.1058349609375, -31.723691940307617, 170.83221435546875, -107.5125503540039, 0.01860809326171875, 153.45745849609375, -17.243301391601562, 245.39187622070312, 114.11532592773438, 120.57954406738281, 354.45184326171875, -12.225044250488281, 233.4676971435547, 132.7375030517578, 340.1143798828125, 364.3739318847656, 157.94671630859375, 119.66291809082031, 99.51687622070312, 95.53007507324219, 66.94842529296875, 336.13671875, 128.52081298828125, 150.28768920898438, 25.552608489990234, 31.893142700195312, 84.1864013671875, 176.69412231445312, 189.85581970214844, 209.00726318359375, 175.05203247070312, 106.86780548095703, 416.13470458984375, 27.903358459472656, -43.543312072753906, 83.56707763671875, 427.08599853515625, 21.27574920654297, 574.3060913085938, 91.63526153564453, 411.0841064453125, 40.955291748046875, 281.215576171875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000260.npy"}
|
||||
{"epoch": 0.38179148311306904, "step": 261, "batch_size": 64, "mean": 150.63623046875, "std": 205.22320556640625, "min": -318.5157775878906, "p10": -74.42410011291503, "median": 126.93074035644531, "p90": 386.44739685058596, "max": 907.6098022460938, "pos_frac": 0.796875, "sample": [389.7518005371094, 373.46893310546875, 37.588050842285156, -230.4421844482422, 120.94325256347656, 907.6098022460938, 35.820587158203125, 378.73712158203125, -35.26787185668945, 208.20025634765625, 7.805585861206055, 73.04749298095703, -89.84716796875, 408.96514892578125, 112.67143249511719, 215.46548461914062, 224.88180541992188, 344.6830749511719, 360.08856201171875, -52.06806945800781, 91.83706665039062, 81.87136840820312, 132.91822814941406, 203.60403442382812, 191.07815551757812, 315.7879638671875, 502.63714599609375, 15.732696533203125, 301.7832946777344, 229.48512268066406, 65.01687622070312, 29.45301055908203, -1.398406982421875, 76.62193298339844, 242.6612548828125, 53.81444549560547, 275.400146484375, 372.43011474609375, 263.5489501953125, 135.899658203125, 230.01266479492188, -63.798946380615234, 479.9837341308594, 412.52813720703125, 2.1839046478271484, 371.33685302734375, -170.33273315429688, 206.38841247558594, 3.1110305786132812, 234.10610961914062, 107.2220687866211, 481.90380859375, -25.385459899902344, -318.5157775878906, -188.38253784179688, 115.7668228149414, -168.51885986328125, -15.268495559692383, 169.48782348632812, 191.8844451904297, 57.954124450683594, 81.55529022216797, 152.1884765625, -78.97773742675781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000261.npy"}
|
||||
{"epoch": 0.3832599118942731, "step": 262, "batch_size": 64, "mean": 123.85541534423828, "std": 165.8516845703125, "min": -250.5835418701172, "p10": -75.60032844543454, "median": 129.5196075439453, "p90": 291.31186828613284, "max": 753.728759765625, "pos_frac": 0.8125, "sample": [73.68299865722656, 399.4861755371094, 113.0597915649414, 211.438720703125, -250.5835418701172, 328.1202087402344, 461.043212890625, 121.47740173339844, 149.08389282226562, 259.26605224609375, -152.253662109375, -216.84286499023438, 138.60568237304688, 753.728759765625, 181.99139404296875, -172.54367065429688, 164.8245849609375, 261.5806884765625, -15.521780014038086, 179.65713500976562, 160.39715576171875, 53.08009338378906, -86.61643981933594, 292.7684631347656, 134.56454467773438, 5.3112640380859375, 40.13942337036133, 386.54876708984375, 116.20755004882812, 287.91314697265625, 51.049560546875, 87.28165435791016, 92.46224975585938, -49.89606857299805, 80.71195983886719, 60.57147979736328, 164.94964599609375, -31.903762817382812, 162.67242431640625, 152.5342559814453, 144.58087158203125, 134.18292236328125, 9.739280700683594, 236.28424072265625, 243.1166534423828, 75.94857025146484, 258.69775390625, 109.50072479248047, -99.17449188232422, -48.70580291748047, 65.91395568847656, 36.389495849609375, 45.77323913574219, 298.54449462890625, -161.333984375, -47.817100524902344, 124.85629272460938, 155.23110961914062, 234.92066955566406, 246.96243286132812, 180.245849609375, 41.10063171386719, 256.96466064453125, 234.77572631835938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000262.npy"}
|
||||
{"epoch": 0.38472834067547723, "step": 263, "batch_size": 64, "mean": 177.93978881835938, "std": 210.72232055664062, "min": -329.54010009765625, "p10": -76.6076217651367, "median": 150.9087905883789, "p90": 466.18482360839863, "max": 729.54541015625, "pos_frac": 0.796875, "sample": [337.8810119628906, -329.54010009765625, 148.63816833496094, 166.07115173339844, 360.5029296875, 57.82085418701172, 323.98834228515625, 49.231414794921875, 51.84867858886719, 93.0715560913086, -233.37249755859375, 170.53370666503906, 70.80589294433594, 307.10870361328125, 325.6640930175781, 184.8760528564453, 18.782899856567383, 355.962158203125, 119.72225189208984, 145.77752685546875, -7.174386978149414, -83.45944213867188, 47.304298400878906, 294.21197509765625, -22.047626495361328, -99.9107666015625, 324.4202880859375, 153.17941284179688, -27.07482147216797, 102.11888122558594, 43.07838439941406, 24.000898361206055, 356.0922546386719, 57.11970520019531, -60.62004089355469, 502.68499755859375, 291.2268371582031, 105.85845947265625, 399.7595520019531, 485.266845703125, 108.2409439086914, 503.64312744140625, 104.11259460449219, 269.4698486328125, 340.4289245605469, 158.63278198242188, 484.63818359375, -86.44789123535156, 242.1600341796875, 264.6595458984375, 389.11785888671875, 423.1269836425781, 688.46533203125, -178.3621063232422, -9.903366088867188, 565.5716552734375, -33.231361389160156, -93.4128646850586, 192.94700622558594, 205.67388916015625, 132.0683135986328, 729.54541015625, 137.89724731445312, 237.69338989257812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000263.npy"}
|
||||
{"epoch": 0.38619676945668135, "step": 264, "batch_size": 64, "mean": 151.44468688964844, "std": 212.54832458496094, "min": -258.09716796875, "p10": -79.34185409545896, "median": 127.4139633178711, "p90": 453.01604309082035, "max": 655.5096435546875, "pos_frac": 0.765625, "sample": [217.0185546875, 299.7891540527344, 47.3005256652832, 655.5096435546875, 87.16000366210938, -258.09716796875, -185.07168579101562, 406.9818420410156, 88.10015869140625, 482.40252685546875, -52.947914123535156, 31.870641708374023, 102.93650817871094, -141.36294555664062, 160.1658935546875, 194.70571899414062, 283.09588623046875, 441.33233642578125, 417.9170227050781, 308.87359619140625, 67.8692626953125, 247.13705444335938, 287.3631591796875, 211.33937072753906, 55.69788360595703, 34.74397277832031, -50.66510772705078, -14.617782592773438, -59.167747497558594, 214.3762664794922, -87.98789978027344, -4.420036315917969, 51.58991241455078, 616.7243041992188, 2.8388671875, 458.0233459472656, 140.23928833007812, 244.00631713867188, 21.177474975585938, 236.2073516845703, 58.134796142578125, 229.44920349121094, 120.32308959960938, 143.99635314941406, -14.159141540527344, 633.4546508789062, 402.97283935546875, -231.30810546875, 231.25143432617188, -166.74676513671875, 184.24562072753906, 11.416461944580078, 240.07138061523438, 501.03338623046875, -37.57939147949219, 134.5048370361328, 21.424537658691406, 58.745880126953125, -234.9173126220703, 509.2015075683594, 322.4529113769531, 262.3826904296875, -10.165939331054688, 62.11979675292969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000264.npy"}
|
||||
{"epoch": 0.3876651982378855, "step": 265, "batch_size": 64, "mean": 147.303955078125, "std": 244.25196838378906, "min": -500.40045166015625, "p10": -98.5947494506836, "median": 124.05256652832031, "p90": 437.1080841064454, "max": 857.8463134765625, "pos_frac": 0.75, "sample": [107.44039154052734, 421.0201110839844, 327.9361572265625, 240.52513122558594, 68.41094207763672, 170.0934600830078, 348.8445739746094, 20.21955108642578, 640.1756591796875, 112.98931884765625, 250.64035034179688, -328.6501770019531, 23.78281021118164, -39.28580093383789, -146.91778564453125, 43.442081451416016, 378.94854736328125, -119.73912048339844, 96.3305892944336, -25.667999267578125, 268.94708251953125, 857.8463134765625, -500.40045166015625, 527.8598022460938, -92.42654418945312, 135.11581420898438, 212.998779296875, 478.8609313964844, -77.10874938964844, 737.4027709960938, -246.4737091064453, 300.0284729003906, 62.54441833496094, -65.41473388671875, 93.75970458984375, -58.2459716796875, 20.037017822265625, -49.95318603515625, 402.91790771484375, -101.23826599121094, 447.0091247558594, 95.2528076171875, 69.77791595458984, 406.4767150878906, -41.56867218017578, 180.7360382080078, 20.386260986328125, 170.40322875976562, -33.34864807128906, 286.842041015625, 106.11649322509766, 144.911865234375, 143.124755859375, 413.8201904296875, 153.84800720214844, 149.84519958496094, 232.1758270263672, 137.35928344726562, 50.23592758178711, -335.0928039550781, 444.0029296875, 93.92572021484375, 363.64990234375, 229.9663543701172], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000265.npy"}
|
||||
{"epoch": 0.3891336270190896, "step": 266, "batch_size": 64, "mean": 119.01298522949219, "std": 200.33860778808594, "min": -315.40142822265625, "p10": -81.63846359252928, "median": 77.05123519897461, "p90": 413.24994201660166, "max": 706.061767578125, "pos_frac": 0.71875, "sample": [-13.460708618164062, 552.5343017578125, 41.643470764160156, 124.38481903076172, 235.3240509033203, 139.9695587158203, -34.89381408691406, 272.4019470214844, 83.00564575195312, 456.90277099609375, 149.26609802246094, 305.59100341796875, -60.73219299316406, 262.6594543457031, -118.34245300292969, 53.282676696777344, 280.2220764160156, 91.52828979492188, 169.48081970214844, 56.774620056152344, 112.68818664550781, -273.80401611328125, -50.211952209472656, 373.49078369140625, 180.9661102294922, 391.3665466308594, 143.6636199951172, 6.494712829589844, 57.232032775878906, 422.6285400390625, 476.54742431640625, 276.77960205078125, 706.061767578125, 468.1796875, 226.86476135253906, 64.28585815429688, 72.78800964355469, 234.85845947265625, 39.252586364746094, 69.5346908569336, 72.15060424804688, 143.60772705078125, 64.0706558227539, 4.269201278686523, 172.52389526367188, -13.884075164794922, -315.40142822265625, -226.80230712890625, 268.7835693359375, 33.13536834716797, -156.66940307617188, -89.09351348876953, -40.20274353027344, -64.24334716796875, -60.643150329589844, -44.79096984863281, -22.616052627563477, -35.70165252685547, -170.14581298828125, 459.8547668457031, 231.2567138671875, 264.2103576660156, 14.637947082519531, 81.31446075439453], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000266.npy"}
|
||||
{"epoch": 0.39060205580029367, "step": 267, "batch_size": 64, "mean": 146.3773193359375, "std": 207.8628692626953, "min": -481.7513732910156, "p10": -42.570927810668934, "median": 122.60110092163086, "p90": 363.6195617675782, "max": 719.3955688476562, "pos_frac": 0.796875, "sample": [83.2144546508789, 172.93405151367188, 347.48089599609375, -27.449329376220703, 92.20390319824219, -19.44209098815918, 287.8208312988281, 185.9940643310547, 24.766693115234375, 21.205829620361328, -24.768585205078125, -52.566009521484375, 17.481536865234375, 370.5361328125, 174.78262329101562, 139.86883544921875, -54.38231658935547, 240.26669311523438, 494.00927734375, 719.3955688476562, 89.46290588378906, 97.95630645751953, 125.92668914794922, 22.145599365234375, -146.19618225097656, 86.83805084228516, 82.03256225585938, 105.90316009521484, 301.93902587890625, 216.8135223388672, 566.5145263671875, 406.6517639160156, 708.36376953125, 67.59872436523438, -21.30740737915039, 252.0589141845703, 15.867748260498047, -49.051612854003906, 12.327434539794922, 157.28225708007812, 282.8430480957031, 20.088333129882812, -6.128673553466797, 604.40283203125, -481.7513732910156, -4.560874938964844, -379.473388671875, 222.06768798828125, 199.68911743164062, 119.2755126953125, 231.55409240722656, 226.26754760742188, 98.66888427734375, 304.9640808105469, 111.98334503173828, 149.05975341796875, -49.95533752441406, 310.4744567871094, 55.52509307861328, 127.1763916015625, 341.3841247558594, 170.39080810546875, 294.7620544433594, 126.96024322509766], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000267.npy"}
|
||||
{"epoch": 0.3920704845814978, "step": 268, "batch_size": 64, "mean": 141.71861267089844, "std": 205.74954223632812, "min": -337.403564453125, "p10": -52.47172927856444, "median": 133.72422790527344, "p90": 354.51913452148443, "max": 795.4550170898438, "pos_frac": 0.78125, "sample": [29.634376525878906, 450.04412841796875, 89.67896270751953, -40.026344299316406, 54.71220397949219, 70.22315216064453, 149.6796875, 75.1435546875, 361.946533203125, 112.52493286132812, 140.69308471679688, -316.90374755859375, 174.39102172851562, 45.646331787109375, 254.54571533203125, -222.1490478515625, 134.83511352539062, 62.55049133300781, -200.3541259765625, 132.61334228515625, 173.05035400390625, 795.4550170898438, 65.46925354003906, 156.70376586914062, 69.86326599121094, 257.22674560546875, 485.60003662109375, 542.4352416992188, -37.457210540771484, 337.18853759765625, -81.26432037353516, 68.42353820800781, -57.80546569824219, -16.76205825805664, 603.071533203125, 157.38548278808594, 207.33897399902344, 156.14956665039062, 189.4619140625, 212.75392150878906, 122.4189453125, 279.04107666015625, 8.438545227050781, -72.93110656738281, 304.580078125, 80.94232177734375, 179.3059539794922, 279.69482421875, -8.246826171875, 245.4617919921875, 196.4532928466797, 136.2325897216797, 713.39892578125, 214.44479370117188, 86.42208862304688, 20.949783325195312, 131.13369750976562, -0.00829315185546875, 324.88916015625, -14.45936393737793, 196.82333374023438, -337.403564453125, -36.17192459106445, 174.86407470703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000268.npy"}
|
||||
{"epoch": 0.3935389133627019, "step": 269, "batch_size": 64, "mean": 170.71112060546875, "std": 220.3466033935547, "min": -427.46435546875, "p10": -92.44907760620116, "median": 172.338134765625, "p90": 438.0767791748047, "max": 650.135986328125, "pos_frac": 0.765625, "sample": [167.09507751464844, 282.3896179199219, 140.18853759765625, 316.83148193359375, -76.82140350341797, 240.85964965820312, 244.79632568359375, 177.58119201660156, -143.87570190429688, -47.37700653076172, 307.760986328125, -22.931259155273438, 435.3605041503906, 51.80792236328125, 351.432861328125, 294.10980224609375, 9.384910583496094, 192.5164337158203, -52.00675964355469, -427.46435546875, 272.9085998535156, 30.304996490478516, 204.79226684570312, -178.1083984375, -99.14665222167969, 415.5892333984375, 12.296394348144531, 625.4014892578125, 501.772216796875, 138.01654052734375, 158.968505859375, 44.97149658203125, 447.7701416015625, 290.3632507324219, -10.069992065429688, 336.48480224609375, 427.9581604003906, 87.2504653930664, 242.94326782226562, 23.62255859375, 6.297767639160156, 143.1212615966797, -191.96202087402344, 62.242767333984375, 132.1631317138672, 627.613525390625, 279.14312744140625, 507.7022705078125, 307.4327697753906, -23.135025024414062, 291.4845275878906, 222.76849365234375, 0.81341552734375, 650.135986328125, -137.34449768066406, 436.8512878417969, -28.97845458984375, 76.00121307373047, -13.883769989013672, 256.5050354003906, 416.63763427734375, 438.60198974609375, -158.0045928955078, 207.575927734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000269.npy"}
|
||||
{"epoch": 0.39500734214390604, "step": 270, "batch_size": 64, "mean": 128.4014434814453, "std": 202.8406982421875, "min": -426.0492248535156, "p10": -67.9484634399414, "median": 118.8958854675293, "p90": 359.88875122070317, "max": 747.9391479492188, "pos_frac": 0.78125, "sample": [243.17919921875, 344.3734130859375, 447.70660400390625, -360.2355041503906, -426.0492248535156, -14.938217163085938, 49.188194274902344, 271.1808776855469, 106.3294448852539, 293.6099548339844, 340.10626220703125, -50.147308349609375, 140.16802978515625, 230.2902069091797, 17.322967529296875, 58.026302337646484, 94.7779769897461, 171.79525756835938, -155.63619995117188, 218.16812133789062, 263.7419738769531, 157.167236328125, -46.930931091308594, 201.40170288085938, 440.3800048828125, 272.7583923339844, 17.358747482299805, 121.24884796142578, 209.12649536132812, 111.52176666259766, -234.2527618408203, 363.2532958984375, 747.9391479492188, -82.77592468261719, -62.666900634765625, 277.60382080078125, 297.9437255859375, 39.96424102783203, 105.63774108886719, -2.2141761779785156, 589.0758056640625, -218.28640747070312, 177.46359252929688, 17.91799545288086, 137.28689575195312, 116.54292297363281, 30.600967407226562, 227.85000610351562, -70.21199035644531, 8.089527130126953, 224.72027587890625, 352.03814697265625, 44.65458679199219, 14.380277633666992, 30.439498901367188, 155.20492553710938, -24.764892578125, 21.50958251953125, -22.468360900878906, 364.55755615234375, 152.64004516601562, 153.735595703125, 65.45915222167969, 451.8331298828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000270.npy"}
|
||||
{"epoch": 0.3964757709251101, "step": 271, "batch_size": 64, "mean": 162.08523559570312, "std": 193.21275329589844, "min": -214.08688354492188, "p10": -35.381995773315424, "median": 112.89587020874023, "p90": 429.64421386718766, "max": 752.1815795898438, "pos_frac": 0.78125, "sample": [752.1815795898438, 353.1717834472656, 93.98112487792969, -37.833980560302734, 273.73486328125, -4.175617218017578, 367.0689392089844, 388.5934143066406, 25.06244659423828, -73.05191802978516, 329.05731201171875, -138.50076293945312, 445.278564453125, -214.08688354492188, 91.10875701904297, -27.048606872558594, 89.07608032226562, -26.38372039794922, 128.82200622558594, -14.946029663085938, -87.04674530029297, -5.1058502197265625, 463.4547119140625, 352.5831298828125, 528.44775390625, 385.062744140625, 81.79488372802734, -70.58612060546875, 56.268287658691406, 143.69598388671875, 1.2852516174316406, 251.04383850097656, 173.38442993164062, -29.66069793701172, -63.539794921875, 141.7981719970703, 87.21945190429688, 0.9661865234375, 393.1640625, 101.0010986328125, -18.823265075683594, 527.8269653320312, 54.90290069580078, 7.665525436401367, 122.49998474121094, 17.588207244873047, 110.29739379882812, 160.07810974121094, 338.0165100097656, 217.47634887695312, 496.7137451171875, 303.513671875, 115.49434661865234, 27.197341918945312, 498.78082275390625, 48.08020782470703, 232.0469970703125, 335.8900451660156, 55.17413330078125, 200.8538360595703, 192.78677368164062, 283.8021240234375, 95.92916870117188, 243.3231658935547], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000271.npy"}
|
||||
{"epoch": 0.39794419970631423, "step": 272, "batch_size": 64, "mean": 200.57228088378906, "std": 191.79505920410156, "min": -204.8065643310547, "p10": -79.17268676757811, "median": 218.12490844726562, "p90": 465.3450103759766, "max": 596.7237548828125, "pos_frac": 0.828125, "sample": [279.579345703125, 207.20953369140625, -65.84748840332031, -94.12913513183594, -32.70359802246094, 383.0030517578125, 245.38221740722656, 444.4833984375, 467.73541259765625, 347.11065673828125, 310.64422607421875, 359.0860900878906, -99.33562469482422, 180.34768676757812, -96.81022644042969, 72.43209838867188, 104.74385833740234, 190.49586486816406, 218.27273559570312, 137.119873046875, -98.98223876953125, 247.0872802734375, 338.79193115234375, 221.5269775390625, 291.5406494140625, 109.01485443115234, 1.6570472717285156, 235.02120971679688, 126.8837661743164, 583.93310546875, 5.7223052978515625, 240.41851806640625, 216.24496459960938, 173.53726196289062, 90.70362854003906, 466.7221374511719, 494.1721496582031, -204.8065643310547, 208.73822021484375, 286.63812255859375, 596.7237548828125, 32.23876953125, 524.962890625, 154.18841552734375, 312.8409423828125, -72.84195709228516, 324.6572265625, 61.065433502197266, 239.79685974121094, 429.4060974121094, -42.22975158691406, 217.97708129882812, 9.806259155273438, -107.86811828613281, -81.88585662841797, 299.522705078125, 296.0207214355469, 54.44934844970703, 423.2080078125, 145.30177307128906, 222.37808227539062, 462.1317138671875, 512.7192993164062, 228.67050170898438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000272.npy"}
|
||||
{"epoch": 0.39941262848751835, "step": 273, "batch_size": 64, "mean": 186.65919494628906, "std": 204.59596252441406, "min": -293.6380615234375, "p10": -68.78959274291991, "median": 187.54432678222656, "p90": 427.44396667480476, "max": 675.2109375, "pos_frac": 0.8125, "sample": [233.68927001953125, 331.45361328125, 119.91262817382812, 164.52847290039062, 529.181396484375, 477.61688232421875, 186.43801879882812, -98.79401397705078, 641.2687377929688, 15.169723510742188, 378.989990234375, 262.7405090332031, -76.48202514648438, 64.62139129638672, -34.8426513671875, 122.7324447631836, 17.518505096435547, 188.650634765625, 201.23297119140625, 360.83447265625, 190.65432739257812, 53.85853576660156, 92.56085205078125, 138.34161376953125, 115.55021667480469, 28.421791076660156, 382.06488037109375, 210.8140411376953, 164.63467407226562, 208.0858154296875, 329.300048828125, -17.540977478027344, 345.1745910644531, 240.243408203125, -27.702150344848633, 675.2109375, 544.4241333007812, 234.2504119873047, 191.58750915527344, 298.79180908203125, -200.44851684570312, -148.25064086914062, 272.15069580078125, -50.84058380126953, 260.03326416015625, 281.06512451171875, 409.81097412109375, 62.32539367675781, 130.3684844970703, -293.6380615234375, 629.4280395507812, 169.4090576171875, 72.18965148925781, -96.5645751953125, -24.9503173828125, 344.6962585449219, 38.87599182128906, 119.50782775878906, 326.1328125, -111.78349304199219, 332.2818908691406, 92.19641876220703, 413.6501770019531, 433.3555908203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000273.npy"}
|
||||
{"epoch": 0.4008810572687225, "step": 274, "batch_size": 64, "mean": 144.20184326171875, "std": 197.9510040283203, "min": -245.71681213378906, "p10": -125.45073089599606, "median": 124.75010299682617, "p90": 445.13988342285165, "max": 581.8504638671875, "pos_frac": 0.765625, "sample": [46.21021270751953, 102.07820129394531, 33.59967803955078, -162.25074768066406, 230.67123413085938, 165.17550659179688, 228.80592346191406, -46.735389709472656, 129.83641052246094, -6.6439666748046875, -142.5484619140625, 471.7013854980469, 364.91900634765625, 257.1485290527344, 262.6143493652344, 182.2482452392578, 19.731712341308594, 482.00067138671875, -245.71681213378906, 322.650146484375, -148.49876403808594, 141.4463348388672, -25.905548095703125, 50.59225082397461, 341.5885009765625, 303.9765319824219, 455.8772888183594, -141.72747802734375, 104.1572494506836, 581.8504638671875, -42.48868179321289, 129.90542602539062, 308.15484619140625, 163.78366088867188, -5.856939315795898, 96.51191711425781, 463.5155944824219, -19.464385986328125, 124.84315490722656, 82.62257385253906, 297.65850830078125, 8.486007690429688, 101.54905700683594, 272.8818359375, 420.0859375, -97.86418914794922, 116.98292541503906, 30.64412498474121, 198.20330810546875, 552.159912109375, 278.85467529296875, 95.49943542480469, 0.7112197875976562, 147.15228271484375, -241.03421020507812, 326.97509765625, -137.20565795898438, -98.02256774902344, 551.7552490234375, 188.87478637695312, 109.34832763671875, 313.304931640625, 6.879669189453125, 124.65705108642578], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000274.npy"}
|
||||
{"epoch": 0.4023494860499266, "step": 275, "batch_size": 64, "mean": 150.89297485351562, "std": 190.32464599609375, "min": -261.3113098144531, "p10": -76.86675262451172, "median": 140.12873077392578, "p90": 405.2445068359376, "max": 550.8244018554688, "pos_frac": 0.78125, "sample": [213.5814666748047, 78.92457580566406, 301.6044616699219, 4.989635467529297, -24.1804141998291, -96.3594741821289, 384.1814880371094, 123.08189392089844, 505.6009826660156, -77.86078643798828, 245.90695190429688, 381.8282775878906, 550.8244018554688, 80.2236557006836, 149.26675415039062, 285.90338134765625, -42.59634780883789, 23.724853515625, 124.91643524169922, 194.7406463623047, -125.8725357055664, 198.38229370117188, 430.7664489746094, 27.301551818847656, 32.985877990722656, -222.7252197265625, 313.74884033203125, 168.78741455078125, 165.6300048828125, 74.25231170654297, -85.38184356689453, 39.442626953125, 314.63671875, 210.81695556640625, -16.868257522583008, 41.714935302734375, -203.312744140625, -48.030670166015625, -43.21173095703125, 45.37202453613281, 72.75827026367188, 322.1159362792969, -261.3113098144531, 177.09214782714844, 237.6337127685547, 414.2715148925781, 207.38064575195312, 545.9642944335938, 219.41473388671875, 485.6309814453125, 418.70941162109375, -35.74043273925781, 263.61761474609375, 377.27215576171875, 63.181068420410156, 322.6736145019531, 130.99070739746094, 338.3542785644531, -74.5473403930664, 224.7655487060547, 52.70476531982422, 338.45849609375, 66.508544921875, 22.51312255859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000275.npy"}
|
||||
{"epoch": 0.40381791483113066, "step": 276, "batch_size": 64, "mean": 155.49386596679688, "std": 209.3193359375, "min": -262.2996520996094, "p10": -85.92391204833982, "median": 159.25896453857422, "p90": 427.3069091796876, "max": 849.3602905273438, "pos_frac": 0.78125, "sample": [106.3616714477539, 849.3602905273438, 172.19569396972656, 433.00848388671875, 489.64410400390625, -17.188745498657227, -15.87994384765625, 577.4456787109375, 35.36396789550781, 151.9250030517578, 140.02114868164062, 17.882850646972656, -27.574462890625, 248.58419799804688, 296.7237243652344, 265.1712341308594, 204.5966796875, -262.2996520996094, 138.58119201660156, 9.632312774658203, 36.9217643737793, 298.47955322265625, 206.85983276367188, 317.2898254394531, -101.18980407714844, -236.6690673828125, -22.946231842041016, 85.67909240722656, 212.27212524414062, 342.0636901855469, -50.303497314453125, 182.51416015625, 41.17894744873047, 166.59292602539062, 335.25604248046875, 281.432373046875, -171.9423828125, -212.3850555419922, -1.3281536102294922, 27.29153060913086, -153.3470916748047, 21.91998291015625, 58.08115768432617, 466.0732727050781, 83.75638580322266, 170.2998046875, 234.2605438232422, 213.94187927246094, 298.6978454589844, 54.28278350830078, 476.96234130859375, -19.135101318359375, 173.22848510742188, 279.2576904296875, 63.52001953125, 242.62684631347656, 213.734619140625, 167.8209991455078, 376.28411865234375, 88.92752075195312, -145.65280151367188, 583.8078002929688, 37.631874084472656, 414.00323486328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000276.npy"}
|
||||
{"epoch": 0.4052863436123348, "step": 277, "batch_size": 64, "mean": 154.2389678955078, "std": 205.53924560546875, "min": -315.504638671875, "p10": -107.87050552368164, "median": 129.42224884033203, "p90": 439.11878967285156, "max": 564.7153930664062, "pos_frac": 0.765625, "sample": [8.147502899169922, 324.9115295410156, -153.29966735839844, 160.988525390625, 21.853084564208984, 295.19757080078125, 380.5067138671875, 157.16744995117188, -107.20075988769531, 216.6959228515625, 223.244140625, -36.44568634033203, 382.24853515625, -27.94281578063965, 204.91932678222656, 382.364013671875, 83.14856719970703, 115.60205078125, 206.464111328125, 308.6966552734375, -104.85906219482422, 372.34600830078125, 58.36033630371094, 70.3522720336914, -208.07720947265625, -108.15753936767578, 509.39459228515625, -11.37322998046875, -35.488128662109375, 123.98922729492188, -315.504638671875, 549.0494995117188, 143.18478393554688, 313.11871337890625, -195.0247802734375, -155.1966552734375, 564.7153930664062, 266.0943298339844, 131.12225341796875, -3.18121337890625, 239.61358642578125, 27.412811279296875, 503.5347900390625, 30.102436065673828, 12.136611938476562, 120.1995620727539, 185.01104736328125, 364.05596923828125, 436.58489990234375, 476.12542724609375, -158.43637084960938, 285.9549865722656, 76.17867279052734, 76.7932357788086, 12.797027587890625, -6.700557708740234, 235.8986358642578, 126.2231216430664, 440.2047424316406, 465.33599853515625, 127.72224426269531, 172.89846801757812, 103.98303985595703, 405.5314636230469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000277.npy"}
|
||||
{"epoch": 0.4067547723935389, "step": 278, "batch_size": 64, "mean": 181.261474609375, "std": 205.91200256347656, "min": -151.93533325195312, "p10": -51.46418380737304, "median": 145.36105346679688, "p90": 515.4886749267579, "max": 696.2789916992188, "pos_frac": 0.78125, "sample": [-95.39935302734375, 351.68353271484375, -78.10314178466797, 75.90731811523438, 311.6964416503906, 339.11767578125, 229.61260986328125, 84.62434387207031, 175.31277465820312, 107.6679458618164, 259.1646423339844, 186.9251708984375, 31.36456871032715, -77.23001098632812, 158.12283325195312, 271.6478271484375, -7.4202728271484375, 323.9314880371094, 504.4234924316406, 18.504478454589844, 15.87635612487793, 641.213623046875, 415.013916015625, 204.92608642578125, 272.0005187988281, 293.6873779296875, 106.32926177978516, -24.382667541503906, 179.74386596679688, 525.1873168945312, 463.4955749511719, -58.33589172363281, 105.706298828125, 34.3826904296875, 558.7152099609375, 520.2308959960938, 34.95287322998047, -40.23834228515625, 82.00170135498047, 192.6048583984375, 83.18408966064453, 429.78021240234375, 162.6156463623047, 68.86746215820312, 41.23419189453125, 529.618408203125, 132.59927368164062, 245.9491729736328, -42.073368072509766, 596.4441528320312, 172.901123046875, 132.0432586669922, 40.40484619140625, -2.3398666381835938, 295.5818786621094, 285.08648681640625, -36.88660430908203, -41.726158142089844, 696.2789916992188, -151.93533325195312, 348.565673828125, 109.49154663085938, -134.12869262695312, -55.48881912231445], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000278.npy"}
|
||||
{"epoch": 0.40822320117474303, "step": 279, "batch_size": 64, "mean": 182.81570434570312, "std": 236.7415771484375, "min": -355.23931884765625, "p10": -55.30280227661132, "median": 157.2572479248047, "p90": 476.8917053222657, "max": 912.4232177734375, "pos_frac": 0.75, "sample": [67.31641387939453, 78.19148254394531, 50.25835037231445, -20.85931396484375, -6.801727294921875, -40.315025329589844, 432.99755859375, 482.2302551269531, -13.191230773925781, -32.84462356567383, -14.772651672363281, 272.62335205078125, 189.083984375, 45.626800537109375, 531.2652587890625, 636.654541015625, 55.02265548706055, 173.98541259765625, 301.49810791015625, 226.16519165039062, 685.3805541992188, 100.3661117553711, 28.289892196655273, -303.1739501953125, 50.84107971191406, 98.5708999633789, 1.2982177734375, 183.26763916015625, 140.52908325195312, 543.0747680664062, 112.2452392578125, 67.04854583740234, 237.95968627929688, -22.527196884155273, 251.73956298828125, 422.01177978515625, 232.35513305664062, 912.4232177734375, 464.4350891113281, 314.093017578125, -3.1414031982421875, -61.72613525390625, -108.96891784667969, -355.23931884765625, 184.0442352294922, 443.5284118652344, 77.16900634765625, 261.14654541015625, 363.26214599609375, 136.65040588378906, 456.82073974609375, 313.3385925292969, 82.87198638916016, -136.1490478515625, 504.8700866699219, 223.47848510742188, 375.16162109375, 279.58343505859375, -120.787353515625, -163.52064514160156, 437.831787109375, 345.2977600097656, -12.080631256103516, 242.39976501464844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000279.npy"}
|
||||
{"epoch": 0.40969162995594716, "step": 280, "batch_size": 64, "mean": 185.09500122070312, "std": 192.14739990234375, "min": -334.43365478515625, "p10": -39.85333023071287, "median": 166.95523071289062, "p90": 439.121517944336, "max": 570.8621215820312, "pos_frac": 0.796875, "sample": [130.9932403564453, 324.10675048828125, 134.06216430664062, 266.43603515625, -23.202957153320312, -46.98920440673828, 356.8194885253906, 448.50299072265625, 498.1693115234375, 46.146820068359375, 228.6993865966797, 21.034278869628906, -19.731189727783203, 349.4490966796875, 392.6232604980469, 387.62109375, 107.60631561279297, 266.36077880859375, 195.54022216796875, 290.5426330566406, 60.116031646728516, 516.7797241210938, 483.7615051269531, 198.6529083251953, 256.2946472167969, 88.93447875976562, 363.40325927734375, 365.8526611328125, -3.046844482421875, 226.83413696289062, 193.03363037109375, 153.42074584960938, 547.5820922851562, 169.31857299804688, -18.615692138671875, 150.01907348632812, -215.4002685546875, 369.81317138671875, 224.53660583496094, 103.32125091552734, 220.125244140625, 108.33348846435547, -128.1560516357422, 570.8621215820312, -79.54238891601562, -334.43365478515625, 126.35749053955078, 417.2314147949219, 132.91561889648438, 155.0498046875, 211.26268005371094, 464.44927978515625, -1.5050220489501953, 142.19290161132812, -3.431640625, -89.80626678466797, 157.40512084960938, 110.66642761230469, 164.59188842773438, 338.2298583984375, 305.44207763671875, -175.16448974609375, 72.55815124511719, 371.043701171875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000280.npy"}
|
||||
{"epoch": 0.4111600587371512, "step": 281, "batch_size": 64, "mean": 166.26089477539062, "std": 193.9694061279297, "min": -642.2554931640625, "p10": -24.963335800170892, "median": 154.92941284179688, "p90": 403.5678527832032, "max": 587.0740966796875, "pos_frac": 0.84375, "sample": [155.85211181640625, 294.0071105957031, 139.1232147216797, 222.53135681152344, 89.46678161621094, 237.72808837890625, 71.09661865234375, 199.10232543945312, 176.7400665283203, 83.8409652709961, 219.484375, 410.6501159667969, 255.70875549316406, 65.7350082397461, 259.4368896484375, -70.77099609375, 191.2861328125, 587.0740966796875, 48.46051025390625, 147.5603485107422, -2.464630126953125, 363.9188232421875, 248.95118713378906, 481.7768249511719, 182.40771484375, 274.7923278808594, 174.51744079589844, 290.37274169921875, -27.248950958251953, 563.6626586914062, 387.0425720214844, 104.24333190917969, 158.48028564453125, 443.7410888671875, 158.42755126953125, -642.2554931640625, 79.25505065917969, 137.89051818847656, 81.00457000732422, 21.972335815429688, 466.425537109375, 32.52830505371094, -19.630233764648438, 306.99285888671875, 152.15408325195312, 259.21502685546875, 34.337127685546875, 113.22566223144531, 541.4160766601562, 111.67078399658203, -181.57275390625, 137.27462768554688, -101.54750061035156, 110.05783081054688, -54.605987548828125, -3.7272567749023438, -49.856964111328125, 376.0059814453125, 222.0377960205078, 74.37508392333984, 154.0067138671875, 320.20550537109375, 1.9391231536865234, 373.16815185546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000281.npy"}
|
||||
{"epoch": 0.41262848751835535, "step": 282, "batch_size": 64, "mean": 129.51254272460938, "std": 217.6875762939453, "min": -207.386962890625, "p10": -112.03567504882812, "median": 112.35026931762695, "p90": 350.2529571533204, "max": 1206.857421875, "pos_frac": 0.75, "sample": [444.9620666503906, 123.72760009765625, 67.196533203125, -207.386962890625, 199.23011779785156, 264.23529052734375, 89.54134368896484, 43.95283508300781, 119.43255615234375, 330.453369140625, 276.34283447265625, 175.8590087890625, 332.7782287597656, 95.1789779663086, -10.532150268554688, 181.86883544921875, 1206.857421875, 146.22918701171875, 203.36492919921875, 117.58807373046875, 154.86871337890625, 192.22015380859375, 29.411922454833984, -129.98342895507812, 549.6328735351562, 357.74212646484375, 107.11246490478516, 149.47047424316406, 152.6894989013672, 504.10394287109375, 266.16424560546875, 183.86907958984375, 323.1851806640625, -32.756500244140625, 26.721080780029297, 88.69642639160156, -173.92955017089844, -107.28592681884766, 71.68185424804688, -138.47567749023438, -70.47174072265625, 54.7691650390625, 141.6608123779297, 198.82864379882812, 261.11614990234375, -14.476646423339844, -71.72738647460938, 376.43048095703125, 207.0191650390625, 118.68472290039062, -47.65361785888672, -26.369400024414062, 24.738754272460938, 51.85971450805664, 99.56944274902344, -207.27999877929688, 314.962158203125, 25.213972091674805, -114.07128143310547, 380.6675109863281, -196.60784912109375, 43.627437591552734, -75.68714141845703, 37.98008728027344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000282.npy"}
|
||||
{"epoch": 0.41409691629955947, "step": 283, "batch_size": 64, "mean": 202.7651824951172, "std": 210.1786651611328, "min": -447.37689208984375, "p10": -41.985166168212885, "median": 194.54068756103516, "p90": 490.7471893310547, "max": 648.0303955078125, "pos_frac": 0.84375, "sample": [-74.6917724609375, -90.3800277709961, 222.78684997558594, 38.98558807373047, 373.65643310546875, 321.3529968261719, -45.0838623046875, 229.76690673828125, 494.7007141113281, 41.55280303955078, 73.1493148803711, 166.87074279785156, -11.071956634521484, 71.03718566894531, 105.21723175048828, 253.32569885253906, -447.37689208984375, 74.00872802734375, 445.55242919921875, -69.44925689697266, 492.8999328613281, 471.24627685546875, 192.99476623535156, 322.62579345703125, 46.88105392456055, 144.48687744140625, -27.43223762512207, 468.86639404296875, 344.60113525390625, 498.81744384765625, 67.812255859375, 295.54364013671875, -154.91307067871094, 196.08660888671875, 453.24627685546875, 56.46897888183594, 115.74539184570312, -92.63877868652344, 84.3802490234375, 8.121885299682617, 540.3379516601562, 196.4084930419922, 312.5926513671875, 219.82659912109375, 303.3106384277344, 219.03781127929688, 95.87090301513672, 184.08847045898438, 427.47137451171875, 304.4902648925781, 596.2421875, 58.915184020996094, 393.4697265625, 143.46807861328125, 69.9686508178711, 275.478515625, 545.83447265625, 228.88514709472656, 360.5632629394531, 648.0303955078125, 485.72412109375, 129.641845703125, 112.31937408447266, -34.75487518310547], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000283.npy"}
|
||||
{"epoch": 0.4155653450807636, "step": 284, "batch_size": 64, "mean": 184.29434204101562, "std": 204.35250854492188, "min": -201.36456298828125, "p10": -40.71529731750488, "median": 152.04788970947266, "p90": 476.21762390136735, "max": 735.5867309570312, "pos_frac": 0.859375, "sample": [-88.96127319335938, 87.88667297363281, 333.7681884765625, 39.43780517578125, 96.36756134033203, 71.55252838134766, 352.7613525390625, 18.476280212402344, 264.3580017089844, 431.5013427734375, 400.13671875, -31.97854995727539, 495.3817443847656, 44.102500915527344, 290.9424743652344, 50.26955032348633, 37.024192810058594, 253.18878173828125, 300.85845947265625, 501.2771911621094, 48.782135009765625, 645.7393188476562, 143.56570434570312, 284.6976318359375, 131.49069213867188, 46.78242492675781, 160.5300750732422, 75.4066162109375, 311.35345458984375, -63.44203186035156, -5.240325927734375, 735.5867309570312, 186.45225524902344, -62.18560028076172, 225.6812744140625, 109.446533203125, -125.35172271728516, 77.10692596435547, 238.5928955078125, 731.4259033203125, 80.24284362792969, 184.68496704101562, 197.1002197265625, 10.197343826293945, -104.8599853515625, -201.36456298828125, 279.1253356933594, 333.4719543457031, 124.82145690917969, 233.77171325683594, 554.7088012695312, 184.1663818359375, 316.2073059082031, 227.9769744873047, 38.46839904785156, 7.362247467041016, -44.459617614746094, 8.434532165527344, 357.78143310546875, 247.01951599121094, 2.7637100219726562, 526.4295654296875, 16.64385223388672, 369.3704833984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000284.npy"}
|
||||
{"epoch": 0.4170337738619677, "step": 285, "batch_size": 64, "mean": 164.75491333007812, "std": 209.03392028808594, "min": -592.2425537109375, "p10": -63.20351791381835, "median": 147.68311309814453, "p90": 399.98017578125007, "max": 639.3946533203125, "pos_frac": 0.84375, "sample": [387.67010498046875, 239.0552978515625, 45.813140869140625, 405.25592041015625, 143.03347778320312, 125.17694091796875, 140.30612182617188, 59.870399475097656, 140.85638427734375, 639.3946533203125, 317.9105224609375, 186.67454528808594, 362.9379577636719, 132.51171875, 146.8820343017578, 568.49072265625, 20.881317138671875, 140.41702270507812, 112.33486938476562, 259.4191589355469, 478.1498107910156, -284.771484375, 60.403587341308594, 310.0323181152344, 423.4209289550781, -27.80321502685547, -66.85165405273438, 85.43621826171875, 101.56170654296875, 64.85770416259766, 259.70989990234375, 186.68052673339844, -70.62674713134766, 78.50845336914062, 280.7833557128906, 314.4368896484375, 24.30237579345703, -592.2425537109375, 248.69839477539062, 18.901336669921875, 179.22366333007812, 16.466705322265625, 600.4150390625, 274.0744934082031, -54.691200256347656, 323.0060119628906, 283.44775390625, 133.38417053222656, 252.1119384765625, 384.1365661621094, 191.36862182617188, -186.6728973388672, 348.0182189941406, 206.28506469726562, -191.7210235595703, -31.167118072509766, 428.2838134765625, 143.87759399414062, 102.28719329833984, 218.6454315185547, -200.74542236328125, 325.8856201171875, 181.43038940429688, 148.48419189453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000285.npy"}
|
||||
{"epoch": 0.4185022026431718, "step": 286, "batch_size": 64, "mean": 182.9866485595703, "std": 230.26596069335938, "min": -354.0040588378906, "p10": -96.95835037231444, "median": 167.04224395751953, "p90": 482.9579315185547, "max": 693.0340576171875, "pos_frac": 0.78125, "sample": [58.514793395996094, -69.60631561279297, 455.6512756347656, 138.05557250976562, 179.92828369140625, 170.91552734375, 35.474937438964844, 266.7664794921875, 358.72064208984375, -354.0040588378906, 303.8678283691406, 210.45578002929688, 1.4669647216796875, -80.15059661865234, 480.4163513183594, 235.25216674804688, 609.720458984375, 9.07855224609375, 230.57333374023438, 163.16896057128906, 484.04718017578125, 130.66647338867188, -265.2746887207031, 439.92462158203125, 314.43450927734375, -78.24237060546875, -60.76768112182617, 361.2073974609375, 365.7059020996094, 594.481201171875, 543.7164306640625, -232.61297607421875, 204.69236755371094, 63.336212158203125, -40.01347351074219, 25.511581420898438, 390.33172607421875, 149.1558380126953, 1.077484130859375, -118.69273376464844, 141.9596405029297, -102.58248138427734, 261.0958557128906, -90.28328704833984, 135.93917846679688, 437.65826416015625, 466.14630126953125, 143.41229248046875, -119.34060668945312, 557.9208984375, -61.24219512939453, 253.82321166992188, 86.97102355957031, 511.90155029296875, 151.7255859375, 310.7315979003906, 90.61251831054688, 242.4034423828125, 282.9039306640625, -99.819091796875, 693.0340576171875, 397.7079162597656, 251.21905517578125, 90.29496002197266], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000286.npy"}
|
||||
{"epoch": 0.4199706314243759, "step": 287, "batch_size": 64, "mean": 149.98983764648438, "std": 203.4695587158203, "min": -223.41293334960938, "p10": -90.3440689086914, "median": 116.10505676269531, "p90": 453.5651092529297, "max": 678.56005859375, "pos_frac": 0.78125, "sample": [113.12724304199219, 4.087982177734375, -7.525064468383789, 63.11669158935547, 140.72682189941406, -34.097015380859375, 295.984619140625, 597.2901611328125, 175.60218811035156, 160.3397216796875, 506.4615478515625, 102.72215270996094, 231.17234802246094, 534.6473388671875, -82.76486206054688, -72.73748779296875, 9.456504821777344, 522.0578002929688, 115.238037109375, 342.7716064453125, 206.64932250976562, 6.8149871826171875, 106.39508819580078, -223.41293334960938, 678.56005859375, 98.60246276855469, 56.67778015136719, -13.0010986328125, -93.59230041503906, 55.72173309326172, 167.1700897216797, -50.654762268066406, -166.52899169921875, 150.3236541748047, 76.68928527832031, 205.8684844970703, 67.51338195800781, 272.0635986328125, 118.63172912597656, 345.59515380859375, -16.813051223754883, 283.62750244140625, 168.48873901367188, 93.70301818847656, 51.13099670410156, 127.99880981445312, 116.97207641601562, 23.535263061523438, 452.6417236328125, 435.4449462890625, -147.54798889160156, 54.07244110107422, 215.03355407714844, 36.683067321777344, 453.9608459472656, -217.70816040039062, -142.14039611816406, -104.44457244873047, 262.90771484375, 354.9320373535156, 511.29913330078125, 387.6556091308594, 167.49441528320312, 246.656494140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000287.npy"}
|
||||
{"epoch": 0.42143906020558003, "step": 288, "batch_size": 64, "mean": 186.38465881347656, "std": 209.69747924804688, "min": -354.3421630859375, "p10": -4.829238891601558, "median": 182.98385620117188, "p90": 425.80621032714845, "max": 743.735595703125, "pos_frac": 0.875, "sample": [192.33926391601562, 177.29824829101562, 20.616031646728516, 211.9011688232422, 346.04107666015625, 417.19970703125, 0.2832164764404297, 743.735595703125, 27.30137825012207, 372.00518798828125, 292.3025207519531, 151.74417114257812, -131.53689575195312, 15.895660400390625, 99.50048828125, 157.7001495361328, 131.55699157714844, 428.1660461425781, 255.5001220703125, 370.5310363769531, 192.74954223632812, 286.2554931640625, 279.5377197265625, -0.55645751953125, 43.9926643371582, 197.66461181640625, 170.32415771484375, 48.960792541503906, 187.725341796875, 67.44999694824219, 206.40374755859375, 224.56478881835938, 604.0128784179688, 135.25070190429688, 420.2999267578125, 320.20880126953125, 209.66282653808594, -6.660430908203125, 200.663330078125, 52.62622833251953, -354.3421630859375, 618.377197265625, 110.62495422363281, 325.34393310546875, 67.77107238769531, 178.24237060546875, 57.00446319580078, 48.36955261230469, 487.50482177734375, 113.38005828857422, 247.07069396972656, 622.0855712890625, 215.52688598632812, -350.03033447265625, 93.17355346679688, 521.8764038085938, 312.04339599609375, 65.24354553222656, 62.54914855957031, 365.9961242675781, -29.74048614501953, 340.92633056640625, -145.01756286621094, -166.57847595214844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000288.npy"}
|
||||
{"epoch": 0.42290748898678415, "step": 289, "batch_size": 64, "mean": 208.77468872070312, "std": 232.82252502441406, "min": -208.53173828125, "p10": -66.03944244384766, "median": 169.97967529296875, "p90": 546.2722106933595, "max": 698.450439453125, "pos_frac": 0.828125, "sample": [142.39395141601562, -66.19241333007812, 634.2886962890625, 661.1767578125, -158.47933959960938, 606.212646484375, 415.3751220703125, 295.18743896484375, 49.187408447265625, 555.64697265625, 495.89044189453125, 432.77667236328125, 293.4873046875, 524.3977661132812, 346.43133544921875, 307.8905944824219, 21.282882690429688, 698.450439453125, -72.4956283569336, 251.20083618164062, 107.33612060546875, 48.196044921875, 103.04147338867188, -35.162654876708984, -146.054931640625, 30.97652816772461, 296.1220703125, 460.6548767089844, 67.56143188476562, 135.5683135986328, 428.8138122558594, 648.6748046875, 340.09625244140625, -65.68251037597656, 64.03107452392578, 34.16267395019531, 615.0516967773438, 93.41780090332031, -90.37553405761719, 13.929555892944336, 101.63357543945312, 496.6717529296875, -64.05178833007812, 256.0641174316406, 206.43060302734375, 398.33074951171875, 430.16473388671875, -43.7994384765625, 139.6769561767578, -116.22982788085938, 163.54241943359375, 186.0211181640625, 79.03262329101562, 32.956939697265625, 507.96258544921875, 189.116455078125, 29.246328353881836, 36.76421356201172, 176.41693115234375, 193.21221923828125, 256.5273132324219, -208.53173828125, 275.628662109375, 54.323753356933594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000289.npy"}
|
||||
{"epoch": 0.4243759177679883, "step": 290, "batch_size": 64, "mean": 210.26165771484375, "std": 259.14361572265625, "min": -354.5686950683594, "p10": -64.58390502929686, "median": 199.65428924560547, "p90": 576.3289428710938, "max": 889.0296630859375, "pos_frac": 0.8125, "sample": [546.8458251953125, 175.98712158203125, 131.28607177734375, 180.11862182617188, -354.5686950683594, 390.9812927246094, 443.57366943359375, -71.63302612304688, 37.94691467285156, 149.46983337402344, -225.7135009765625, 411.2516174316406, 231.632080078125, 11.810394287109375, 262.3612060546875, 213.95407104492188, 503.8355712890625, -26.911376953125, 247.14260864257812, 17.47586441040039, -1.6381072998046875, 45.09318542480469, -171.32989501953125, 136.92190551757812, 139.37344360351562, -288.1617126464844, 161.24362182617188, 317.9924011230469, 889.0296630859375, 710.5308837890625, 77.50483703613281, 246.4423370361328, 299.670166015625, 635.3829345703125, 243.23187255859375, 69.30680847167969, 278.3103942871094, 96.23927307128906, 264.3662109375, 186.81964111328125, 13.27301025390625, 238.16941833496094, 212.4889373779297, 335.7392578125, 48.87345886230469, 283.606201171875, -45.043418884277344, -48.135955810546875, 306.8215637207031, -127.05136108398438, 688.220703125, 479.00653076171875, 275.4493103027344, 564.1976318359375, 16.364452362060547, 619.0620727539062, 581.528076171875, 328.7440185546875, -45.84621047973633, 59.5133056640625, 71.35125732421875, 854.2652587890625, 239.28567504882812, -106.31367492675781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000290.npy"}
|
||||
{"epoch": 0.42584434654919234, "step": 291, "batch_size": 64, "mean": 194.061279296875, "std": 219.4269561767578, "min": -420.1502990722656, "p10": -48.37581405639648, "median": 209.96350860595703, "p90": 455.67958068847656, "max": 649.0008544921875, "pos_frac": 0.796875, "sample": [207.32362365722656, 229.43077087402344, 2.232818603515625, 56.565589904785156, -15.4212646484375, 296.98126220703125, 633.4031982421875, 291.6384582519531, 444.12188720703125, 275.6263122558594, 61.90205383300781, 407.71319580078125, 306.61474609375, -51.780479431152344, 649.0008544921875, -29.31494140625, 455.14947509765625, 235.3310089111328, -8.973886489868164, 240.6527557373047, 472.382568359375, 5.2590484619140625, 288.8214416503906, 85.61611938476562, 279.88311767578125, 390.17919921875, 151.78814697265625, 200.43016052246094, 399.2900085449219, 31.810985565185547, 340.2141418457031, 98.57459259033203, 338.86212158203125, -207.0968780517578, 68.75027465820312, -145.53488159179688, -34.965843200683594, 53.802024841308594, -420.1502990722656, 582.9412841796875, 348.86700439453125, 455.9067687988281, 398.042724609375, 212.6033935546875, -104.93801879882812, -40.43159484863281, -62.90046691894531, 354.59674072265625, 58.623191833496094, 158.02316284179688, 100.67033386230469, 168.21484375, 349.71221923828125, 312.441650390625, 5.455780029296875, 234.43960571289062, 426.1877746582031, 79.08641052246094, 528.4735717773438, -29.218528747558594, -128.03866577148438, 389.5113525390625, 533.37744140625, 2.160318374633789], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000291.npy"}
|
||||
{"epoch": 0.42731277533039647, "step": 292, "batch_size": 64, "mean": 198.60556030273438, "std": 245.7430877685547, "min": -579.1243286132812, "p10": -59.19863319396973, "median": 196.73492431640625, "p90": 513.1909454345704, "max": 739.297607421875, "pos_frac": 0.8125, "sample": [242.88531494140625, 54.469425201416016, 16.447097778320312, 194.3402099609375, 319.5202941894531, 199.129638671875, 119.4654541015625, 116.6861801147461, 201.37115478515625, -579.1243286132812, 706.955322265625, 62.75940704345703, 586.7393798828125, 112.17237854003906, 301.0663757324219, -60.14982604980469, -257.3016357421875, 427.959716796875, 60.31616973876953, 292.5909118652344, -23.36040496826172, 737.1025390625, 404.6327819824219, 234.16720581054688, 455.5290222167969, 517.454345703125, 265.5361022949219, 462.5252685546875, 158.79925537109375, 121.17251586914062, -117.08935546875, 265.1585693359375, 9.431099891662598, 493.097900390625, -12.814750671386719, 139.5801544189453, -99.04656982421875, 186.7609100341797, 135.0249786376953, 120.26731872558594, 41.919097900390625, 519.4186401367188, 223.12277221679688, 184.68702697753906, 206.55499267578125, 208.06915283203125, 416.9014587402344, 345.31842041015625, 739.297607421875, 0.5813579559326172, 170.87820434570312, 295.95391845703125, 247.9300994873047, -220.90501403808594, 235.07102966308594, -165.53622436523438, 707.8135375976562, -17.412975311279297, -56.979183197021484, 503.2430114746094, 212.26417541503906, 233.48666381835938, 124.94844055175781, -18.097780227661133], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000292.npy"}
|
||||
{"epoch": 0.4287812041116006, "step": 293, "batch_size": 64, "mean": 153.2054443359375, "std": 234.1806640625, "min": -246.8534393310547, "p10": -129.60037994384766, "median": 120.40057754516602, "p90": 434.9908477783203, "max": 902.8529052734375, "pos_frac": 0.734375, "sample": [38.926334381103516, 63.489776611328125, 20.157384872436523, 49.14556884765625, 218.4618682861328, 196.0983428955078, 470.8417663574219, 160.6977081298828, 433.45355224609375, 39.78398132324219, 435.6496887207031, -68.68647766113281, 209.98419189453125, 13.621326446533203, 902.8529052734375, -244.63946533203125, 287.3022766113281, 188.881591796875, 52.013397216796875, 280.1263427734375, 358.5346984863281, -152.93399047851562, 245.82876586914062, -99.69157409667969, 43.96185302734375, 99.32958221435547, -75.81095123291016, 134.97787475585938, -5.3986663818359375, 494.910400390625, 418.55877685546875, 299.1649169921875, 314.16937255859375, 423.6194152832031, 312.8778381347656, -130.30714416503906, 46.29908752441406, 158.3191680908203, 190.36282348632812, -14.559133529663086, -62.14611053466797, 78.7432632446289, -1.322998046875, 251.25131225585938, 272.62017822265625, 362.0232849121094, 213.89959716796875, 347.435302734375, 104.17759704589844, -240.04791259765625, 21.82049560546875, -246.8534393310547, -161.76756286621094, 470.06256103515625, 801.6060791015625, 105.82328033447266, -127.95126342773438, 146.6575927734375, 60.45224380493164, -80.17796325683594, -135.02182006835938, 365.37896728515625, 512.983642578125, -64.87327575683594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000293.npy"}
|
||||
{"epoch": 0.4302496328928047, "step": 294, "batch_size": 64, "mean": 171.36770629882812, "std": 229.19219970703125, "min": -266.6346130371094, "p10": -93.35507049560542, "median": 97.51404571533203, "p90": 496.63154907226567, "max": 819.360595703125, "pos_frac": 0.875, "sample": [31.89685821533203, 53.5526123046875, 309.477783203125, 419.390625, 89.23187255859375, 59.4432487487793, -151.50582885742188, 138.23196411132812, 5.8878631591796875, 72.03260040283203, 17.083267211914062, 42.740814208984375, -48.223445892333984, 548.1871948242188, 50.08496856689453, 280.4488830566406, 194.5053253173828, 416.05133056640625, 55.72722244262695, 173.8677520751953, 680.2418823242188, -152.7259063720703, 261.498779296875, 90.93292236328125, 629.8847045898438, 418.5059509277344, 309.38037109375, 161.7010040283203, 178.58425903320312, -112.69719696044922, 240.51168823242188, 292.991943359375, 302.1706237792969, 819.360595703125, -199.66522216796875, -188.5087127685547, 643.6541137695312, 191.2325897216797, 396.3216857910156, 19.911293029785156, -266.6346130371094, -184.7757568359375, 249.634521484375, 307.8531799316406, 550.0950317382812, 108.78414916992188, 14.549625396728516, 160.75904846191406, 489.8067626953125, 10.33995246887207, 348.3963317871094, 365.9394836425781, 18.49138641357422, 92.38139343261719, 44.07456588745117, 38.0899658203125, 42.05561447143555, 61.840370178222656, 18.410289764404297, 102.64669799804688, 499.55645751953125, 76.38351440429688, 28.020782470703125, 49.43408966064453], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000294.npy"}
|
||||
{"epoch": 0.43171806167400884, "step": 295, "batch_size": 64, "mean": 185.25486755371094, "std": 242.42413330078125, "min": -324.50128173828125, "p10": -123.36470413208005, "median": 164.7293930053711, "p90": 516.8847656250001, "max": 863.7744140625, "pos_frac": 0.78125, "sample": [-214.09979248046875, -6.604658126831055, 549.5991821289062, 12.459510803222656, -324.50128173828125, 217.05712890625, -151.57188415527344, 317.0579528808594, 298.1814270019531, -255.56280517578125, 329.272705078125, -31.412429809570312, 136.5672607421875, 387.34637451171875, 409.9358215332031, 281.9281311035156, 254.2909698486328, 281.8444519042969, -102.8331527709961, 87.22118377685547, 330.0186767578125, -14.251302719116211, 143.53604125976562, 364.1827087402344, 8.073448181152344, 149.772216796875, 527.608154296875, -132.1639404296875, 731.0679931640625, 160.43228149414062, 395.5450744628906, 19.304580688476562, 29.335506439208984, -148.18572998046875, 441.5980224609375, 548.2429809570312, 55.711387634277344, -95.25385284423828, -202.5797882080078, 91.61634826660156, 230.7464599609375, 408.3434143066406, 371.10919189453125, 117.84089660644531, -47.15862274169922, 75.65643310546875, 240.46632385253906, 65.54151153564453, 198.96456909179688, 526.6043701171875, 342.2798767089844, 275.73712158203125, 18.374420166015625, 863.7744140625, 494.2056884765625, 398.5539855957031, 144.66317749023438, 184.41824340820312, 192.7453155517578, 169.02650451660156, 49.48278045654297, 104.52290344238281, 612.150634765625, -61.52526092529297], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000295.npy"}
|
||||
{"epoch": 0.4331864904552129, "step": 296, "batch_size": 64, "mean": 134.98883056640625, "std": 265.3215637207031, "min": -634.85205078125, "p10": -226.33961486816403, "median": 143.46627807617188, "p90": 503.3848236083985, "max": 765.5859985351562, "pos_frac": 0.734375, "sample": [117.96688079833984, 88.84159088134766, 153.17578125, 733.58544921875, -119.72787475585938, 548.59765625, -256.2787170410156, -83.26371765136719, -56.40465545654297, -634.85205078125, 102.78250122070312, 200.33270263671875, -306.01422119140625, 205.2355194091797, 164.94639587402344, -262.61236572265625, 153.6178436279297, 326.8265380859375, 609.7467041015625, 411.39361572265625, 220.82647705078125, 243.967041015625, -19.085365295410156, -80.22735595703125, 141.85418701171875, 355.5354309082031, 62.735443115234375, -241.0922393798828, 11.1640625, -191.9168243408203, 597.2313842773438, 15.341384887695312, 275.9630126953125, 51.417137145996094, 202.73019409179688, 508.3046569824219, 168.06129455566406, 98.77444458007812, -473.5124206542969, 242.64520263671875, -280.3735046386719, 298.1536560058594, -45.590797424316406, -60.13043212890625, 509.64984130859375, 94.65758514404297, 116.34862518310547, 194.0883026123047, 145.078369140625, 169.12783813476562, 127.46985626220703, 45.92505645751953, -4.600196838378906, 765.5859985351562, 235.49790954589844, 226.67019653320312, 89.24817657470703, 67.68242645263672, 491.90521240234375, -74.17018127441406, 247.21165466308594, 216.16580200195312, 405.53662109375, 369.5343322753906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000296.npy"}
|
||||
{"epoch": 0.434654919236417, "step": 297, "batch_size": 64, "mean": 162.16717529296875, "std": 196.54209899902344, "min": -253.93936157226562, "p10": -47.10204238891601, "median": 118.12560272216797, "p90": 415.98563537597664, "max": 624.1141357421875, "pos_frac": 0.8125, "sample": [136.47802734375, 241.7744598388672, 387.4781188964844, 517.7882080078125, 86.53721618652344, 11.731744766235352, 333.328857421875, 209.48529052734375, -135.23545837402344, -56.8204231262207, 70.90541076660156, 150.75064086914062, 232.54241943359375, 435.81707763671875, 391.6480712890625, -31.93560218811035, 78.72955322265625, 19.361724853515625, -45.8370361328125, -13.115928649902344, 317.08941650390625, -150.96241760253906, 88.57914733886719, 35.21080017089844, 514.5238037109375, 89.45523071289062, 303.92083740234375, 77.79022216796875, 237.32650756835938, 333.3007507324219, 174.06610107421875, 98.09213256835938, 402.495361328125, 213.04713439941406, 397.7518310546875, 26.01424789428711, 111.06767272949219, 4.046577453613281, 523.9304809570312, 272.2588806152344, 7.9745025634765625, 189.6099853515625, -238.44805908203125, 332.36962890625, 63.86560821533203, -55.13109588623047, 125.18353271484375, 73.411376953125, 600.5977783203125, -45.13459777832031, 134.4708251953125, 421.7671813964844, 244.29718017578125, -15.7222900390625, -253.93936157226562, 313.06195068359375, 2.9939651489257812, 82.15718078613281, 192.69223022460938, 624.1141357421875, 109.81890869140625, 373.5382080078125, -47.644187927246094, 52.37727355957031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000297.npy"}
|
||||
{"epoch": 0.43612334801762115, "step": 298, "batch_size": 64, "mean": 211.24917602539062, "std": 310.3638916015625, "min": -584.5660400390625, "p10": -115.59244537353516, "median": 172.8660659790039, "p90": 588.031982421875, "max": 1438.5341796875, "pos_frac": 0.71875, "sample": [121.97547149658203, -163.08450317382812, 22.34268569946289, -584.5660400390625, 55.556976318359375, -112.364013671875, -5.237958908081055, 677.4823608398438, 253.33737182617188, 5.67926025390625, 192.82186889648438, 471.197998046875, -61.15437316894531, -106.33824157714844, -164.89486694335938, 64.65401458740234, 206.73556518554688, 131.6046905517578, 113.9217300415039, 256.2752685546875, 827.7232666015625, -8.96270751953125, 380.26715087890625, -164.50656127929688, 643.6704711914062, 274.29815673828125, -116.97605895996094, 559.4232788085938, 590.5628662109375, 721.7755126953125, -122.5556869506836, -122.910888671875, -7.480247497558594, 7.9484100341796875, -33.195213317871094, 582.1265869140625, 677.7754516601562, 133.9435272216797, 429.2286376953125, 252.56939697265625, 8.702634811401367, -10.431549072265625, 1438.5341796875, 256.9423828125, 581.9523315429688, 228.39505004882812, 415.993896484375, 450.03363037109375, -12.333187103271484, 108.8190689086914, 209.75283813476562, 433.902099609375, 329.41033935546875, 137.40103149414062, 550.2103271484375, 161.79893493652344, -13.497255325317383, 370.6382141113281, -69.25361633300781, 227.71047973632812, 183.93319702148438, 234.58351135253906, 60.69371795654297, 355.38427734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000298.npy"}
|
||||
{"epoch": 0.43759177679882527, "step": 299, "batch_size": 64, "mean": 180.61741638183594, "std": 256.47125244140625, "min": -391.25189208984375, "p10": -99.99365539550782, "median": 137.14777374267578, "p90": 546.297412109375, "max": 855.2594604492188, "pos_frac": 0.765625, "sample": [-112.67792510986328, -260.3570861816406, -391.25189208984375, 373.0341796875, 49.70587158203125, -25.45706558227539, 361.92584228515625, 132.86151123046875, 77.89514923095703, 100.02942657470703, 589.0488891601562, 545.3995971679688, 743.1130981445312, -100.35286712646484, 165.3114471435547, 727.3954467773438, -202.12933349609375, 351.2002258300781, 138.56405639648438, 242.0629425048828, -99.1554946899414, 341.72833251953125, 145.40786743164062, 119.01287841796875, 451.3374328613281, 117.1573486328125, 855.2594604492188, -2.083049774169922, 214.50833129882812, 20.98098373413086, -15.029245376586914, 113.59748840332031, -148.473876953125, 43.82826232910156, 197.3015899658203, 546.6821899414062, 135.7314910888672, 152.72999572753906, -36.645782470703125, 172.6922607421875, 52.782493591308594, 318.1766662597656, 280.95184326171875, -45.51883316040039, 174.67575073242188, -89.16632843017578, -213.6946563720703, 493.62139892578125, 34.151123046875, 777.3037109375, -64.6152114868164, 69.8005142211914, 289.0296936035156, 339.552490234375, 259.4590759277344, 326.63720703125, 97.02763366699219, 611.4343872070312, 254.90582275390625, 132.76853942871094, 28.69207763671875, 350.4756774902344, 10.166877746582031, 239.00738525390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000299.npy"}
|
||||
{"epoch": 0.4390602055800294, "step": 300, "batch_size": 64, "mean": 212.70004272460938, "std": 294.4864196777344, "min": -427.9167175292969, "p10": -224.82164916992184, "median": 195.10107421875, "p90": 607.0065490722657, "max": 789.9986572265625, "pos_frac": 0.75, "sample": [317.71331787109375, 282.475341796875, 148.66683959960938, 381.001953125, 661.90283203125, -379.9697265625, -194.321533203125, 633.2086791992188, 407.94677734375, 8.230770111083984, -28.364553451538086, 339.2453918457031, 136.95091247558594, 590.181640625, 277.2880554199219, 405.0324401855469, 117.12422180175781, 476.94049072265625, 49.32841491699219, 65.96298217773438, -44.9029541015625, 199.49252319335938, 270.1372985839844, -31.36725616455078, 495.6966247558594, 365.033935546875, -128.12747192382812, 113.55653381347656, 159.0178985595703, 262.64776611328125, 73.65144348144531, 768.527587890625, 271.1935729980469, -427.9167175292969, 540.0347290039062, -308.5068054199219, 564.5454711914062, 355.0837707519531, 474.66552734375, 177.128173828125, 89.64936065673828, 460.5545654296875, 614.2172241210938, 323.8268127441406, -41.81771469116211, 356.2980041503906, -95.37409973144531, -245.06375122070312, 735.3736572265625, 430.9858093261719, 423.36456298828125, -271.01409912109375, 167.93118286132812, -262.9469299316406, -1.2879905700683594, 148.89108276367188, 508.7871398925781, 75.75457763671875, 629.3966064453125, 789.9986572265625, -237.89312744140625, 18.494495391845703, 190.70962524414062, -42.16938781738281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000300.npy"}
|
||||
{"epoch": 0.44052863436123346, "step": 301, "batch_size": 64, "mean": 214.18943786621094, "std": 274.3539123535156, "min": -210.6104736328125, "p10": -111.19889221191404, "median": 141.83805084228516, "p90": 600.5178649902344, "max": 988.8233032226562, "pos_frac": 0.8125, "sample": [221.3214569091797, 314.9335632324219, -42.352943420410156, 322.530029296875, 425.2071838378906, 305.0572814941406, 818.0784912109375, 112.52989196777344, 533.2681274414062, -11.419233322143555, 902.0341796875, 313.70501708984375, 52.772193908691406, 272.05291748046875, -151.29002380371094, 136.11427307128906, 99.15804290771484, 667.4117431640625, 104.64302062988281, 358.3997802734375, 819.2960205078125, 82.41325378417969, -126.41300201416016, 86.00614929199219, 452.4609069824219, 52.78094482421875, 64.7541732788086, 68.42347717285156, 256.82421875, 379.8088073730469, 267.5745849609375, 267.050048828125, 58.898189544677734, 218.34164428710938, 26.36069679260254, 56.67303466796875, 201.1118927001953, -177.2594757080078, 11.102535247802734, 13.5179443359375, 128.01712036132812, 2.890371322631836, 248.13882446289062, -136.13278198242188, -29.433456420898438, 988.8233032226562, 243.1692657470703, 197.92691040039062, 748.6214599609375, 78.45501708984375, 574.5596313476562, 147.56182861328125, 348.9267578125, 525.3150024414062, 413.285400390625, 55.543270111083984, -119.20379638671875, -192.09642028808594, 611.642822265625, -210.6104736328125, 105.89302062988281, -92.52078247070312, -48.77645492553711, 284.2477722167969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000301.npy"}
|
||||
{"epoch": 0.4419970631424376, "step": 302, "batch_size": 64, "mean": 176.7947235107422, "std": 236.75076293945312, "min": -241.3558349609375, "p10": -99.24397201538085, "median": 156.7672348022461, "p90": 500.6409790039064, "max": 894.9427490234375, "pos_frac": 0.765625, "sample": [34.67662048339844, 434.0556640625, 709.3601684570312, -56.504573822021484, 212.10430908203125, -38.79127502441406, 227.31239318847656, 8.9288330078125, 177.6006317138672, 32.912384033203125, 217.82406616210938, 29.710678100585938, 76.81060028076172, 78.8180923461914, 187.3287353515625, 109.70345306396484, 359.257080078125, -6.234668731689453, 444.93524169921875, 367.9473571777344, 227.9392852783203, 177.66250610351562, 188.00546264648438, 243.305908203125, -70.72221374511719, -102.62065887451172, 388.12060546875, 555.7684326171875, 93.49993133544922, -230.39207458496094, 219.04489135742188, -44.512550354003906, 146.21588134765625, -91.36503601074219, -108.11598205566406, 524.0819091796875, 894.9427490234375, 519.9550170898438, 167.31858825683594, -113.31072998046875, -28.22320556640625, 455.57489013671875, -241.3558349609375, 249.60552978515625, 424.5025634765625, 53.734458923339844, 117.26487731933594, 655.9307250976562, 227.75070190429688, 313.9814453125, 38.54523468017578, 227.2892608642578, -181.18069458007812, 285.63037109375, 73.27696228027344, 268.28314208984375, -58.09779357910156, 403.4012145996094, 69.73722839355469, 654.9857788085938, -145.87615966796875, 43.10661697387695, 118.34174346923828, 96.07572937011719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000302.npy"}
|
||||
{"epoch": 0.4434654919236417, "step": 303, "batch_size": 64, "mean": 159.26882934570312, "std": 241.89088439941406, "min": -420.1774597167969, "p10": -134.55278778076172, "median": 129.32454681396484, "p90": 522.2392761230469, "max": 678.1196899414062, "pos_frac": 0.703125, "sample": [304.3101806640625, -4.299350738525391, 28.07733154296875, 606.8524780273438, -136.4636993408203, 377.7178039550781, 438.56817626953125, 678.1196899414062, -18.8658390045166, -105.43869018554688, 365.43096923828125, 202.65615844726562, 350.9358215332031, -99.41043090820312, 90.29549407958984, 166.2948455810547, 93.27116394042969, 13.87755012512207, -80.0738754272461, 415.73809814453125, -420.1774597167969, -58.20649337768555, -130.093994140625, 312.7012023925781, 437.92718505859375, 562.1929931640625, -141.4000701904297, 333.74560546875, -253.21066284179688, 124.52717590332031, -15.977630615234375, 322.874267578125, 623.0018310546875, 224.16908264160156, 449.17822265625, 178.5828857421875, -166.97116088867188, 111.40985107421875, 100.46109771728516, 613.8239135742188, 531.93896484375, 52.8973388671875, 195.20175170898438, 516.2587280273438, 134.12191772460938, 212.99093627929688, 259.33563232421875, 316.1209716796875, 149.43972778320312, -41.349327087402344, 94.87075805664062, -37.568153381347656, 213.3030242919922, 20.888111114501953, 87.14129638671875, 524.8023681640625, -142.6361083984375, -40.85597229003906, 11.272708892822266, 22.683902740478516, 248.7133026123047, -4.5606689453125, -192.16812133789062, 164.21051025390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000303.npy"}
|
||||
{"epoch": 0.44493392070484583, "step": 304, "batch_size": 64, "mean": 182.71343994140625, "std": 157.5171661376953, "min": -239.23509216308594, "p10": -0.39480590820311523, "median": 180.743896484375, "p90": 385.81872558593756, "max": 623.9747924804688, "pos_frac": 0.890625, "sample": [31.852279663085938, 242.26121520996094, 275.2595520019531, 126.41002655029297, -8.835357666015625, -14.853225708007812, -4.431732177734375, 49.25468444824219, 191.80972290039062, 108.178466796875, 282.8825378417969, 317.01983642578125, 179.46514892578125, 354.47845458984375, 52.47727966308594, 272.9046630859375, 293.7393493652344, 293.64434814453125, 59.78667449951172, 33.83401870727539, 156.55398559570312, 391.99468994140625, 56.95347595214844, 69.74752807617188, 280.7771301269531, 86.783447265625, 279.6969299316406, 79.04954528808594, 371.40814208984375, 108.44329833984375, 128.65768432617188, 125.12240600585938, 623.9747924804688, 132.13449096679688, -50.982643127441406, -239.23509216308594, 229.77069091796875, 9.024688720703125, 29.696823120117188, 248.43539428710938, -58.906097412109375, 462.0069580078125, 106.81968688964844, 66.2377700805664, 453.4449462890625, 159.7874755859375, 12.483871459960938, 70.91622924804688, 224.53521728515625, 342.5035705566406, 258.0605163574219, -66.37077331542969, 223.4501190185547, 421.8305969238281, 393.06329345703125, 55.11381530761719, 266.3936462402344, 182.02264404296875, 407.2601318359375, 363.6565856933594, 333.7994384765625, 194.47640991210938, 322.216552734375, 243.71212768554688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000304.npy"}
|
||||
{"epoch": 0.44640234948604995, "step": 305, "batch_size": 64, "mean": 163.28309631347656, "std": 236.73658752441406, "min": -322.0887451171875, "p10": -84.03926086425778, "median": 129.17516708374023, "p90": 458.1063873291016, "max": 1054.94189453125, "pos_frac": 0.734375, "sample": [400.0547790527344, 277.9049072265625, 113.23655700683594, 206.5447235107422, 189.6446990966797, 697.1471557617188, 259.94573974609375, 28.25581169128418, 257.44757080078125, 500.8643798828125, 171.6455841064453, 82.40293884277344, 79.58565521240234, -61.290008544921875, -93.7889404296875, -14.730842590332031, 317.846923828125, 447.1036682128906, 89.18329620361328, 68.59395599365234, 1054.94189453125, 349.7861328125, 23.234371185302734, 262.57489013671875, 292.44720458984375, -38.9000244140625, -6.780237197875977, 233.37840270996094, 241.67771911621094, -100.92880249023438, -42.29046630859375, 274.33514404296875, -5.0294342041015625, 328.0452575683594, 130.5120849609375, 146.47705078125, 537.5423583984375, 315.80316162109375, -25.659177780151367, 157.39654541015625, 127.83824920654297, -13.798118591308594, 102.48094177246094, 93.48140716552734, 585.8301391601562, 560.9436645507812, 224.85606384277344, -45.26654052734375, -217.92153930664062, 155.03887939453125, -296.3973083496094, 462.82183837890625, 415.17431640625, 65.0090560913086, -95.03427124023438, 31.1485595703125, -115.28324127197266, 43.4166259765625, 273.8667297363281, -322.0887451171875, 49.513160705566406, 256.19500732421875, -55.30207824707031, 17.432437896728516], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000305.npy"}
|
||||
{"epoch": 0.447870778267254, "step": 306, "batch_size": 64, "mean": 158.99729919433594, "std": 197.92811584472656, "min": -350.03228759765625, "p10": -33.15047264099119, "median": 98.2620849609375, "p90": 415.8634063720703, "max": 893.9873046875, "pos_frac": 0.8125, "sample": [19.892963409423828, 15.165760040283203, 87.93447875976562, 1.37677001953125, 34.0355224609375, -65.61146545410156, 341.2049255371094, 8.599723815917969, 96.99920654296875, -8.879554748535156, 237.67100524902344, 449.9154357910156, 180.6060791015625, 412.59490966796875, 69.10653686523438, 85.96031951904297, 9.208011627197266, 469.0981140136719, 71.1196517944336, 50.18580627441406, 87.06719207763672, -59.96953582763672, -8.774032592773438, 432.5631408691406, 240.22970581054688, 327.20587158203125, -10.58890151977539, -6.665290832519531, 417.2641906738281, 105.84349060058594, 81.3819580078125, 144.29220581054688, 260.20928955078125, 326.12799072265625, 50.1400146484375, 145.4049530029297, 99.52496337890625, -89.30815887451172, 22.721569061279297, 327.8367919921875, -350.03228759765625, 324.89013671875, 91.09586334228516, -42.81971740722656, 51.52711486816406, 263.0102233886719, -45.868873596191406, 113.6034927368164, 535.4180908203125, 519.1979370117188, 149.29751586914062, 306.258056640625, 226.9849090576172, -86.02997589111328, 383.02691650390625, -8.140308380126953, 52.890995025634766, 40.22130584716797, 275.15631103515625, 117.81851196289062, 893.9873046875, 210.96884155273438, 315.87017822265625, 378.8025817871094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000306.npy"}
|
||||
{"epoch": 0.44933920704845814, "step": 307, "batch_size": 64, "mean": 162.99288940429688, "std": 214.6385040283203, "min": -428.0800476074219, "p10": -91.87994918823242, "median": 124.09592056274414, "p90": 466.37028503417974, "max": 772.4826049804688, "pos_frac": 0.796875, "sample": [413.80609130859375, 126.21607208251953, 131.78933715820312, 220.00650024414062, 180.41299438476562, 158.3192596435547, -121.93231201171875, 70.82290649414062, -81.13673400878906, 561.29248046875, 118.59085845947266, 13.447822570800781, 72.89329528808594, -150.7296142578125, 450.38433837890625, -428.0800476074219, 496.7172546386719, -167.33912658691406, 232.24411010742188, -81.95464324951172, 239.8687744140625, 277.8047180175781, 87.12657928466797, 169.17788696289062, 481.82122802734375, 253.76553344726562, 38.2673454284668, 121.97576904296875, 77.14434814453125, 498.8141174316406, 85.71408081054688, 473.2214050292969, 70.18732452392578, 100.95750427246094, -96.13365173339844, 107.26669311523438, -76.88546752929688, 47.87867736816406, 433.07763671875, 397.42547607421875, -47.20970916748047, 383.2994689941406, 68.28514862060547, 303.4508361816406, 205.29637145996094, 62.43873596191406, 356.3717041015625, 255.33944702148438, -15.862524032592773, 72.48692321777344, 211.83168029785156, 15.321281433105469, 356.79510498046875, 562.3297119140625, 284.4975280761719, 772.4826049804688, 134.2327880859375, -97.58502197265625, 223.16856384277344, 113.28518676757812, -39.71488952636719, 313.2087707519531, 79.60005187988281, -146.05111694335938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000307.npy"}
|
||||
{"epoch": 0.45080763582966227, "step": 308, "batch_size": 64, "mean": 186.27633666992188, "std": 192.6704864501953, "min": -194.6070098876953, "p10": -50.02389736175535, "median": 170.47956085205078, "p90": 458.3158172607422, "max": 549.701904296875, "pos_frac": 0.8125, "sample": [164.04110717773438, 449.00994873046875, -194.6070098876953, 238.33676147460938, 31.80682373046875, 512.5392456054688, -135.75177001953125, 6.383548736572266, 69.47930908203125, 52.50642395019531, 410.58843994140625, -8.28475570678711, 223.79705810546875, 93.98117065429688, 386.9210205078125, 481.8877868652344, -128.31727600097656, 169.9519805908203, 144.21896362304688, 14.280342102050781, 266.32220458984375, 512.874755859375, 294.87750244140625, 442.03424072265625, -97.81842041015625, 297.73858642578125, 123.55728149414062, 95.77925109863281, 121.18861389160156, -58.70549011230469, 257.11773681640625, 122.3065414428711, 171.00714111328125, 223.1925811767578, 278.2738037109375, 462.3040466308594, 504.25146484375, 214.22268676757812, -117.52455139160156, 66.52027893066406, 78.91175842285156, 353.99652099609375, 329.1504211425781, 151.42117309570312, 185.2294158935547, 549.701904296875, 228.94967651367188, 55.77418518066406, -24.70923614501953, 398.18988037109375, 537.00146484375, 281.2861633300781, 160.17843627929688, -10.627683639526367, -29.766847610473633, 49.56048583984375, 356.1224670410156, 376.5442199707031, 269.7379150390625, -21.96685028076172, 398.2511291503906, 218.84320068359375, -147.54893493652344, 15.165733337402344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000308.npy"}
|
||||
{"epoch": 0.4522760646108664, "step": 309, "batch_size": 64, "mean": 181.35653686523438, "std": 233.16058349609375, "min": -680.8449096679688, "p10": -96.4515380859375, "median": 182.12516021728516, "p90": 464.7881072998048, "max": 746.4739990234375, "pos_frac": 0.796875, "sample": [192.26107788085938, 77.80183410644531, 118.4080581665039, -117.76284790039062, 296.5398254394531, 175.87664794921875, 136.82411193847656, -17.057668685913086, 530.7652587890625, 61.4202880859375, 263.857177734375, 449.2525634765625, 111.5064926147461, 505.0340270996094, 65.66370391845703, 40.71831512451172, -91.60346984863281, 296.260009765625, 471.4461975097656, 299.9236145019531, 256.11517333984375, -680.8449096679688, 228.79901123046875, -128.38389587402344, 79.33206176757812, 160.65890502929688, 248.16744995117188, 292.15606689453125, 17.640975952148438, -3.538389205932617, -119.92316436767578, 632.0823974609375, 39.5240478515625, 1.5698699951171875, 138.67721557617188, 440.8614501953125, 746.4739990234375, 418.5617370605469, 128.04840087890625, -23.820785522460938, -46.68368148803711, 71.3046646118164, 65.22053527832031, 422.87664794921875, 298.905029296875, 299.78265380859375, 505.89349365234375, 410.312744140625, 301.25341796875, -26.351566314697266, 410.2513122558594, -172.3872528076172, 217.28903198242188, 298.8131408691406, -119.6441650390625, 62.367191314697266, 547.2381591796875, 394.05511474609375, -98.52928161621094, 198.87579345703125, 188.37367248535156, 21.18130874633789, 387.6904602050781, 229.43641662597656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000309.npy"}
|
||||
{"epoch": 0.45374449339207046, "step": 310, "batch_size": 64, "mean": 179.5365447998047, "std": 186.35250854492188, "min": -238.20074462890625, "p10": -43.69660034179687, "median": 169.74169921875, "p90": 411.90269775390635, "max": 634.8976440429688, "pos_frac": 0.828125, "sample": [-46.63601303100586, 170.38568115234375, 340.2966003417969, -50.081932067871094, 92.08216857910156, 313.88372802734375, 58.17323684692383, 41.42438507080078, 33.15321350097656, 212.73452758789062, -198.95877075195312, 393.0118408203125, 459.0502014160156, -36.83797073364258, 76.84284973144531, 312.1357727050781, 212.13417053222656, 246.7114715576172, 46.0037841796875, 200.36639404296875, -13.695331573486328, 110.45121765136719, 352.95086669921875, 84.14419555664062, 165.3050537109375, 540.899658203125, 336.98193359375, 37.573307037353516, 18.039857864379883, 315.494140625, -238.20074462890625, 634.8976440429688, 0.6010856628417969, 150.2023468017578, 553.0578002929688, 184.25811767578125, 257.8673095703125, -22.99543571472168, -61.31641387939453, -28.02918243408203, 71.38333129882812, 159.270263671875, 327.3167724609375, 248.54530334472656, 107.05390167236328, 294.73760986328125, 419.998779296875, -114.42493438720703, 352.5291748046875, 254.34677124023438, 355.3558044433594, -53.03620147705078, 330.2608947753906, 183.6138153076172, 63.37480163574219, 69.56916046142578, 92.63470458984375, 510.080322265625, 495.6155090332031, 236.64344787597656, 77.69091796875, 377.936767578125, 206.38156127929688, 169.09771728515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000310.npy"}
|
||||
{"epoch": 0.4552129221732746, "step": 311, "batch_size": 64, "mean": 195.67971801757812, "std": 252.9629669189453, "min": -252.5065155029297, "p10": -106.75041503906246, "median": 154.24077606201172, "p90": 567.5149536132814, "max": 886.8372802734375, "pos_frac": 0.734375, "sample": [477.02703857421875, 377.3390808105469, 392.7413024902344, -8.673942565917969, -159.3258819580078, 534.3909912109375, -123.71543884277344, 232.5424346923828, 154.987060546875, 577.2904052734375, 268.8434143066406, 76.685302734375, 339.2510986328125, 716.6187744140625, 318.96527099609375, 115.35037994384766, 496.660888671875, 139.17369079589844, -38.960975646972656, 597.384521484375, 193.6949920654297, 223.5870361328125, 522.223388671875, 236.47557067871094, 50.96953582763672, 164.0123748779297, -51.38795471191406, -181.57498168945312, 187.54293823242188, 123.64434051513672, -69.04086303710938, 299.7776184082031, -136.65728759765625, 134.6296844482422, 265.90667724609375, -122.91165161132812, -6.155799865722656, 886.8372802734375, -19.411102294921875, 262.2821044921875, 51.688026428222656, 49.04499053955078, 215.74465942382812, 54.15264892578125, 608.2066650390625, 422.0917053222656, 28.141803741455078, -43.65557861328125, -24.46389389038086, 100.15327453613281, -22.426471710205078, 544.70556640625, 151.55206298828125, 3.0470809936523438, -252.5065155029297, 459.08404541015625, 153.49449157714844, 223.51693725585938, 14.293724060058594, 259.5645751953125, 626.8216552734375, -176.9297332763672, -45.47759246826172, 674.6377563476562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000311.npy"}
|
||||
{"epoch": 0.4566813509544787, "step": 312, "batch_size": 64, "mean": 183.03475952148438, "std": 254.16796875, "min": -316.9760437011719, "p10": -84.70376663208006, "median": 127.37588882446289, "p90": 487.84434814453124, "max": 1046.13330078125, "pos_frac": 0.8125, "sample": [118.89085388183594, 1046.13330078125, 535.4944458007812, 182.50228881835938, 107.61698913574219, 49.99066925048828, -89.26264190673828, 102.93397521972656, 120.70011138916016, 336.81427001953125, -275.92987060546875, 397.6422424316406, 958.0731201171875, 353.5318603515625, 250.80682373046875, 289.08905029296875, -202.8370361328125, 242.20367431640625, 403.19970703125, 154.44482421875, 518.0602416992188, 87.74708557128906, 138.6531219482422, 252.88180541992188, 79.45269775390625, 36.16191101074219, 32.779136657714844, 739.53271484375, 347.7415466308594, 489.60552978515625, 123.74978637695312, 428.2049865722656, 176.39254760742188, 97.68000793457031, -121.84490966796875, 99.03944396972656, 168.1685333251953, 483.73492431640625, -33.06944274902344, 57.4864616394043, 279.6922607421875, 123.80577850341797, -47.030059814453125, 247.7726287841797, 177.56903076171875, 17.118499755859375, -36.339569091796875, 642.150634765625, -74.06639099121094, 10.550085067749023, 182.1996612548828, 119.73362731933594, -157.86367797851562, 10.2388916015625, -90.45723724365234, 130.9459991455078, 150.5576171875, -0.515594482421875, -316.9760437011719, 393.9548034667969, 256.2677307128906, 19.087905883789062, 43.9561653137207, 347.67535400390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000312.npy"}
|
||||
{"epoch": 0.4581497797356828, "step": 313, "batch_size": 64, "mean": 189.80377197265625, "std": 257.02191162109375, "min": -448.249755859375, "p10": -102.16852722167962, "median": 182.32271575927734, "p90": 510.21256713867194, "max": 887.564697265625, "pos_frac": 0.796875, "sample": [76.31976318359375, 292.8656311035156, 118.08229064941406, 304.941162109375, -141.4256591796875, -8.957595825195312, 495.1204833984375, 178.41650390625, 372.14923095703125, -47.4661979675293, 16.883560180664062, -168.75440979003906, 454.48858642578125, 146.8878936767578, 20.834230422973633, 239.46092224121094, 516.6806030273438, 197.5662078857422, 95.50232696533203, 211.1004638671875, 59.194007873535156, 336.9914855957031, -422.2420959472656, -157.02809143066406, 191.9110107421875, 581.3397827148438, 277.0716552734375, 73.81944274902344, 187.48223876953125, 887.564697265625, 336.529296875, 151.63372802734375, 35.07958984375, 186.2289276123047, -221.86947631835938, 144.55258178710938, -29.727399826049805, -448.249755859375, 419.4637145996094, 219.5740509033203, -14.897056579589844, 244.63555908203125, 57.6143684387207, 340.59906005859375, 317.4919738769531, 127.36016082763672, -34.897178649902344, 345.6910400390625, 296.22650146484375, -125.61238098144531, 435.24993896484375, 110.61068725585938, 151.43914794921875, 17.11359405517578, 214.9268798828125, 585.2310791015625, 623.834716796875, -9.334030151367188, 4.685386657714844, 79.30758666992188, 291.1219787597656, 350.2532958984375, 744.2491455078125, 814.5241088867188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000313.npy"}
|
||||
{"epoch": 0.45961820851688695, "step": 314, "batch_size": 64, "mean": 176.21929931640625, "std": 278.32574462890625, "min": -378.21734619140625, "p10": -161.98898773193358, "median": 134.08779525756836, "p90": 493.9214111328125, "max": 903.6690063476562, "pos_frac": 0.71875, "sample": [768.0145263671875, -276.85980224609375, 112.35454559326172, 42.07299041748047, 32.580780029296875, 257.62908935546875, 38.271881103515625, 351.6828918457031, 82.44602966308594, -135.1967010498047, 366.4335021972656, 13.904220581054688, 249.59759521484375, 101.84288024902344, 190.13259887695312, -378.21734619140625, -178.37474060058594, 37.07112121582031, 653.15234375, 779.2003784179688, 30.4876708984375, -25.84440040588379, -172.74185180664062, 58.86556625366211, 195.4986572265625, -165.66453552246094, -13.700241088867188, -140.9130096435547, -32.369773864746094, 426.6202392578125, 482.96710205078125, 14.9329833984375, 258.498046875, 494.62310791015625, 302.2585754394531, 174.31045532226562, -130.9000244140625, 155.821044921875, 47.38824462890625, 805.0208129882812, 426.039306640625, 332.26654052734375, 903.6690063476562, 492.28411865234375, -93.78610229492188, 353.89508056640625, -158.29812622070312, 450.86383056640625, -24.402679443359375, 232.69467163085938, 352.88311767578125, 40.08634948730469, 293.86138916015625, 356.3077392578125, 463.3469543457031, 532.4640502929688, 86.57267761230469, 272.6033630371094, 240.75364685058594, -46.77448272705078, -163.57078552246094, -43.53178024291992, 325.8720703125, -220.9628143310547], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000314.npy"}
|
||||
{"epoch": 0.461086637298091, "step": 315, "batch_size": 64, "mean": 157.57626342773438, "std": 219.61166381835938, "min": -205.828857421875, "p10": -85.47158355712891, "median": 110.84260559082031, "p90": 502.2987823486329, "max": 739.72119140625, "pos_frac": 0.734375, "sample": [556.0833740234375, -65.42273712158203, 288.6806640625, 8.27349853515625, -63.604183197021484, 50.83769226074219, 64.30699157714844, 113.44154357910156, 280.53558349609375, -37.969573974609375, 280.3326416015625, 26.583986282348633, -65.89610290527344, 265.84735107421875, 86.81407928466797, 548.7103881835938, 314.45306396484375, 170.77520751953125, -145.73025512695312, 351.5758361816406, 215.2080078125, 517.091552734375, 543.7828369140625, 60.35565185546875, -132.63604736328125, 197.155517578125, 261.38165283203125, 174.3347625732422, 407.2236022949219, 158.5758514404297, 432.6559143066406, 72.17689514160156, 78.77655029296875, 108.24366760253906, 182.03871154785156, 310.02581787109375, -109.33016967773438, -32.0667724609375, 240.08091735839844, 14.526336669921875, 33.9686164855957, -83.59457397460938, 739.72119140625, 55.756134033203125, 219.43875122070312, -6.047157287597656, -179.14968872070312, 6.207841873168945, -86.27601623535156, 610.2633056640625, 73.34133911132812, 260.64959716796875, 636.116455078125, 135.168212890625, 149.08921813964844, -124.61419677734375, 286.4572448730469, -30.81216049194336, 406.1697082519531, 467.7823181152344, -205.828857421875, -61.50956726074219, 63.652286529541016, -9.298927307128906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000315.npy"}
|
||||
{"epoch": 0.46255506607929514, "step": 316, "batch_size": 64, "mean": 234.00823974609375, "std": 253.26156616210938, "min": -249.42880249023438, "p10": -5.3934284210205, "median": 161.11019134521484, "p90": 599.8680297851563, "max": 953.4509887695312, "pos_frac": 0.890625, "sample": [330.65399169921875, 126.16007995605469, 496.6202392578125, 64.07371520996094, 45.908348083496094, 930.8297119140625, 148.41973876953125, 692.9951782226562, 480.9593505859375, 651.9796752929688, 389.141357421875, 338.4876708984375, 40.08734130859375, 27.385196685791016, 28.46808624267578, 213.37318420410156, 66.73484802246094, 108.95332336425781, 1.4512596130371094, 434.055908203125, 200.43917846679688, 27.239749908447266, 609.572998046875, 457.6888427734375, 293.8634338378906, 127.87871551513672, 68.00299072265625, 605.9739379882812, 153.7977752685547, 519.26904296875, 7.57020378112793, 157.46893310546875, 45.41107177734375, 164.75144958496094, 207.21194458007812, 350.048095703125, 416.466796875, 55.737525939941406, 383.220458984375, 24.70447540283203, 585.6209106445312, 334.48968505859375, 41.46893310546875, 390.9344177246094, 186.23023986816406, 147.56430053710938, -233.8511505126953, 22.084789276123047, 953.4509887695312, 128.27098083496094, 783.04736328125, -249.42880249023438, 193.94725036621094, 204.0638427734375, 368.7958984375, -8.326866149902344, -20.665245056152344, -21.671279907226562, -45.12848663330078, 134.88259887695312, 235.47781372070312, -106.1695785522461, 120.7308349609375, 337.652099609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000316.npy"}
|
||||
{"epoch": 0.46402349486049926, "step": 317, "batch_size": 64, "mean": 247.40829467773438, "std": 237.35400390625, "min": -155.39695739746094, "p10": -49.207481384277344, "median": 221.45252990722656, "p90": 585.6850280761719, "max": 704.5596923828125, "pos_frac": 0.796875, "sample": [358.93572998046875, 38.918678283691406, 96.56397247314453, 464.7784729003906, 470.8497619628906, 77.91439819335938, 449.00762939453125, 375.2138671875, -154.07321166992188, 585.7653198242188, 704.5596923828125, 144.68833923339844, 225.38595581054688, -16.700927734375, 138.29891967773438, 420.0428466796875, 176.18116760253906, -19.10326385498047, 99.1728286743164, 425.173583984375, -46.6116943359375, 346.45001220703125, 374.42413330078125, 521.9319458007812, 41.61216354370117, 539.9659423828125, -20.34756088256836, 104.76262664794922, 663.1993408203125, 406.6878662109375, 627.3464965820312, 454.2076416015625, 95.07261657714844, 602.257568359375, 432.7744140625, -57.61360168457031, 283.8833923339844, 98.21998596191406, 265.3254089355469, 116.94760131835938, 217.51910400390625, 547.717041015625, -29.40418815612793, -50.31996154785156, 662.5891723632812, 212.63543701171875, 585.4976806640625, 335.4384765625, 270.8809814453125, -80.25594329833984, 145.58665466308594, 87.01099395751953, 265.4127197265625, -29.29521942138672, -144.68630981445312, 136.791259765625, 148.91055297851562, -124.89141082763672, 651.428466796875, 408.92218017578125, 100.55377960205078, 405.3900146484375, -155.39695739746094, 354.0262145996094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000317.npy"}
|
||||
{"epoch": 0.4654919236417034, "step": 318, "batch_size": 64, "mean": 185.17645263671875, "std": 237.19442749023438, "min": -343.9800720214844, "p10": -70.54890975952148, "median": 146.36261749267578, "p90": 525.7545227050783, "max": 840.9328002929688, "pos_frac": 0.8125, "sample": [62.73853302001953, -23.393722534179688, 811.581787109375, 88.53070068359375, 251.4974365234375, 121.56343078613281, -57.549461364746094, 191.13851928710938, 130.14154052734375, 676.5836181640625, 13.704195022583008, 111.308349609375, 336.2889404296875, 557.1399536132812, 620.1986694335938, 218.63731384277344, 6.5337677001953125, 140.1768035888672, -103.25774383544922, -50.15784454345703, 589.8583374023438, 66.57622528076172, -113.29124450683594, 486.62164306640625, 840.9328002929688, 305.8353271484375, -29.91756820678711, -114.33790588378906, 214.3293914794922, 297.9775695800781, 88.28457641601562, 321.9084777832031, 53.17017364501953, 396.1171569824219, 214.35105895996094, 140.64407348632812, 542.5257568359375, 32.51080322265625, -19.012908935546875, 432.4804382324219, 414.08782958984375, 141.64871215820312, 10.668170928955078, -106.74417877197266, 347.62432861328125, 185.78457641601562, 105.45372772216797, 212.6168212890625, 151.07652282714844, -76.12010192871094, -335.697509765625, 47.746253967285156, 222.366455078125, 206.67510986328125, 368.1853942871094, 371.3782958984375, 232.57760620117188, 159.0518798828125, 56.09101867675781, 274.5690002441406, 136.04290771484375, 200.50677490234375, -343.9800720214844, 18.713279724121094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000318.npy"}
|
||||
{"epoch": 0.4669603524229075, "step": 319, "batch_size": 64, "mean": 220.9802703857422, "std": 247.9208526611328, "min": -310.4207763671875, "p10": -108.69849700927735, "median": 221.92739868164062, "p90": 542.6328002929688, "max": 986.8610229492188, "pos_frac": 0.8125, "sample": [293.454345703125, 159.0634002685547, 306.29486083984375, 257.2603454589844, 40.00965881347656, 251.908447265625, 359.7613830566406, 511.44024658203125, 236.50729370117188, 84.18103790283203, -109.12783813476562, -216.93069458007812, 334.66424560546875, 115.38090515136719, 307.5240783691406, 139.09764099121094, 53.558815002441406, 251.2245635986328, 257.9903869628906, 180.63311767578125, 185.20249938964844, 396.8950500488281, 217.10760498046875, 232.27923583984375, 328.24920654296875, 20.52142333984375, 368.2442626953125, 794.3787841796875, 413.8382873535156, 353.64422607421875, -133.57720947265625, 162.20819091796875, 178.0954132080078, 530.86328125, -246.64016723632812, 384.7618408203125, -310.4207763671875, -97.9541015625, -133.8709716796875, 395.6573486328125, 556.1326904296875, 439.5705871582031, 592.6989135742188, -1.4154033660888672, 177.25502014160156, 43.34764099121094, 547.6768798828125, 618.9849853515625, 270.3029479980469, 588.8283081054688, 986.8610229492188, -107.69670104980469, 154.615966796875, 421.575439453125, -29.117481231689453, 37.92138671875, -174.57070922851562, -45.11023712158203, 362.5239562988281, 157.35816955566406, 121.79132080078125, 193.12811279296875, 226.7471923828125, 149.94715881347656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000319.npy"}
|
||||
{"epoch": 0.4684287812041116, "step": 320, "batch_size": 64, "mean": 240.2877960205078, "std": 259.0086364746094, "min": -290.53857421875, "p10": -66.37993240356445, "median": 222.9927215576172, "p90": 578.1055725097658, "max": 1003.077392578125, "pos_frac": 0.8125, "sample": [153.32179260253906, 190.2967987060547, 870.534912109375, 119.51783752441406, 269.91412353515625, -99.4038314819336, 180.46881103515625, -290.53857421875, 810.8246459960938, 201.6070098876953, 442.5947265625, 348.1700134277344, -36.93687438964844, 356.6315612792969, 221.95816040039062, 538.2990112304688, 611.4115600585938, 379.6877136230469, 363.52777099609375, -142.841552734375, 365.33746337890625, 365.5406494140625, -38.358360290527344, 253.8314208984375, 72.15259552001953, 0.37609100341796875, 303.99029541015625, 285.8558654785156, -62.31834411621094, 595.16552734375, -25.860397338867188, 244.51754760742188, -68.12061309814453, 146.21884155273438, 448.18267822265625, 108.4854736328125, 160.773681640625, 216.20712280273438, 1003.077392578125, 364.4312438964844, 380.6422119140625, 90.74359893798828, 102.21047973632812, -60.190704345703125, 659.0121459960938, 60.101097106933594, 400.98724365234375, 209.51087951660156, 0.3621387481689453, 16.567031860351562, 363.4917907714844, 369.80804443359375, 265.59515380859375, 444.18572998046875, 485.41241455078125, 632.2578125, 529.3140869140625, 9.985563278198242, -90.91989135742188, -71.2099838256836, 48.64849853515625, 224.02728271484375, 253.28018188476562, -173.93548583984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000320.npy"}
|
||||
{"epoch": 0.4698972099853157, "step": 321, "batch_size": 64, "mean": 204.66717529296875, "std": 275.3786315917969, "min": -403.501953125, "p10": -72.7759552001953, "median": 183.17898559570312, "p90": 564.9298583984377, "max": 1024.70458984375, "pos_frac": 0.765625, "sample": [683.3513793945312, 119.2746810913086, 294.96820068359375, 262.3550720214844, -41.81322479248047, 274.127685546875, 324.8550720214844, 269.725830078125, 171.17620849609375, 502.7108154296875, 441.2209777832031, 365.5379638671875, 107.85263061523438, 40.47676467895508, -239.47337341308594, 298.16815185546875, 440.85546875, 225.11135864257812, 172.84091186523438, 579.9376220703125, 802.97314453125, 161.89474487304688, -403.501953125, 324.0243835449219, 196.3305206298828, 299.5929870605469, 730.6976928710938, -319.00390625, -128.6186981201172, 138.11073303222656, 67.39064025878906, 529.9117431640625, -40.720184326171875, 362.74188232421875, 185.01467895507812, 401.9207458496094, 241.9503631591797, -299.15997314453125, 1024.70458984375, 47.51996612548828, -59.382110595703125, 66.77050018310547, 22.104516983032227, 112.89662170410156, -31.886619567871094, 152.1680450439453, -78.51617431640625, -26.165699005126953, 253.93453979492188, 263.33135986328125, 664.645751953125, 676.4437255859375, -7.971218109130859, -318.0367431640625, 181.8668212890625, 180.45663452148438, 145.717529296875, 150.10462951660156, -20.461027145385742, 466.69989013671875, 184.49114990234375, 286.6522521972656, 272.44378662109375, -56.642730712890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000321.npy"}
|
||||
{"epoch": 0.4713656387665198, "step": 322, "batch_size": 64, "mean": 226.37049865722656, "std": 241.35641479492188, "min": -311.0309143066406, "p10": -55.31768341064453, "median": 247.78831481933594, "p90": 517.6384399414063, "max": 828.6724853515625, "pos_frac": 0.765625, "sample": [-186.1815185546875, 822.2559814453125, 346.1003723144531, 828.6724853515625, 90.18206024169922, -70.43307495117188, 522.3855590820312, 382.5658264160156, 317.96856689453125, -78.82754516601562, -57.86711120605469, -24.024070739746094, 337.60638427734375, 274.292724609375, 310.66259765625, 362.24969482421875, -6.785392761230469, 241.056640625, -21.849037170410156, -116.31442260742188, 604.5067138671875, 565.617919921875, 237.43807983398438, 568.4808959960938, -49.3690185546875, 261.4618225097656, 349.4473876953125, 506.56182861328125, 46.493263244628906, 476.46124267578125, -31.265775680541992, 215.3662109375, -13.566570281982422, 375.7733154296875, -6.0302734375, 20.632579803466797, 259.140625, 185.8727569580078, 12.938907623291016, 325.724365234375, 462.02685546875, 415.7691650390625, 413.95269775390625, -114.7860336303711, 478.48779296875, 135.61268615722656, 283.87060546875, -30.639690399169922, 249.97647094726562, 42.77830505371094, 74.50305938720703, 666.2825317382812, 195.39303588867188, 310.7602844238281, 371.4504699707031, 392.53607177734375, 97.65899658203125, 82.24939727783203, 58.30311584472656, -311.0309143066406, 9.963836669921875, 352.7374267578125, 418.8516540527344, 245.60015869140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000322.npy"}
|
||||
{"epoch": 0.47283406754772395, "step": 323, "batch_size": 64, "mean": 190.0548095703125, "std": 247.40451049804688, "min": -478.317138671875, "p10": -73.42815551757812, "median": 177.86404418945312, "p90": 540.1785522460938, "max": 804.090576171875, "pos_frac": 0.8125, "sample": [419.9819030761719, 268.57965087890625, 550.8311767578125, -478.317138671875, 572.2843627929688, -330.37188720703125, 560.0254516601562, 804.090576171875, 19.741413116455078, 69.73229217529297, 285.7994079589844, 53.04541015625, 180.93423461914062, 136.67457580566406, 15.320213317871094, 318.8656005859375, 96.3198013305664, 299.5260925292969, -68.32178497314453, 639.7701416015625, 5.590507507324219, 97.09178161621094, 453.21014404296875, -75.6166000366211, -92.87715148925781, 395.1554260253906, 161.2142791748047, 152.04464721679688, 377.3531494140625, 27.582000732421875, 545.650146484375, 287.7725830078125, 402.83099365234375, -22.647932052612305, 311.3936767578125, 22.286170959472656, -64.92683410644531, 174.79385375976562, 281.6689453125, 227.38311767578125, 308.03936767578125, 35.95965576171875, 245.507568359375, 527.4114990234375, -329.4935302734375, 173.75779724121094, 312.82171630859375, 585.6963500976562, 390.5191650390625, -61.741111755371094, 236.86111450195312, 526.6721801757812, -86.45342254638672, 275.9896545410156, -274.93890380859375, 225.57815551757812, 128.68084716796875, 265.8896179199219, 83.7546615600586, 282.2542724609375, 47.75102996826172, -2.6339454650878906, 136.24197387695312, 47.91845703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000323.npy"}
|
||||
{"epoch": 0.47430249632892807, "step": 324, "batch_size": 64, "mean": 183.44676208496094, "std": 277.77154541015625, "min": -428.9640808105469, "p10": -118.3505661010742, "median": 152.69564819335938, "p90": 510.24022521972677, "max": 957.6033935546875, "pos_frac": 0.765625, "sample": [-210.86807250976562, 661.2820434570312, -61.82513427734375, 162.9856719970703, 33.881507873535156, 174.733642578125, 240.15020751953125, 314.1515808105469, 143.7482147216797, 191.560791015625, 816.9892578125, -94.80575561523438, 158.46005249023438, 92.44688415527344, 23.516674041748047, 693.47802734375, -234.41087341308594, -322.309814453125, 342.9140625, 249.50567626953125, 24.620681762695312, 141.81195068359375, -127.67169189453125, 201.88275146484375, 331.95977783203125, 856.3279418945312, 448.42022705078125, -78.15602111816406, 84.44198608398438, 423.9747619628906, 460.3378601074219, 121.92744445800781, 74.86824798583984, 93.02396392822266, 284.6026306152344, 327.2620544433594, 105.75984191894531, 319.8751220703125, 202.90097045898438, 0.5331325531005859, 416.6932067871094, -428.9640808105469, -13.575019836425781, 390.4256896972656, 531.626953125, 368.8822021484375, 148.03334045410156, -304.3565979003906, -133.44775390625, 95.9260025024414, 233.87551879882812, 48.18450164794922, 157.3579559326172, -52.441158294677734, 91.65632629394531, 66.55154418945312, 423.1199645996094, -96.60127258300781, -29.626407623291016, 957.6033935546875, 693.5258178710938, 208.73265075683594, -31.25518798828125, 324.376708984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000324.npy"}
|
||||
{"epoch": 0.47577092511013214, "step": 325, "batch_size": 64, "mean": 157.63201904296875, "std": 214.16908264160156, "min": -358.1204833984375, "p10": -71.23831977844236, "median": 141.68650817871094, "p90": 457.13458557128916, "max": 796.6693115234375, "pos_frac": 0.828125, "sample": [-348.6066589355469, -358.1204833984375, 262.14117431640625, -26.33391571044922, 330.415283203125, 285.792236328125, 126.0806655883789, -87.24609375, 170.65621948242188, 287.3238525390625, 329.09576416015625, 486.23699951171875, 302.39501953125, 155.2794952392578, 186.57289123535156, 127.8382339477539, 43.13910675048828, 87.11952209472656, 47.31947326660156, 234.80442810058594, -133.34701538085938, 247.38543701171875, 283.6258239746094, 501.2336120605469, 23.642635345458984, 69.35967254638672, 118.81288146972656, 87.72246551513672, 121.49519348144531, 23.25865936279297, 488.9944763183594, -14.001480102539062, 796.6693115234375, 21.59012222290039, 518.2757568359375, 43.16551971435547, 465.00439453125, -49.54945755004883, 243.35153198242188, 225.5790557861328, -80.5335464477539, 172.14739990234375, 373.4538269042969, -39.87945556640625, 218.733642578125, 288.7169189453125, 144.03314208984375, 218.1207275390625, 120.97201538085938, 139.33987426757812, 312.36297607421875, -302.112060546875, 23.780208587646484, 659.9786376953125, 24.28057861328125, 19.530006408691406, 438.7716979980469, 30.476388931274414, -109.55010986328125, 246.18707275390625, 158.1874237060547, 41.88057327270508, 59.45310974121094, 205.94577026367188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000325.npy"}
|
||||
{"epoch": 0.47723935389133626, "step": 326, "batch_size": 64, "mean": 154.7425537109375, "std": 185.08230590820312, "min": -370.7458190917969, "p10": -35.13610076904296, "median": 134.89266967773438, "p90": 374.76379394531256, "max": 622.933837890625, "pos_frac": 0.796875, "sample": [-4.8787841796875, -12.988643646240234, 110.23500061035156, 379.5883483886719, 139.7750701904297, -10.688232421875, 139.64114379882812, 130.14419555664062, 314.97552490234375, -199.72872924804688, -111.42218017578125, -370.7458190917969, 519.5944213867188, 98.85033416748047, -80.89998626708984, 164.3375244140625, 174.4683837890625, 80.14857482910156, 273.63232421875, 252.84237670898438, 21.471038818359375, 112.81924438476562, 237.6820068359375, 232.801025390625, -27.6998291015625, 146.8149871826172, 622.933837890625, 190.4734344482422, 98.00386047363281, 110.99613952636719, 341.0379638671875, 202.64202880859375, 247.25999450683594, 120.1751708984375, 191.58441162109375, -19.4984130859375, 47.54498291015625, 363.5065002441406, -38.42325210571289, 109.11215209960938, 116.41337585449219, 15.406982421875, 99.28846740722656, 168.204345703125, 27.44134521484375, 108.75727844238281, 285.6934814453125, 332.7947692871094, 220.5515899658203, 225.8643035888672, -38.32307434082031, 419.5797424316406, 617.703125, 76.22212982177734, 328.8656005859375, 156.61904907226562, 499.8414306640625, 313.8556213378906, -1.1342411041259766, -194.9139862060547, 424.1837158203125, 314.32598876953125, 21.201953887939453, 66.96248626708984], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000326.npy"}
|
||||
{"epoch": 0.4787077826725404, "step": 327, "batch_size": 64, "mean": 219.50949096679688, "std": 220.30361938476562, "min": -259.93951416015625, "p10": -88.53612670898434, "median": 242.6549835205078, "p90": 503.66260681152346, "max": 724.9379272460938, "pos_frac": 0.828125, "sample": [234.39013671875, 203.6343536376953, 445.65301513671875, 219.52659606933594, 212.1580352783203, 330.5289306640625, 44.431617736816406, 248.49609375, 329.2371520996094, 155.57139587402344, 357.1671142578125, 330.8643798828125, 119.25703430175781, 333.07257080078125, -106.2027359008789, 392.5955810546875, 3.0414466857910156, 491.8763122558594, 338.9151306152344, 290.76080322265625, -242.32205200195312, 608.1571044921875, 504.452880859375, 394.78662109375, 18.278966903686523, 520.7158203125, 286.89361572265625, -126.94148254394531, 232.07952880859375, 296.3932189941406, 383.3774719238281, -130.15005493164062, -26.112625122070312, 141.34571838378906, 293.66119384765625, 172.17771911621094, 287.431884765625, 261.65460205078125, 249.74197387695312, 181.41259765625, -5.8489837646484375, 101.30068969726562, -61.281158447265625, 470.268310546875, 236.81387329101562, -100.21682739257812, 41.06917190551758, 343.32745361328125, 297.4220275878906, 60.146575927734375, 582.9537963867188, 91.83869934082031, 724.9379272460938, -259.93951416015625, 501.8186340332031, -37.668212890625, 67.75386047363281, 579.8435668945312, 69.22465515136719, 418.26007080078125, 264.98687744140625, 514.5975341796875, 102.20282745361328, -237.21395874023438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000327.npy"}
|
||||
{"epoch": 0.4801762114537445, "step": 328, "batch_size": 64, "mean": 196.95599365234375, "std": 243.94415283203125, "min": -380.948974609375, "p10": -59.63810844421387, "median": 162.02967834472656, "p90": 502.1053100585939, "max": 911.7060546875, "pos_frac": 0.75, "sample": [180.97952270507812, -130.64315795898438, 178.00869750976562, 156.32571411132812, 13.432373046875, 378.8526611328125, 343.1452941894531, 123.55448913574219, 39.73124694824219, -380.948974609375, 112.49304962158203, 414.2932434082031, 462.86444091796875, 374.06201171875, 252.09605407714844, 433.44403076171875, -24.85417938232422, -150.3915252685547, -58.94183349609375, 214.80499267578125, -45.354915618896484, -31.745697021484375, 311.1563720703125, 911.7060546875, 218.70416259765625, 276.9925537109375, 389.5688781738281, 156.59893798828125, 663.8065185546875, 367.0074768066406, -73.66650390625, 191.70474243164062, 292.4734802246094, -59.9365119934082, 63.92498779296875, 131.59988403320312, 310.8701477050781, 400.50714111328125, 159.25778198242188, 127.13670349121094, -9.044023513793945, 137.57745361328125, -101.76751708984375, 191.26031494140625, -15.701896667480469, 470.2608642578125, 107.5684814453125, 247.8949432373047, 250.75732421875, 164.80157470703125, 543.248779296875, 515.7529296875, 40.19648742675781, -15.242706298828125, -16.450206756591797, 682.2476806640625, 798.3515625, 67.86215209960938, 639.2640991210938, -10.98019027709961, 13.510627746582031, 305.248046875, 31.78538703918457, -127.8382797241211], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000328.npy"}
|
||||
{"epoch": 0.48164464023494863, "step": 329, "batch_size": 64, "mean": 215.82797241210938, "std": 268.51751708984375, "min": -665.3629150390625, "p10": -53.10569152832031, "median": 209.48905181884766, "p90": 558.4554260253907, "max": 931.0707397460938, "pos_frac": 0.796875, "sample": [186.23880004882812, 403.8968505859375, 519.7359619140625, 304.05181884765625, 349.9959411621094, -67.99321746826172, 479.2383728027344, -6.255027770996094, 483.098876953125, 235.89315795898438, 58.72955322265625, 372.1159362792969, 60.481239318847656, -49.61094665527344, 66.01875305175781, 154.18630981445312, 233.52455139160156, 209.87802124023438, 28.20915412902832, 761.3773193359375, 90.25530242919922, 92.35269165039062, 172.55018615722656, 175.34671020507812, -665.3629150390625, 307.6755065917969, 38.54149627685547, 931.0707397460938, 613.2645263671875, -44.43959045410156, 290.2068176269531, -27.397212982177734, 355.4056701660156, 147.3945770263672, 257.640869140625, 398.2454528808594, 567.9192504882812, 327.66363525390625, 735.06884765625, 226.6058349609375, 31.439640045166016, -49.28700637817383, -141.41868591308594, 536.3731689453125, 455.1905822753906, -156.230712890625, 304.7721252441406, -118.87513732910156, 371.2033386230469, 168.5750274658203, 232.49124145507812, -54.60343933105469, 209.10008239746094, 262.5513610839844, 81.03278350830078, 48.67076873779297, 231.17222595214844, 441.572509765625, 695.5701904296875, -33.35357666015625, -278.8992614746094, 167.0413818359375, 598.8903198242188, 37.19092559814453], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000329.npy"}
|
||||
{"epoch": 0.4831130690161527, "step": 330, "batch_size": 64, "mean": 225.97341918945312, "std": 255.16845703125, "min": -300.02813720703125, "p10": -84.93160705566405, "median": 233.19058227539062, "p90": 508.0591064453125, "max": 889.980712890625, "pos_frac": 0.828125, "sample": [473.21533203125, -34.8037109375, -150.3995361328125, -91.9307861328125, 323.6458740234375, 510.05133056640625, 161.83314514160156, 207.69357299804688, -63.53241729736328, 107.92980194091797, -120.95191955566406, 133.3959197998047, 179.73171997070312, 417.00457763671875, 503.41058349609375, 45.769142150878906, 282.8362731933594, 275.6679382324219, 254.38064575195312, 424.3050231933594, 1.1580638885498047, 102.41847229003906, 338.4024658203125, 79.72218322753906, 173.6884765625, 347.8099365234375, -300.02813720703125, 319.00311279296875, 228.89227294921875, 237.4888916015625, 398.4638366699219, 10.633831024169922, -283.24542236328125, 238.27696228027344, 534.23291015625, 192.8701171875, 46.11070251464844, 782.5248413085938, 34.932899475097656, 154.34898376464844, -14.849647521972656, 447.40875244140625, 133.03729248046875, 889.980712890625, -236.92198181152344, 809.125732421875, 689.8173217773438, 347.8549499511719, 375.42938232421875, 73.57452392578125, 324.91387939453125, 374.10302734375, 157.43606567382812, 700.0819702148438, 3.8208541870117188, 298.8936767578125, 250.74014282226562, 445.43115234375, -167.69223022460938, 428.93426513671875, 108.48590087890625, 324.31353759765625, 290.0228271484375, -68.60018920898438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000330.npy"}
|
||||
{"epoch": 0.4845814977973568, "step": 331, "batch_size": 64, "mean": 226.6082763671875, "std": 190.3015594482422, "min": -156.85577392578125, "p10": 18.27304611206057, "median": 199.54584503173828, "p90": 426.6102630615236, "max": 818.7810668945312, "pos_frac": 0.90625, "sample": [162.4906005859375, 743.9522705078125, 268.01837158203125, -27.52154541015625, 287.1692810058594, 366.7225341796875, 94.09030151367188, 43.79722595214844, 223.43406677246094, 154.89263916015625, 732.092529296875, 272.9354553222656, 818.7810668945312, 8.109687805175781, 178.7772979736328, 354.68353271484375, 575.6387939453125, 108.35737609863281, 148.69180297851562, 203.57876586914062, 443.8098449707031, 386.4779052734375, 467.12933349609375, -33.84138107299805, 108.03665161132812, 153.1386260986328, 192.57725524902344, 224.38119506835938, 255.37734985351562, -49.0325927734375, 262.9227600097656, -57.6031379699707, 273.205078125, 185.98687744140625, 42.56554412841797, 179.57827758789062, -156.85577392578125, 197.76763916015625, 332.01239013671875, 576.7354125976562, 231.88308715820312, 201.3240509033203, 107.86432647705078, 356.523681640625, 78.15389251708984, 86.67462158203125, 319.277099609375, 165.62139892578125, 295.38995361328125, 93.77726745605469, -119.26898193359375, 278.9136962890625, 125.84742736816406, 99.78758239746094, 344.2547302246094, 330.6400146484375, 315.82550048828125, 98.70240020751953, 191.96348571777344, 381.58099365234375, 168.52894592285156, 257.6112365722656, 347.004150390625, 41.987548828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000331.npy"}
|
||||
{"epoch": 0.48604992657856094, "step": 332, "batch_size": 64, "mean": 164.1973876953125, "std": 232.5335235595703, "min": -199.33279418945312, "p10": -102.41101531982422, "median": 116.62026596069336, "p90": 442.1667419433594, "max": 982.8802490234375, "pos_frac": 0.75, "sample": [717.8234252929688, 163.17791748046875, 144.8344268798828, 320.14947509765625, 31.237762451171875, -16.341110229492188, 53.53321838378906, 442.16851806640625, 422.22015380859375, 188.41163635253906, -21.016494750976562, 75.64672088623047, -101.11785888671875, 982.8802490234375, 193.2176513671875, 106.08663177490234, 127.1612548828125, 141.2176971435547, 123.1855239868164, 2.569601058959961, 126.14232635498047, 95.33000183105469, 427.5773620605469, -102.96522521972656, 348.53399658203125, 39.06306838989258, -99.22006225585938, 109.94622039794922, 603.9467163085938, 234.15463256835938, 163.53665161132812, 94.5826187133789, 289.59149169921875, -39.93987274169922, -40.94538116455078, 82.03980255126953, 509.56005859375, 519.2034912109375, -108.43630981445312, 257.55194091796875, 133.8019256591797, -187.63357543945312, 287.95379638671875, 92.35499572753906, 328.9390563964844, 697.3301391601562, 110.05500793457031, 442.16259765625, -97.13360595703125, 72.28115844726562, -199.33279418945312, 249.9700927734375, 366.2914123535156, -113.45635986328125, 346.9661865234375, 13.22153091430664, -112.82319641113281, -7.214282989501953, -117.58442687988281, 163.62521362304688, 304.06219482421875, 100.53096771240234, 108.15336608886719, -80.18798828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000332.npy"}
|
||||
{"epoch": 0.48751835535976507, "step": 333, "batch_size": 64, "mean": 215.58282470703125, "std": 300.0898132324219, "min": -337.0198669433594, "p10": -177.33856353759765, "median": 193.65960693359375, "p90": 599.5364868164064, "max": 929.36767578125, "pos_frac": 0.703125, "sample": [-33.68925476074219, 298.2928771972656, 929.36767578125, -8.201156616210938, 79.56375885009766, 36.26103210449219, 277.96478271484375, 240.8251495361328, 70.8004150390625, 36.434085845947266, 790.8388061523438, -165.24095153808594, 177.80706787109375, -233.14559936523438, 567.4407348632812, -228.824951171875, 668.4959716796875, -99.88336181640625, 654.1898193359375, 399.8901062011719, -12.92447280883789, 309.62017822265625, -52.39384460449219, 355.1957092285156, 498.41107177734375, 538.61865234375, -182.52325439453125, 117.8238525390625, 18.033203125, 425.4029541015625, 144.52999877929688, -5.89286994934082, -79.61370849609375, 494.4514465332031, 613.2918090820312, 399.42791748046875, -259.44659423828125, 10.08218002319336, 503.50445556640625, 423.4761047363281, 453.76666259765625, -216.5332794189453, -15.9755859375, 514.2869262695312, 477.90191650390625, -48.714698791503906, 197.3439483642578, 161.45680236816406, 760.192626953125, 278.4499816894531, 504.15887451171875, 267.4483337402344, 566.6177368164062, 195.595703125, 191.7235107421875, -137.5911407470703, 449.21099853515625, -4.725612640380859, 340.75567626953125, -305.43707275390625, 659.4378051757812, -337.0198669433594, 53.28126525878906, 73.40736389160156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000333.npy"}
|
||||
{"epoch": 0.4889867841409692, "step": 334, "batch_size": 64, "mean": 194.11178588867188, "std": 307.4593505859375, "min": -578.9366455078125, "p10": -146.26842117309567, "median": 157.10147857666016, "p90": 661.5992492675783, "max": 892.7734375, "pos_frac": 0.796875, "sample": [620.5595703125, 57.55615234375, -178.71786499023438, 499.435791015625, 239.52947998046875, 41.909088134765625, -95.24535369873047, 58.97667694091797, 251.840576171875, 29.619308471679688, 213.36212158203125, -4.251106262207031, 357.6494445800781, 194.22042846679688, -157.6125946044922, 203.09312438964844, 49.67073059082031, 446.52960205078125, -578.9366455078125, 693.3045654296875, 51.467044830322266, 218.3515167236328, -25.212142944335938, -294.1171875, 362.90582275390625, 270.4514465332031, 503.95928955078125, -414.0010986328125, -220.4944610595703, 8.750808715820312, 34.588958740234375, -85.65866088867188, 131.3273162841797, 74.52581787109375, 806.5382690429688, 892.7734375, 291.737060546875, 32.31317901611328, 58.805233001708984, 511.3416748046875, 56.694602966308594, 287.58856201171875, 527.4496459960938, 381.30780029296875, -90.21577453613281, 61.41476821899414, 799.8345947265625, 62.694793701171875, 741.322998046875, 523.1889038085938, -119.7986831665039, -366.09674072265625, 41.399658203125, 294.3121643066406, 199.10121154785156, 679.1876831054688, 513.607666015625, 97.62019348144531, 44.76866149902344, 358.4842529296875, 67.74215698242188, 705.744140625, 220.07945251464844, 182.87564086914062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000334.npy"}
|
||||
{"epoch": 0.49045521292217326, "step": 335, "batch_size": 64, "mean": 148.0127410888672, "std": 286.202880859375, "min": -302.2250061035156, "p10": -221.52425537109374, "median": 109.42106628417969, "p90": 459.7585937500001, "max": 1027.07275390625, "pos_frac": 0.625, "sample": [186.97705078125, 415.9536437988281, 440.8411865234375, -12.784271240234375, 178.52928161621094, -82.7123794555664, -37.256614685058594, 440.880126953125, 62.562381744384766, 54.35478973388672, -247.58834838867188, -19.830520629882812, 302.921142578125, 260.5849609375, -42.61327362060547, 678.1712036132812, -33.017860412597656, 84.78034210205078, -294.4288330078125, 228.20559692382812, -136.99822998046875, 57.58802795410156, 222.6114501953125, 102.89889526367188, 742.2294921875, 607.6022338867188, 406.0062255859375, 283.056396484375, 46.19374084472656, 667.3953857421875, 1027.07275390625, -302.2250061035156, 135.28762817382812, -295.1396789550781, 403.0246276855469, -31.780954360961914, 436.81005859375, -2.4405345916748047, 70.60049438476562, 594.1134643554688, -89.12091064453125, 334.2425842285156, -210.13790893554688, 127.62744140625, 248.80545043945312, -96.82711029052734, 236.29356384277344, -24.42572021484375, -280.5238952636719, -117.09136962890625, 115.9432373046875, -164.3841094970703, 428.75347900390625, -263.54248046875, -210.54864501953125, 382.2738952636719, 269.74566650390625, 467.849365234375, -21.112106323242188, -226.22808837890625, 328.84381103515625, 351.4996032714844, 218.8150634765625, 67.62826538085938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000335.npy"}
|
||||
{"epoch": 0.4919236417033774, "step": 336, "batch_size": 64, "mean": 249.61618041992188, "std": 359.6513366699219, "min": -687.2728271484375, "p10": -103.95736541748046, "median": 254.58018493652344, "p90": 763.5552795410157, "max": 1204.8065185546875, "pos_frac": 0.84375, "sample": [319.9119873046875, 69.411865234375, 934.35302734375, -33.60596466064453, 428.92364501953125, 280.35369873046875, 280.7238464355469, -128.79910278320312, -75.00625610351562, 891.8880615234375, 309.20306396484375, 350.0997314453125, 143.60989379882812, 483.4533996582031, 1204.8065185546875, 22.901945114135742, 816.7301025390625, 312.0234375, 272.67999267578125, 424.31060791015625, -100.74568176269531, 57.32939147949219, 107.73210144042969, 250.0164794921875, 125.7578125, 423.4588317871094, 118.10604858398438, 98.32028198242188, 771.9212036132812, 281.74066162109375, 206.16004943847656, 64.96888732910156, 50.48585510253906, 259.1438903808594, 108.20204162597656, 82.09857177734375, -468.3065185546875, 742.599609375, -105.33380126953125, -265.9677734375, 744.0347900390625, 18.51776885986328, 873.4383544921875, 67.63758850097656, 454.58538818359375, 71.93850708007812, -687.2728271484375, 77.7859878540039, 327.575927734375, 326.504150390625, 124.04872131347656, 118.56620025634766, 174.2976531982422, 22.14628028869629, 273.2557067871094, 512.1853637695312, 550.9138793945312, 1116.803955078125, -388.1047668457031, 353.34393310546875, -489.0634765625, 373.0149841308594, 299.2814025878906, 544.3384399414062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000336.npy"}
|
||||
{"epoch": 0.4933920704845815, "step": 337, "batch_size": 64, "mean": 250.1221160888672, "std": 284.1232604980469, "min": -287.51446533203125, "p10": -83.99329490661619, "median": 219.81403350830078, "p90": 636.7614990234376, "max": 1083.242919921875, "pos_frac": 0.828125, "sample": [957.31298828125, 257.1723937988281, -213.63441467285156, 112.67501068115234, 612.1195678710938, 518.5068359375, -17.840431213378906, 290.39593505859375, -13.580066680908203, 470.45977783203125, 4.465339660644531, 253.5657958984375, 294.28033447265625, 412.8682861328125, 463.1806335449219, -130.69039916992188, 217.43572998046875, 160.7623291015625, 29.036148071289062, 536.3838500976562, 221.47352600097656, 218.154541015625, 27.8282527923584, 647.3223266601562, 164.47874450683594, -97.2095947265625, 1083.242919921875, 88.4368896484375, -201.73114013671875, -195.63821411132812, 235.85693359375, 190.85206604003906, 266.83746337890625, -216.16134643554688, 207.2075653076172, 90.24620056152344, 482.3889465332031, -15.989334106445312, 297.6954345703125, 390.1864013671875, 42.59553146362305, 717.2039184570312, 83.7589111328125, 193.13015747070312, -287.51446533203125, 134.43466186523438, 858.3563842773438, 524.131591796875, 361.5011901855469, 66.30982208251953, 185.15182495117188, 93.30965423583984, 708.8681640625, 370.638427734375, 168.2113494873047, 717.0797729492188, 407.94964599609375, 226.89276123046875, 33.185447692871094, 360.1070556640625, 296.0467834472656, 338.710205078125, -53.1552619934082, 360.5586242675781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000337.npy"}
|
||||
{"epoch": 0.4948604992657856, "step": 338, "batch_size": 64, "mean": 233.94757080078125, "std": 240.927001953125, "min": -252.11795043945312, "p10": -18.493199157714834, "median": 188.75465393066406, "p90": 562.4945983886719, "max": 847.487548828125, "pos_frac": 0.84375, "sample": [60.38990783691406, 564.6362915039062, 87.36482238769531, -252.11795043945312, 220.38424682617188, 174.6722869873047, -151.14138793945312, 226.7339324951172, 655.4385375976562, 257.2611389160156, 139.70433044433594, 85.66810607910156, -126.637939453125, 488.5351867675781, 95.60104370117188, 343.1292724609375, 26.821624755859375, 508.9426574707031, 427.2889099121094, 510.4498596191406, 713.980224609375, -22.69452667236328, 223.3609619140625, 233.66468811035156, 257.488037109375, -3.0830230712890625, 441.90777587890625, -23.22325897216797, -8.690101623535156, -2.9499130249023438, 234.4188232421875, 568.9586181640625, 510.9698486328125, 86.04386901855469, 224.76025390625, 415.3898620605469, 159.6983642578125, 44.86643600463867, 339.1860656738281, 150.49159240722656, 378.15435791015625, 61.0272216796875, 89.61105346679688, 101.59066772460938, 514.537353515625, 41.476104736328125, 178.51602172851562, 122.9559555053711, 375.282470703125, 171.16517639160156, 20.07720947265625, 38.724308013916016, 547.7216796875, 847.487548828125, -76.88426971435547, 283.38214111328125, 557.497314453125, 198.9932861328125, 360.75799560546875, 172.27493286132812, 667.285400390625, -246.01040649414062, 66.3265609741211, 613.0252075195312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000338.npy"}
|
||||
{"epoch": 0.49632892804698975, "step": 339, "batch_size": 64, "mean": 231.79090881347656, "std": 257.9338073730469, "min": -293.88653564453125, "p10": -44.42213745117187, "median": 221.4570770263672, "p90": 624.6512756347659, "max": 892.3178100585938, "pos_frac": 0.8125, "sample": [11.370941162109375, -152.832275390625, -83.3443603515625, -44.90814971923828, 654.4990234375, 68.71022033691406, -3.5443267822265625, 138.50125122070312, 355.3064880371094, 44.87757110595703, 892.3178100585938, 39.74928283691406, 98.1234130859375, 183.028564453125, 326.70794677734375, 377.30169677734375, 22.513193130493164, 8.708335876464844, 77.9069595336914, 579.29931640625, 280.30560302734375, -25.989768981933594, 284.3866271972656, -18.951637268066406, 292.3904113769531, 121.46934509277344, 705.0706787109375, 27.509429931640625, 396.9317626953125, 644.0878295898438, -177.95318603515625, 264.67510986328125, -293.88653564453125, 158.25796508789062, 493.26031494140625, 363.05682373046875, 178.0311279296875, -43.288108825683594, 255.81832885742188, 507.6590881347656, 155.72364807128906, 262.0166320800781, 479.82525634765625, 539.97314453125, 111.45093536376953, 652.5258178710938, -153.99090576171875, 661.995849609375, 326.0237731933594, -211.40098571777344, 227.97250366210938, 209.94964599609375, 38.65861511230469, 214.941650390625, 45.66079330444336, 248.2643280029297, 240.32237243652344, -39.265464782714844, 515.7213134765625, 365.742919921875, 537.0433349609375, 718.5989990234375, 399.08795166015625, 280.6419372558594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000339.npy"}
|
||||
{"epoch": 0.4977973568281938, "step": 340, "batch_size": 64, "mean": 198.86572265625, "std": 288.56158447265625, "min": -484.27764892578125, "p10": -103.57010421752928, "median": 182.1568374633789, "p90": 602.4258056640626, "max": 1030.6243896484375, "pos_frac": 0.78125, "sample": [179.428955078125, 254.37213134765625, 92.6952896118164, 228.1295166015625, 232.04075622558594, 534.1060180664062, 646.384765625, -484.27764892578125, 258.35980224609375, -44.119590759277344, 354.306884765625, 1030.6243896484375, 95.0223388671875, -288.5621337890625, 616.232666015625, -82.05916595458984, 287.5509338378906, 121.3757095336914, -37.60724639892578, 91.05810546875, 198.0865936279297, -224.87835693359375, -395.0338439941406, 576.7935180664062, -184.11192321777344, 93.90300750732422, 37.208580017089844, 448.4706115722656, -42.58612060546875, 314.07696533203125, 545.9421997070312, 4.0361480712890625, 869.0151977539062, 613.4110717773438, 13.82771110534668, 147.35592651367188, 125.92913818359375, 261.93865966796875, 184.8847198486328, 206.92831420898438, 227.03207397460938, 131.47390747070312, 213.6929931640625, -8.07400894165039, -112.78907775878906, 269.6142272949219, 289.65673828125, -77.20662689208984, 523.9381713867188, 1.1229095458984375, 130.93429565429688, 439.5491027832031, 654.8216552734375, 683.4801025390625, 374.53582763671875, 254.83499145507812, 157.19876098632812, 36.195804595947266, 384.6108703613281, 157.74969482421875, -302.6820068359375, 396.25677490234375, -53.44004821777344, 74.63819122314453], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000340.npy"}
|
||||
{"epoch": 0.49926578560939794, "step": 341, "batch_size": 64, "mean": 226.50100708007812, "std": 268.1539001464844, "min": -285.8695373535156, "p10": -83.95732269287109, "median": 210.98403930664062, "p90": 550.7614196777345, "max": 1119.803466796875, "pos_frac": 0.796875, "sample": [1119.803466796875, -84.4918212890625, -93.99238586425781, -149.68466186523438, 333.27239990234375, 525.9932861328125, 730.152099609375, 483.4325866699219, 248.35755920410156, 171.60409545898438, -35.225250244140625, 166.45486450195312, 71.95391845703125, 267.6871032714844, 17.13868522644043, -47.30961608886719, -82.71015930175781, 376.7877197265625, 41.43437957763672, 291.54718017578125, -162.96743774414062, 388.9029541015625, 432.63623046875, 233.79983520507812, 75.89989471435547, 595.9016723632812, 128.99066162109375, 227.80252075195312, -72.03752136230469, -56.09086608886719, 100.62562561035156, 455.759033203125, 613.9605712890625, 300.38531494140625, 283.3509521484375, 437.3780517578125, 101.91468811035156, 7.733240127563477, -106.90250396728516, 445.09869384765625, 533.1348266601562, 315.8748779296875, 51.5931396484375, 73.97940826416016, -260.82403564453125, 569.4329223632812, 129.0904083251953, 229.19761657714844, -285.8695373535156, 351.0676574707031, 194.16555786132812, 284.1647644042969, 460.9935302734375, -76.9893569946289, 808.2080688476562, 35.49272155761719, 347.96502685546875, 479.42083740234375, 440.5791015625, 54.58828353881836, 150.41629028320312, 107.28899383544922, 558.315673828125, 160.43014526367188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000341.npy"}
|
||||
{"epoch": 0.5007342143906021, "step": 342, "batch_size": 64, "mean": 201.77847290039062, "std": 280.6956481933594, "min": -582.7496337890625, "p10": -100.31032409667968, "median": 171.16423797607422, "p90": 498.5778961181642, "max": 1105.904541015625, "pos_frac": 0.78125, "sample": [742.4360961914062, -582.7496337890625, -238.49893188476562, 516.822998046875, 54.690128326416016, 44.34752655029297, 152.82815551757812, 369.64044189453125, 313.58612060546875, 349.5036926269531, 149.2448272705078, 94.56135559082031, 193.9039764404297, 192.98204040527344, 513.2181396484375, 313.4605712890625, 70.22236633300781, 463.7770080566406, -185.68130493164062, -36.56913757324219, 445.98388671875, -84.8009262084961, 61.663909912109375, -94.8093490600586, -76.1138687133789, 355.84771728515625, 919.211181640625, 383.6207275390625, 142.90403747558594, -209.9601287841797, -71.17237854003906, 1105.904541015625, 217.15609741210938, 185.04652404785156, 13.975540161132812, -30.117393493652344, 577.4082641601562, 444.7290344238281, 626.864501953125, 174.71974182128906, -24.220136642456055, 173.7373504638672, 343.5736999511719, 25.298980712890625, 464.4173278808594, 401.9107360839844, 156.6707763671875, 350.2077331542969, 111.67137145996094, 439.21185302734375, 446.97186279296875, 88.28131866455078, -213.07492065429688, 79.21250915527344, 418.2903137207031, -131.51699829101562, 168.59112548828125, -102.66788482666016, 192.62924194335938, 104.57392883300781, 208.57937622070312, 403.5238342285156, 138.64764404296875, 89.51290130615234], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000342.npy"}
|
||||
{"epoch": 0.5022026431718062, "step": 343, "batch_size": 64, "mean": 240.35006713867188, "std": 223.0820770263672, "min": -354.916259765625, "p10": 7.4985576629638695, "median": 212.0894317626953, "p90": 517.5120178222656, "max": 825.4210815429688, "pos_frac": 0.921875, "sample": [361.18341064453125, 211.33462524414062, 125.96861267089844, 153.121337890625, 250.82003784179688, 9.8076171875, 364.22930908203125, 349.1842041015625, 205.32525634765625, 747.5287475585938, 59.60292053222656, -180.7626953125, 226.18829345703125, 67.21395874023438, 194.66290283203125, 73.53948974609375, 77.92916870117188, 227.28025817871094, 251.14065551757812, 696.6226196289062, 189.53211975097656, 301.86761474609375, 76.98890686035156, 320.8920593261719, 393.302978515625, 299.21234130859375, 115.34011840820312, 99.05783081054688, 825.4210815429688, 173.6863250732422, 4.478157043457031, 91.02081298828125, 329.7221984863281, 118.62017822265625, 271.3111877441406, 342.5719909667969, 251.58346557617188, 95.66041564941406, 6.508960723876953, 464.5174865722656, -354.916259765625, 328.5371398925781, 138.89712524414062, 433.8299865722656, 341.0452880859375, 754.8187866210938, 445.70062255859375, 324.0660095214844, 149.42324829101562, 327.9637756347656, 183.96292114257812, 388.8661804199219, 52.86846923828125, -98.2017822265625, 509.82427978515625, 520.8067626953125, 183.6547088623047, 165.42294311523438, 173.75527954101562, 212.84423828125, -51.043128967285156, 610.7227783203125, 587.013671875, -190.67459106445312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000343.npy"}
|
||||
{"epoch": 0.5036710719530103, "step": 344, "batch_size": 64, "mean": 206.81173706054688, "std": 281.90093994140625, "min": -270.578125, "p10": -49.918048095703114, "median": 176.6980438232422, "p90": 561.5502197265625, "max": 1470.255126953125, "pos_frac": 0.78125, "sample": [49.49113464355469, -73.6865463256836, -5.9020233154296875, -12.425834655761719, 53.176395416259766, 73.29074096679688, 199.2345733642578, 88.88129425048828, 79.35868835449219, 293.462646484375, -11.649351119995117, 836.0181274414062, 82.85503387451172, 261.07269287109375, 138.35899353027344, 194.10360717773438, 177.54554748535156, -270.578125, -233.07839965820312, 282.3045349121094, 212.21534729003906, 53.60133361816406, 175.8505401611328, 0.3858833312988281, 232.39727783203125, 606.536865234375, 584.4889526367188, 395.2531433105469, 93.05648040771484, -38.87420654296875, 353.5586242675781, 241.1011962890625, 271.68048095703125, 91.16268920898438, 179.66278076171875, -18.818450927734375, 366.2840576171875, -54.651123046875, 304.70025634765625, -6.98967170715332, 534.1259765625, 280.7088317871094, 201.82664489746094, 12.330070495605469, 92.5707015991211, 1470.255126953125, 443.45947265625, 291.8017883300781, 12.428361892700195, 638.6590576171875, -21.858375549316406, 572.4127197265625, 163.65419006347656, 565.7294921875, 73.71177673339844, -212.76380920410156, -147.81402587890625, 452.9810791015625, 551.798583984375, -206.48995971679688, 279.1020812988281, 390.1280822753906, 38.72889709472656, 514.0283203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000344.npy"}
|
||||
{"epoch": 0.5051395007342144, "step": 345, "batch_size": 64, "mean": 233.68710327148438, "std": 273.42999267578125, "min": -423.1139221191406, "p10": -55.48074645996093, "median": 199.5896224975586, "p90": 616.8588562011722, "max": 939.6617431640625, "pos_frac": 0.8125, "sample": [-15.325874328613281, 650.5565185546875, 91.54428100585938, 382.26348876953125, 143.5431365966797, -207.28964233398438, 724.77685546875, -169.31687927246094, 164.01934814453125, 445.9998779296875, -22.732961654663086, 320.3037109375, 118.54058837890625, 411.6398010253906, 543.0505981445312, 129.86135864257812, -307.0761413574219, 333.32501220703125, 18.91238021850586, 74.69697570800781, 426.6326599121094, 406.50787353515625, 73.37528991699219, 368.3485107421875, 250.31463623046875, 537.666015625, 116.44878387451172, 305.91204833984375, 93.80972290039062, 194.95033264160156, 29.580108642578125, 822.322998046875, 411.32012939453125, 74.05308532714844, 97.88265991210938, 438.5781555175781, 433.23516845703125, 447.80902099609375, 363.7345275878906, 236.9794464111328, 417.56964111328125, 939.6617431640625, 44.75735092163086, 141.44189453125, 407.4793701171875, 295.1803894042969, -2.742443084716797, 752.683349609375, 204.22891235351562, 474.229736328125, -49.23888397216797, -136.15023803710938, 206.41574096679688, 648.490966796875, -10.77479362487793, -200.33126831054688, -58.15583038330078, 148.86758422851562, 659.790771484375, 216.18704223632812, 176.8895263671875, 127.85519409179688, -423.1139221191406, 13.998825073242188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000345.npy"}
|
||||
{"epoch": 0.5066079295154186, "step": 346, "batch_size": 64, "mean": 220.168212890625, "std": 256.4474792480469, "min": -561.2949829101562, "p10": -89.96243438720703, "median": 242.04862213134766, "p90": 584.5332519531253, "max": 754.9864501953125, "pos_frac": 0.8125, "sample": [284.787353515625, 137.81556701660156, 36.99303436279297, 337.70599365234375, 631.2586669921875, 527.5384521484375, 689.10107421875, 228.83309936523438, 137.03477478027344, 740.8250122070312, -561.2949829101562, 111.28355407714844, 403.97442626953125, 85.08761596679688, 326.9662780761719, 639.819091796875, -12.812225341796875, 318.1630859375, 754.9864501953125, 59.98675537109375, 332.48541259765625, 181.16897583007812, -144.08450317382812, 400.81243896484375, 308.9714660644531, 354.671630859375, 331.1775817871094, 262.65411376953125, 398.0744323730469, 45.656402587890625, 13.158323287963867, -87.61641693115234, 213.50660705566406, 61.10923767089844, 608.9595947265625, 179.14305114746094, 255.26414489746094, 124.78152465820312, 301.6272888183594, -133.19749450683594, -44.31372833251953, 613.920166015625, 322.7279357910156, 516.484130859375, 173.3845977783203, 356.84625244140625, 384.839111328125, 348.5670166015625, 19.059429168701172, -88.32989501953125, 273.3573303222656, -90.66209411621094, 80.56893920898438, -195.08868408203125, 95.63975524902344, 514.8493041992188, 135.31460571289062, 151.3653564453125, 438.7681579589844, -237.90756225585938, -16.222381591796875, -171.18612670898438, 308.7441101074219, 313.66290283203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000346.npy"}
|
||||
{"epoch": 0.5080763582966226, "step": 347, "batch_size": 64, "mean": 220.22601318359375, "std": 285.9312438964844, "min": -334.8613586425781, "p10": -112.49094085693359, "median": 185.0916290283203, "p90": 636.6517822265627, "max": 909.8409423828125, "pos_frac": 0.765625, "sample": [-334.8613586425781, 283.5430908203125, 468.85882568359375, 344.5904235839844, 88.33440399169922, -34.22107696533203, 225.81271362304688, 284.06964111328125, 676.0675048828125, 76.97411346435547, 425.43963623046875, 34.76025390625, 168.9235382080078, -80.75286865234375, 423.5916748046875, -313.80224609375, -291.3729553222656, -114.80776977539062, 770.3792724609375, 99.27410125732422, 527.850830078125, 37.06631851196289, 230.4564208984375, 340.61895751953125, -208.4757080078125, 526.052001953125, 598.613037109375, 297.84967041015625, 16.558692932128906, 652.9541015625, 314.62615966796875, 671.7171020507812, 346.0341796875, -65.74930572509766, 206.99966430664062, -97.64745330810547, 503.6646728515625, 147.14572143554688, 146.45465087890625, -107.08500671386719, 165.96559143066406, 285.2790222167969, 87.68785858154297, 337.56951904296875, 159.04446411132812, 102.85049438476562, 117.58404541015625, -4.825202941894531, 909.8409423828125, 84.74005889892578, 378.19769287109375, 654.2498168945312, 201.2597198486328, 815.1336669921875, -97.0971450805664, 92.18122100830078, 343.3182067871094, -229.2155303955078, 416.6074523925781, -3.8075408935546875, 465.2385559082031, -164.48472595214844, 130.89317321777344, 559.748291015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000347.npy"}
|
||||
{"epoch": 0.5095447870778267, "step": 348, "batch_size": 64, "mean": 181.44418334960938, "std": 260.2834777832031, "min": -331.294189453125, "p10": -133.92335433959957, "median": 199.37931060791016, "p90": 433.4000854492188, "max": 1103.7850341796875, "pos_frac": 0.765625, "sample": [332.5848693847656, 78.01512908935547, 178.91259765625, 426.3167724609375, 361.0412292480469, -78.0357437133789, 87.8730239868164, 339.0834045410156, 403.21441650390625, -331.294189453125, 114.56111145019531, 65.55914306640625, 605.9849853515625, 308.4425354003906, 135.65821838378906, 288.7143859863281, 13.478199005126953, 297.3711853027344, 135.64886474609375, 81.2010726928711, 315.1932678222656, 306.02313232421875, -106.14986419677734, 1103.7850341796875, 414.2528076171875, 271.5618591308594, -92.80986785888672, 227.54910278320312, -32.063323974609375, 73.47338104248047, -98.12684631347656, -145.82627868652344, -177.38815307617188, 848.2289428710938, 573.71923828125, 329.15118408203125, 308.2784423828125, 230.6233367919922, 230.3726806640625, 3.6806468963623047, -280.01904296875, 390.1525573730469, -32.97724151611328, 319.7829284667969, 269.6520080566406, 233.86346435546875, -189.000244140625, 12.193826675415039, 545.8564453125, 281.2030029296875, 95.63309478759766, -91.12400817871094, 436.435791015625, -245.77203369140625, 335.5711364746094, 219.8460235595703, 81.62853240966797, 27.980411529541016, 28.534393310546875, 544.5438232421875, 95.9805908203125, 298.3619689941406, -146.84779357910156, -46.911319732666016], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000348.npy"}
|
||||
{"epoch": 0.5110132158590308, "step": 349, "batch_size": 64, "mean": 213.40219116210938, "std": 302.7085266113281, "min": -494.3605041503906, "p10": -93.46201019287109, "median": 146.0917510986328, "p90": 586.9040405273438, "max": 1200.22998046875, "pos_frac": 0.765625, "sample": [134.12612915039062, -366.5832214355469, 480.9944152832031, 128.5244903564453, 100.28599548339844, 112.30532836914062, 471.19329833984375, -91.83202362060547, 243.98812866210938, 409.3927917480469, 152.50555419921875, 527.4595947265625, 154.1434783935547, -40.868736267089844, 47.67974090576172, 65.81851959228516, -26.341650009155273, 30.43682098388672, 233.7414093017578, -217.66397094726562, 493.36151123046875, 345.1891174316406, 376.1065673828125, 18.803937911987305, 333.6448669433594, -69.22962951660156, 429.19244384765625, -94.16057586669922, 99.21733856201172, 166.66940307617188, 816.2769165039062, 15.709892272949219, -20.309185028076172, 519.7001953125, 183.6260986328125, -494.3605041503906, -120.59984588623047, 107.17606353759766, 583.7913818359375, 44.79776382446289, 735.2403564453125, -4.839263916015625, -250.61770629882812, -42.21443176269531, 588.238037109375, 614.96435546875, 850.2982177734375, 192.34605407714844, 600.9570922851562, 139.67794799804688, 437.9051513671875, 490.715576171875, 4.058540344238281, 317.474365234375, 290.50958251953125, 282.8770446777344, -143.4583740234375, 63.50630187988281, -18.824512481689453, 469.5083923339844, 400.542724609375, 80.44549560546875, 74.29010009765625, 1200.22998046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000349.npy"}
|
||||
{"epoch": 0.5124816446402349, "step": 350, "batch_size": 64, "mean": 206.09213256835938, "std": 287.3081970214844, "min": -540.822265625, "p10": -188.86669769287107, "median": 192.16341400146484, "p90": 505.84445800781265, "max": 909.2736206054688, "pos_frac": 0.828125, "sample": [814.4567260742188, -360.9170227050781, 123.34186553955078, 385.4510803222656, 330.8413391113281, 184.03131103515625, 403.6324157714844, 89.1085205078125, -142.6424102783203, 114.59727478027344, 94.63778686523438, 448.400634765625, -389.7562255859375, 167.11500549316406, 196.56655883789062, 12.955413818359375, 724.730224609375, 433.38543701171875, 909.2736206054688, 290.6800842285156, 179.76840209960938, 295.5799865722656, -31.6729793548584, 461.8616027832031, 215.934326171875, 799.9005737304688, 104.45120239257812, -245.03228759765625, 127.39270782470703, 57.18219757080078, 523.896240234375, 116.778564453125, 153.06015014648438, 180.09414672851562, 359.3236999511719, 305.24200439453125, 118.06771087646484, 221.31394958496094, -200.71347045898438, 674.09033203125, 187.76026916503906, 463.7236328125, 323.788330078125, -206.89376831054688, 126.84729766845703, 242.5400848388672, 600.8248901367188, 423.2947082519531, 212.84518432617188, 299.4982604980469, -540.822265625, 430.43511962890625, 143.14662170410156, 242.62872314453125, 59.500213623046875, 206.56973266601562, -383.35498046875, -45.805572509765625, 23.743309020996094, 187.65762329101562, 432.535888671875, 210.879638671875, -161.22422790527344, 463.369140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000350.npy"}
|
||||
{"epoch": 0.5139500734214391, "step": 351, "batch_size": 64, "mean": 253.93798828125, "std": 324.66754150390625, "min": -422.4456787109375, "p10": -114.25879592895501, "median": 232.38806915283203, "p90": 719.6615234375, "max": 1229.544677734375, "pos_frac": 0.78125, "sample": [103.017333984375, 293.2396240234375, 504.19378662109375, 1089.55029296875, 1229.544677734375, 141.89674377441406, 398.6943359375, 112.15336608886719, 278.597412109375, 310.80487060546875, 66.4312744140625, 242.218505859375, 734.8220825195312, 842.0382690429688, 385.22271728515625, 342.8908996582031, 509.93768310546875, 582.5311889648438, 707.587890625, 1.1016483306884766, -205.28366088867188, 375.868896484375, 126.75228881835938, 246.7840576171875, 156.80264282226562, 336.62017822265625, 561.4234008789062, 238.6744842529297, -422.4456787109375, -49.75190734863281, -324.7501525878906, 132.60772705078125, -140.6925811767578, 121.11846160888672, -39.14862060546875, 887.9114990234375, 27.886810302734375, 121.93685913085938, 232.330078125, 15.871482849121094, -145.77395629882812, 349.2059326171875, 15.906269073486328, 724.8359375, 110.11129760742188, -11.558191299438477, 215.2702178955078, -29.690338134765625, -30.37750244140625, -154.28709411621094, -52.57996368408203, 660.142333984375, 195.48390197753906, 382.69378662109375, 747.385009765625, -169.1682586669922, 282.1267395019531, 634.38232421875, 277.19830322265625, 293.8182067871094, 138.64686584472656, 355.87762451171875, 232.44606018066406, -47.05449676513672], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000351.npy"}
|
||||
{"epoch": 0.5154185022026432, "step": 352, "batch_size": 64, "mean": 158.47000122070312, "std": 285.0350341796875, "min": -622.8960571289062, "p10": -160.8986633300781, "median": 129.08557891845703, "p90": 505.80632019042974, "max": 1030.3193359375, "pos_frac": 0.71875, "sample": [409.06793212890625, 363.63470458984375, 154.99215698242188, 509.7094421386719, -315.4535827636719, 50.38005065917969, 66.07331085205078, 421.260009765625, 43.75132751464844, 239.3072052001953, 736.712158203125, 464.8265380859375, -46.10923385620117, 114.75811004638672, 242.96617126464844, -44.47099304199219, -112.8817138671875, 1030.3193359375, 275.7903137207031, -146.39874267578125, 176.5078125, 719.9412841796875, 137.2115936279297, 429.23773193359375, 43.59101867675781, -88.73596954345703, -127.68441772460938, -265.7576904296875, 3.9751663208007812, 99.02316284179688, 136.00167846679688, 496.69903564453125, 16.865888595581055, 17.24530792236328, 571.1426391601562, 130.1224365234375, -622.8960571289062, 58.61651611328125, 195.7205810546875, 353.14129638671875, 260.11090087890625, 121.23593139648438, 180.6060028076172, -176.03302001953125, -85.76408386230469, -19.080596923828125, -252.32333374023438, 571.885986328125, 484.98553466796875, -172.0721893310547, 262.21490478515625, -84.0338363647461, -167.1129150390625, 428.76800537109375, 252.02658081054688, 192.31903076171875, 441.956787109375, -87.38481903076172, 606.7493896484375, 128.04872131347656, 5.552947998046875, -31.359054565429688, 228.65740966796875, 113.9217529296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000352.npy"}
|
||||
{"epoch": 0.5168869309838473, "step": 353, "batch_size": 64, "mean": 203.07354736328125, "std": 289.1540832519531, "min": -402.90374755859375, "p10": -85.00876846313476, "median": 160.27713012695312, "p90": 636.3913330078126, "max": 871.6627197265625, "pos_frac": 0.703125, "sample": [395.15557861328125, 405.2440490722656, 74.7791519165039, 468.53448486328125, 188.04428100585938, 415.9888000488281, 38.95130920410156, -124.86300659179688, 611.3255004882812, 183.673095703125, 287.7742004394531, -80.50702667236328, 249.64369201660156, 552.1220703125, 29.726806640625, 337.58392333984375, -143.0163116455078, -27.86517333984375, 611.1573486328125, 662.9164428710938, 157.66725158691406, 273.7257385253906, -63.49989700317383, 89.03460693359375, -402.90374755859375, 430.16546630859375, -205.62445068359375, -86.04413604736328, -80.16693878173828, 293.00701904296875, -43.99772644042969, 155.956298828125, 344.1090393066406, 317.01422119140625, 11.8267822265625, -22.47249984741211, 11.977508544921875, -157.23052978515625, 831.5740966796875, 1.5961170196533203, -24.23883056640625, 655.5252075195312, -65.8809585571289, 45.65174102783203, 112.19014739990234, 324.29693603515625, 331.76678466796875, 162.8870086669922, -46.784393310546875, 643.9130859375, 280.3671569824219, -82.59291076660156, 618.840576171875, 368.1818542480469, 871.6627197265625, 812.4207763671875, -232.35121154785156, 367.4639892578125, 669.187255859375, -80.96633911132812, 59.57817840576172, 27.640850067138672, -59.475433349609375, 245.34005737304688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000353.npy"}
|
||||
{"epoch": 0.5183553597650514, "step": 354, "batch_size": 64, "mean": 228.19166564941406, "std": 238.4373016357422, "min": -197.09796142578125, "p10": -64.34891357421873, "median": 198.8369369506836, "p90": 523.5001373291017, "max": 958.309326171875, "pos_frac": 0.796875, "sample": [347.88775634765625, 535.8257446289062, 203.03819274902344, 40.0179443359375, 459.1271057128906, 194.63568115234375, 659.7177734375, -82.38710021972656, 276.3802795410156, -91.52934265136719, -75.57917022705078, -197.09796142578125, 243.98155212402344, 958.309326171875, 471.2592468261719, 142.86679077148438, -27.693870544433594, 344.4136962890625, 409.1319580078125, 312.8858947753906, 22.79749298095703, 384.2773742675781, 169.35189819335938, 155.26052856445312, -144.74783325195312, 106.70318603515625, 376.0868225097656, 194.19381713867188, 90.18517303466797, 401.1225891113281, 551.9136352539062, -79.63084411621094, 301.36907958984375, 267.4885559082031, 205.68661499023438, 203.86024475097656, -132.322021484375, 476.1046447753906, 156.1763916015625, 114.34822082519531, 494.7403869628906, -38.144981384277344, 342.8132019042969, 186.15264892578125, -16.91857147216797, 305.0018310546875, 78.1694564819336, 685.9358520507812, 383.4161376953125, -15.823272705078125, 10.882055282592773, 418.0732421875, 352.4788818359375, 152.2964324951172, -6.277595520019531, 788.1542358398438, 27.815208435058594, 384.50189208984375, 385.1470642089844, -18.224441528320312, 543.5726318359375, 123.742431640625, 86.63668823242188, 4.707118988037109], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000354.npy"}
|
||||
{"epoch": 0.5198237885462555, "step": 355, "batch_size": 64, "mean": 224.4081268310547, "std": 240.5799102783203, "min": -375.7923583984375, "p10": -50.24597434997557, "median": 223.81344604492188, "p90": 522.1337951660156, "max": 960.360107421875, "pos_frac": 0.796875, "sample": [118.22297668457031, 512.7081298828125, 477.7782897949219, 513.1898803710938, 31.380882263183594, 270.39959716796875, 237.53704833984375, 960.360107421875, -375.7923583984375, 280.57928466796875, 217.21530151367188, 563.2998046875, -3.9139480590820312, -17.79011344909668, 521.0565795898438, 594.3554077148438, -69.83946228027344, 582.4213256835938, 177.94703674316406, 93.02741241455078, 397.13348388671875, 205.21218872070312, -57.27558898925781, -33.84354019165039, 308.2038269042969, 491.01861572265625, 496.68792724609375, 306.0874938964844, 150.5708770751953, -3.9816741943359375, -23.635494232177734, 249.11807250976562, 87.63810729980469, -329.4914855957031, 241.90478515625, -173.6977081298828, 351.9434814453125, 527.6295166015625, 343.3914794921875, 613.1338500976562, 62.001869201660156, 233.5142822265625, 110.94145202636719, 167.90997314453125, 157.1820068359375, 280.9444580078125, 64.55475616455078, 344.6852722167969, 90.81077575683594, 217.4481201171875, 522.595458984375, 443.4739990234375, 353.0263671875, 226.03961181640625, 111.51475524902344, -29.71417236328125, 151.0362091064453, -145.19403076171875, 221.5872802734375, -77.51530456542969, 84.0655517578125, 328.898193359375, 352.7450866699219, 257.6763916015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000355.npy"}
|
||||
{"epoch": 0.5212922173274597, "step": 356, "batch_size": 64, "mean": 125.88236999511719, "std": 306.4451599121094, "min": -634.1937255859375, "p10": -226.50109405517577, "median": 92.38515090942383, "p90": 511.1202667236329, "max": 1004.815673828125, "pos_frac": 0.65625, "sample": [197.76852416992188, -8.567298889160156, 323.4600524902344, 91.0346908569336, -13.171527862548828, -90.31205749511719, -98.02752685546875, 175.01773071289062, -158.29966735839844, 249.6455078125, 340.88916015625, 0.9572944641113281, 740.8629760742188, 492.1963195800781, -115.43556213378906, 613.1182250976562, -152.62559509277344, -214.1513214111328, -634.1937255859375, -231.79385375976562, 74.6906967163086, 787.009033203125, -104.96452331542969, 122.1043701171875, 345.707275390625, 519.2305297851562, -46.664695739746094, 381.5221252441406, -283.1169738769531, 55.307395935058594, -300.09356689453125, 694.8455200195312, 93.73561096191406, -279.1387023925781, 397.13580322265625, 110.89222717285156, 272.59808349609375, 381.0640869140625, -181.97213745117188, 49.25044250488281, -71.26887512207031, -105.3867416381836, 194.316650390625, 238.6597900390625, 8.282707214355469, 250.7373046875, 334.20208740234375, -206.7229461669922, 717.4090576171875, 250.80899047851562, 170.544189453125, 26.940460205078125, -334.7940673828125, -20.66516876220703, 195.76788330078125, 125.11881256103516, 76.60386657714844, 278.1892395019531, -394.367919921875, 1004.815673828125, 323.3502197265625, 314.21185302734375, 11.504814147949219, 70.69800567626953], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000356.npy"}
|
||||
{"epoch": 0.5227606461086637, "step": 357, "batch_size": 64, "mean": 200.40087890625, "std": 252.05979919433594, "min": -565.578369140625, "p10": -61.99346542358398, "median": 179.6533203125, "p90": 581.4231079101563, "max": 783.540771484375, "pos_frac": 0.78125, "sample": [396.306396484375, 573.94775390625, 67.89662170410156, 622.4180908203125, 551.8095703125, 13.276412963867188, 33.61604309082031, 397.8192138671875, 115.41567993164062, 584.6268310546875, 256.5135498046875, 94.16020965576172, -121.43720245361328, 600.5704345703125, -57.4580078125, 164.12448120117188, 195.18215942382812, 207.28884887695312, 302.6717529296875, -86.68501281738281, 668.69287109375, -21.850440979003906, 333.9043884277344, -54.84925842285156, -565.578369140625, 61.73906326293945, 294.00628662109375, 131.81060791015625, -63.937232971191406, 595.2318725585938, 278.22247314453125, -158.13134765625, 498.0428161621094, -164.19091796875, 357.92095947265625, 16.192150115966797, 355.07763671875, 783.540771484375, 299.1900634765625, 255.35427856445312, 17.272186279296875, -27.19376564025879, 292.01513671875, 568.6630859375, 56.32752227783203, 66.22335815429688, 34.06260681152344, 239.05374145507812, -0.16943740844726562, 203.01025390625, -53.61572265625, 263.55279541015625, 161.88916015625, 9.271896362304688, 271.1770324707031, 334.1602783203125, 95.6203842163086, 338.8797912597656, -92.77700805664062, 285.27178955078125, 107.27772521972656, 691.37646484375, -8.10382080078125, 159.95889282226562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000357.npy"}
|
||||
{"epoch": 0.5242290748898678, "step": 358, "batch_size": 64, "mean": 188.8855743408203, "std": 260.2104187011719, "min": -409.231201171875, "p10": -64.18499221801757, "median": 120.90529251098633, "p90": 531.0541687011721, "max": 932.255859375, "pos_frac": 0.765625, "sample": [74.88504028320312, 665.661376953125, 25.542068481445312, 278.5182189941406, 249.6669158935547, 305.8791809082031, 70.47697448730469, 293.8421630859375, 124.47802734375, 47.6842041015625, 20.2816162109375, -66.48358154296875, -88.83578491210938, -26.636001586914062, 3.8823070526123047, 789.6392822265625, -58.821617126464844, 555.0004272460938, 65.02334594726562, 100.91693115234375, -6.398397445678711, -35.93778991699219, 18.18115234375, 34.76300811767578, 147.2484130859375, -409.231201171875, 322.79620361328125, -149.4500732421875, 932.255859375, 406.02203369140625, 440.0394287109375, 167.3052978515625, 299.44659423828125, -12.026100158691406, 584.41455078125, 202.65426635742188, -15.629142761230469, 97.60302734375, -94.20360565185547, 330.4375915527344, 317.128662109375, 407.0651550292969, -217.97103881835938, 325.0396423339844, 399.4010925292969, -55.304412841796875, 406.9628601074219, 747.6143798828125, 17.120899200439453, 123.04515075683594, 378.81683349609375, 173.52120971679688, 78.9383544921875, -34.242042541503906, 475.1795654296875, 880.0791625976562, 76.52355194091797, 105.3866195678711, 210.4205780029297, -75.52297973632812, 199.8083038330078, 118.76543426513672, 222.36436462402344, 117.64303588867188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000358.npy"}
|
||||
{"epoch": 0.5256975036710719, "step": 359, "batch_size": 64, "mean": 224.70556640625, "std": 293.3180847167969, "min": -411.05682373046875, "p10": -120.74913940429687, "median": 167.94786834716797, "p90": 657.4714538574221, "max": 1010.2642822265625, "pos_frac": 0.75, "sample": [-411.05682373046875, 225.88125610351562, 188.24920654296875, 423.0404052734375, -148.62680053710938, 697.437744140625, -208.92755126953125, 837.2565307617188, 120.79674530029297, -50.69136047363281, -122.82362365722656, -32.396583557128906, 392.3507385253906, 306.86383056640625, -214.8529052734375, 128.5017852783203, 390.27532958984375, -53.477691650390625, 463.3099365234375, -202.82376098632812, 244.650146484375, 225.88235473632812, 489.83465576171875, 52.345664978027344, 55.48738479614258, 169.0489044189453, -42.09880828857422, 566.3650512695312, 114.68994140625, 66.0663070678711, 471.80194091796875, 279.3280944824219, 1010.2642822265625, 448.01812744140625, -115.90867614746094, -1.642965316772461, 166.84683227539062, 101.93183135986328, -252.21063232421875, 727.5781860351562, 163.150146484375, 504.6936340332031, 276.92889404296875, 424.97271728515625, -36.251312255859375, 161.76724243164062, 516.8883056640625, 259.4737548828125, 153.16970825195312, 720.3143920898438, 69.57910919189453, -36.84629821777344, -66.20695495605469, 308.672607421875, 204.7183380126953, 691.0513305664062, 41.427734375, 205.1520538330078, 764.3436279296875, 559.4943237304688, 93.3360366821289, 579.118408203125, 150.60362243652344, 165.04080200195312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000359.npy"}
|
||||
{"epoch": 0.527165932452276, "step": 360, "batch_size": 64, "mean": 160.10589599609375, "std": 201.30638122558594, "min": -157.98800659179688, "p10": -84.15243225097655, "median": 123.66600036621094, "p90": 430.6164764404297, "max": 602.581298828125, "pos_frac": 0.765625, "sample": [72.60397338867188, 487.18450927734375, 16.674278259277344, 146.1524658203125, -19.821741104125977, 349.68597412109375, 38.828712463378906, 175.6046142578125, 252.25576782226562, 83.11592102050781, 160.30917358398438, 516.372314453125, -136.00169372558594, 327.0738830566406, 602.581298828125, 114.91389465332031, 41.32950973510742, -44.33103942871094, 274.61712646484375, 116.94497680664062, 351.2546691894531, 197.109130859375, -62.178916931152344, 229.64378356933594, 9.282711029052734, 323.7039794921875, 409.8365478515625, 0.7383880615234375, 0.7033271789550781, 423.6487121582031, 519.1878051757812, 310.08038330078125, 540.116455078125, -68.4967041015625, 42.957847595214844, 95.99461364746094, 218.66009521484375, 144.97128295898438, 401.04840087890625, -66.13055419921875, 74.17849731445312, 562.817138671875, -157.98800659179688, -90.86203002929688, -18.295501708984375, 131.9481201171875, -131.15914916992188, 130.38702392578125, -54.44062042236328, -128.1388397216797, 13.397003173828125, 282.02117919921875, 30.057775497436523, 402.9385681152344, 349.7068786621094, 290.70477294921875, 236.2711944580078, -105.2356948852539, 71.29864501953125, 433.6026611328125, 86.61592102050781, -96.2167739868164, -19.075050354003906, 354.0176696777344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000360.npy"}
|
||||
{"epoch": 0.5286343612334802, "step": 361, "batch_size": 64, "mean": 163.7840576171875, "std": 259.81756591796875, "min": -471.3898010253906, "p10": -132.01415939331054, "median": 175.52256774902344, "p90": 490.68079223632816, "max": 899.111083984375, "pos_frac": 0.703125, "sample": [36.563682556152344, 208.66366577148438, 48.1236686706543, 332.52716064453125, -59.46521759033203, -290.030029296875, -85.34845733642578, 178.84568786621094, 558.21923828125, 142.8969268798828, 193.26492309570312, 258.2210388183594, -471.3898010253906, 423.88299560546875, -49.69306945800781, -147.07066345214844, 199.87088012695312, 120.61988830566406, -299.96533203125, 19.549619674682617, 213.19403076171875, -106.61199188232422, -62.859375, -47.107845306396484, 197.7414093017578, 626.912109375, 300.8667907714844, 416.84991455078125, 155.57916259765625, 387.8201904296875, -78.78150939941406, -93.76766967773438, 240.06581115722656, 142.05184936523438, 421.4284362792969, 468.8055419921875, -200.96360778808594, 899.111083984375, 669.4783935546875, 486.24713134765625, 226.93040466308594, 190.93174743652344, 547.2060546875, 402.3061218261719, 172.19944763183594, 169.33486938476562, 76.37664031982422, 202.88754272460938, -78.34915161132812, -91.92574310302734, 29.77985382080078, -58.17837905883789, 531.3639526367188, -2.6739501953125, 347.60693359375, 323.815673828125, 153.05099487304688, -142.9008026123047, 264.32684326171875, 75.25846862792969, 180.946044921875, -262.7623596191406, 492.5809326171875, 377.72149658203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000361.npy"}
|
||||
{"epoch": 0.5301027900146843, "step": 362, "batch_size": 64, "mean": 270.9095153808594, "std": 272.2356262207031, "min": -126.67770385742188, "p10": -32.193933105468744, "median": 213.44778442382812, "p90": 629.2233215332031, "max": 1315.2091064453125, "pos_frac": 0.84375, "sample": [98.49105834960938, 108.13796997070312, 478.90435791015625, 582.78125, 330.0921630859375, -29.695152282714844, 1315.2091064453125, 267.40313720703125, 135.19808959960938, 606.608154296875, 532.8388671875, 102.34581756591797, 17.149085998535156, 90.61795043945312, 243.23472595214844, 495.0994567871094, 143.04991149902344, -33.26483917236328, 33.797607421875, 359.6214599609375, 528.747802734375, 115.09646606445312, 16.150854110717773, -19.71331787109375, -45.82695007324219, -15.314477920532227, 135.40451049804688, 163.6787567138672, -83.55225372314453, 683.1929931640625, 611.2517700195312, 108.291748046875, -49.054473876953125, 277.3326110839844, 227.87965393066406, 198.128173828125, 683.5980224609375, -46.503753662109375, 201.13597106933594, 636.9254150390625, 225.7595977783203, 107.58038330078125, 707.014892578125, 432.4812316894531, 124.30228424072266, 122.91971588134766, 375.5538330078125, 186.22198486328125, 64.53773498535156, 747.1544189453125, 236.89706420898438, 816.3407592773438, 496.5503234863281, 378.4884033203125, 256.1536865234375, 492.6634216308594, 492.522705078125, -66.10220336914062, 121.0592269897461, 381.0418395996094, 275.66656494140625, 262.6658020019531, 22.943756103515625, -126.67770385742188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000362.npy"}
|
||||
{"epoch": 0.5315712187958884, "step": 363, "batch_size": 64, "mean": 276.02301025390625, "std": 277.0833740234375, "min": -296.2618408203125, "p10": -17.953380584716797, "median": 262.46885681152344, "p90": 637.0526855468751, "max": 1318.193359375, "pos_frac": 0.828125, "sample": [-296.2618408203125, 325.308837890625, 556.087646484375, 239.40878295898438, 72.33436584472656, 225.65057373046875, 263.606689453125, 298.280517578125, 248.5569610595703, 123.57486724853516, 397.0899658203125, 243.76121520996094, 295.3213806152344, 65.3964614868164, 335.5811767578125, 828.3553466796875, -30.96587562561035, 600.638916015625, 60.51087951660156, 740.62841796875, 484.86981201171875, 77.14110565185547, 316.4898376464844, 387.0837707519531, 272.3815612792969, 410.17608642578125, 1318.193359375, 610.1077270507812, -43.1146240234375, 590.2783813476562, 216.46324157714844, 224.01649475097656, 19.079559326171875, 263.5269470214844, 449.1882019042969, 53.08118438720703, 218.0887908935547, -42.769989013671875, 648.6005249023438, 486.4639892578125, 393.7021179199219, -170.70040893554688, 41.41827392578125, 452.5832824707031, -9.612401962280273, 374.17816162109375, 705.8418579101562, -16.9312744140625, 261.4107666015625, 389.3465576171875, -8.447792053222656, 132.5080108642578, -97.71175384521484, 387.313232421875, 269.7020263671875, 24.866004943847656, -5.575660705566406, 220.361572265625, 27.369197845458984, 27.15363311767578, 709.7304077148438, 695.2568359375, -18.39142608642578, 327.88946533203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000363.npy"}
|
||||
{"epoch": 0.5330396475770925, "step": 364, "batch_size": 64, "mean": 206.69784545898438, "std": 280.54583740234375, "min": -531.979736328125, "p10": -79.00487670898436, "median": 192.91400146484375, "p90": 561.0714416503906, "max": 913.1270141601562, "pos_frac": 0.796875, "sample": [428.3495788574219, 251.06996154785156, -235.44664001464844, 35.55663299560547, -531.979736328125, 561.4849853515625, -302.2320861816406, 324.4101867675781, 224.67141723632812, 913.1270141601562, 178.03868103027344, 682.5673828125, 425.2515869140625, 330.28076171875, -53.779624938964844, 72.23728942871094, 56.707000732421875, -269.46978759765625, 307.99188232421875, -63.450469970703125, -98.16595458984375, 417.9654235839844, 223.5526123046875, 448.2863464355469, 326.0611572265625, 24.74907112121582, 9.16299057006836, 505.4217529296875, 207.78932189941406, 251.69549560546875, -26.921249389648438, 668.1683349609375, 133.87118530273438, 4.898956298828125, 137.025390625, 62.1846923828125, 470.53607177734375, 367.88067626953125, 288.35491943359375, -85.67105102539062, 86.06014251708984, 587.715576171875, 635.7139282226562, 135.83103942871094, 268.44140625, -46.26288604736328, -15.014841079711914, 353.80181884765625, 560.1065063476562, 378.0027160644531, 54.7588005065918, 159.83116149902344, 6.715299606323242, 151.3306884765625, 907.6602783203125, 72.46795654296875, 346.093994140625, 287.698974609375, -2.770792007446289, 261.5341796875, 74.34408569335938, 165.4578857421875, 485.03704833984375, -358.1249084472656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000364.npy"}
|
||||
{"epoch": 0.5345080763582967, "step": 365, "batch_size": 64, "mean": 218.00540161132812, "std": 263.7609558105469, "min": -199.95213317871094, "p10": -73.59536743164061, "median": 140.25416564941406, "p90": 554.889196777344, "max": 1264.002685546875, "pos_frac": 0.859375, "sample": [179.11703491210938, 106.63468170166016, 118.30010986328125, 136.53077697753906, 132.70343017578125, 137.3994140625, 335.6293029785156, 137.001953125, 242.26699829101562, 102.31210327148438, 317.54071044921875, 506.77581787109375, -81.80325317382812, 215.88070678710938, 51.38478088378906, -54.443634033203125, 84.98979187011719, 771.3493041992188, 582.9484252929688, 326.2999572753906, 107.53919219970703, 56.11042022705078, 74.29500579833984, 18.7869873046875, 186.86007690429688, -166.82525634765625, 100.12031555175781, 1.5705299377441406, 180.69024658203125, 134.62646484375, -140.4829559326172, -45.326900482177734, 111.0181655883789, 39.38961410522461, 403.53125, 664.8314819335938, 391.1079406738281, 63.47169876098633, -118.48281860351562, 454.58636474609375, 284.2005615234375, -168.8353271484375, 243.6294708251953, 36.18976593017578, 84.65508270263672, 335.6429443359375, 267.03668212890625, 232.06317138671875, 322.8642883300781, -141.386962890625, 87.19007110595703, 413.6668395996094, 43.292991638183594, 287.3382873535156, 238.0939483642578, 253.91796875, 1264.002685546875, 575.5092163085938, 501.8890380859375, -199.95213317871094, 785.204345703125, 765.99462890625, 143.10891723632812, 430.7931823730469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000365.npy"}
|
||||
{"epoch": 0.5359765051395007, "step": 366, "batch_size": 64, "mean": 217.76443481445312, "std": 283.3239440917969, "min": -324.861328125, "p10": -123.26521835327148, "median": 192.16505432128906, "p90": 608.4205017089844, "max": 952.6790161132812, "pos_frac": 0.78125, "sample": [-119.03008270263672, 222.9935302734375, 303.7890319824219, 104.31594848632812, -317.4493103027344, 35.111881256103516, -92.21904754638672, 76.772216796875, -80.81814575195312, 451.5819091796875, 94.69235229492188, -225.24105834960938, 480.471923828125, 659.1760864257812, 186.45565795898438, -80.66932678222656, 340.3964538574219, 22.28091812133789, 99.82823944091797, -115.68641662597656, 349.4653015136719, 572.4686889648438, 374.02423095703125, 20.863372802734375, 620.8023681640625, 952.6790161132812, 346.0774841308594, 97.5761947631836, 595.391845703125, 215.10009765625, 161.498779296875, -35.892578125, -180.88116455078125, 614.0042114257812, 363.27862548828125, 393.70330810546875, 839.0698852539062, 353.07806396484375, 197.87445068359375, 136.78749084472656, 242.5475616455078, 138.31024169921875, 71.89141845703125, -149.891357421875, 514.4498291015625, -146.4241180419922, 308.15545654296875, 55.11406326293945, 523.240478515625, 584.0706787109375, 385.25616455078125, 293.3099670410156, 638.2122802734375, 330.35968017578125, 755.7872314453125, 233.55984497070312, -125.08027648925781, 41.45729064941406, -118.67129516601562, 130.60977172851562, 125.68419647216797, 100.5832748413086, 295.5299072265625, -324.861328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000366.npy"}
|
||||
{"epoch": 0.5374449339207048, "step": 367, "batch_size": 64, "mean": 183.06765747070312, "std": 227.944580078125, "min": -496.8251647949219, "p10": -60.19221038818359, "median": 155.29256439208984, "p90": 498.3701629638672, "max": 757.6092529296875, "pos_frac": 0.796875, "sample": [100.58868408203125, 169.6249237060547, 503.63397216796875, 69.74288940429688, 615.3511962890625, 6.485410690307617, 372.3145446777344, 159.42425537109375, 545.4365844726562, 19.952369689941406, 28.482666015625, -0.08938407897949219, -64.00433349609375, 498.2732849121094, 457.25909423828125, 310.74267578125, 423.17584228515625, 127.61412048339844, 251.27696228027344, -61.16001892089844, 237.16372680664062, 378.4901428222656, -64.97608184814453, 18.623252868652344, 311.4878845214844, 274.54278564453125, 757.6092529296875, -48.73512268066406, 515.2332153320312, 107.295654296875, 422.67144775390625, 187.6526641845703, -16.75136947631836, 328.208740234375, 63.81416320800781, 203.22482299804688, 300.57427978515625, 285.8527526855469, 367.5028076171875, 82.61767578125, 189.10731506347656, -224.26092529296875, -166.1964569091797, -57.933990478515625, -19.0430908203125, 29.023283004760742, 73.20498657226562, 368.6918640136719, 151.16087341308594, 81.95220947265625, -196.15225219726562, -496.8251647949219, 109.49060821533203, 125.05714416503906, 268.498291015625, 366.6778564453125, 404.83953857421875, 314.4071044921875, 510.38385009765625, 16.253005981445312, 133.5771942138672, -34.846092224121094, 498.41168212890625, 24.622833251953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000367.npy"}
|
||||
{"epoch": 0.5389133627019089, "step": 368, "batch_size": 64, "mean": 287.25018310546875, "std": 220.2408905029297, "min": -212.74026489257812, "p10": 1.4839202880859546, "median": 306.0147247314453, "p90": 556.0627563476563, "max": 1054.4293212890625, "pos_frac": 0.890625, "sample": [316.7446594238281, 326.3857116699219, 222.967041015625, 484.5563049316406, 390.0106201171875, 570.5964965820312, 163.81048583984375, 47.58531188964844, 559.35595703125, 173.33718872070312, -25.441707611083984, 426.9725341796875, 470.67242431640625, 74.89556884765625, 298.5332336425781, 395.4399719238281, 313.4962158203125, 225.68663024902344, -38.693885803222656, 416.2152404785156, 209.84280395507812, 291.4240417480469, 550.8099365234375, 335.0853576660156, 372.9167175292969, 116.31402587890625, 445.59344482421875, -5.625865936279297, 22.412891387939453, 18.073421478271484, 167.0222625732422, 435.83734130859375, 397.8533020019531, 352.6956787109375, 348.3077392578125, 590.1222534179688, 396.9013671875, 61.606475830078125, 179.1294708251953, -38.540122985839844, 321.7934265136719, 512.017333984375, 420.6696472167969, 580.2923583984375, 155.85118103027344, 295.9757995605469, 667.0343017578125, 20.890426635742188, -212.74026489257812, 115.14227294921875, 107.81922149658203, 513.0592651367188, 449.9888000488281, 483.3740539550781, 352.7123107910156, 32.164459228515625, 228.07949829101562, 1054.4293212890625, 215.280029296875, 254.54812622070312, 283.0296936035156, -23.771041870117188, -62.85129928588867, 558.31396484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000368.npy"}
|
||||
{"epoch": 0.540381791483113, "step": 369, "batch_size": 64, "mean": 153.03839111328125, "std": 271.9935302734375, "min": -629.9818725585938, "p10": -121.50907745361327, "median": 134.21212005615234, "p90": 480.5529174804688, "max": 853.2477416992188, "pos_frac": 0.765625, "sample": [-216.3404541015625, -101.85841369628906, 853.2477416992188, -31.0222225189209, -252.38609313964844, 31.226438522338867, -80.93291473388672, 288.77740478515625, 18.83327865600586, 686.554443359375, 214.49813842773438, -78.94699096679688, 148.97994995117188, 306.96636962890625, 283.89898681640625, 796.2073364257812, 472.5619201660156, 162.90191650390625, 37.55424880981445, 400.5149841308594, 305.0142822265625, 111.86601257324219, 104.39340209960938, 238.97894287109375, 228.96875, 176.7203369140625, -464.74896240234375, 224.31353759765625, 181.89639282226562, 114.90904998779297, 263.048583984375, 383.9266357421875, 104.49651336669922, 106.60572814941406, 173.27890014648438, 27.97447967529297, 345.1593017578125, 92.21868133544922, -124.63179016113281, 364.76129150390625, 483.9776306152344, -186.1616668701172, 706.0120849609375, 48.19700622558594, 487.5111999511719, 171.82415771484375, 397.09588623046875, -365.625732421875, -1.1947212219238281, 607.808837890625, 177.63153076171875, 119.44429016113281, -629.9818725585938, 82.91461944580078, 200.74481201171875, -114.22274780273438, -96.13801574707031, 88.80651092529297, 115.50763702392578, 368.9046630859375, 175.46505737304688, 9.108177185058594, 99.00115966796875, -52.560569763183594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000369.npy"}
|
||||
{"epoch": 0.5418502202643172, "step": 370, "batch_size": 64, "mean": 144.05679321289062, "std": 282.9492492675781, "min": -442.21746826171875, "p10": -227.66142120361326, "median": 148.87486267089844, "p90": 501.4794403076172, "max": 854.189208984375, "pos_frac": 0.703125, "sample": [95.0081558227539, 291.6215515136719, 212.36180114746094, -390.48095703125, 721.2814331054688, 371.71649169921875, 114.6275405883789, 254.25502014160156, -9.505525588989258, -50.816078186035156, 409.15203857421875, 496.7566833496094, 43.167449951171875, 159.1536865234375, -118.00753784179688, 503.50347900390625, 222.66433715820312, 125.34783935546875, 230.96780395507812, 854.189208984375, 772.5557250976562, 182.45928955078125, 326.0570373535156, -115.7553939819336, 281.4033203125, -186.47994995117188, 41.1754150390625, 302.79803466796875, -219.09463500976562, 185.00454711914062, 542.37255859375, 113.16146850585938, -1.7650604248046875, 282.17486572265625, -23.003273010253906, 399.26898193359375, 249.90469360351562, -136.48312377929688, -341.54559326171875, 24.472631454467773, 181.65994262695312, 231.1632080078125, -231.33290100097656, 83.12673950195312, 439.086669921875, 117.86154174804688, 203.98944091796875, 294.642333984375, 221.9668731689453, -442.21746826171875, -102.95924377441406, -50.95733642578125, 109.89163208007812, 138.59603881835938, -374.4965515136719, -314.4357604980469, 9.725128173828125, -282.4848937988281, -128.95339965820312, 464.76171875, 609.602294921875, 244.56671142578125, 10.53436279296875, 570.6514892578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000370.npy"}
|
||||
{"epoch": 0.5433186490455213, "step": 371, "batch_size": 64, "mean": 212.34832763671875, "std": 300.55584716796875, "min": -530.7329711914062, "p10": -114.33891525268554, "median": 156.32872772216797, "p90": 612.3943664550783, "max": 933.8975219726562, "pos_frac": 0.734375, "sample": [384.90509033203125, 274.7866516113281, 536.3433837890625, 84.19417572021484, -530.7329711914062, 159.56430053710938, 563.6571655273438, 211.9822998046875, 804.9826049804688, 305.78778076171875, 561.7298583984375, -19.54856300354004, 345.66094970703125, 203.22055053710938, -301.4771423339844, 96.19136047363281, -15.62176513671875, 188.54580688476562, 489.59454345703125, 1.3438720703125, -44.06541442871094, -243.52552795410156, 633.28173828125, -117.80857849121094, -202.185791015625, 121.70153045654297, 931.2005004882812, 674.3510131835938, -222.46229553222656, 40.796905517578125, 712.9678344726562, -24.754074096679688, 330.73974609375, 249.52720642089844, -106.24303436279297, -18.536849975585938, 549.4996337890625, -133.14236450195312, 255.3057861328125, 444.22784423828125, -49.448822021484375, 114.91703033447266, 728.057861328125, 77.9625015258789, 17.791027069091797, 246.80892944335938, 286.82305908203125, 385.9934387207031, 478.0614013671875, -27.868240356445312, 431.00372314453125, 147.20721435546875, 117.14292907714844, 338.347412109375, 153.09315490722656, 151.11233520507812, 118.56651306152344, 73.30722045898438, -27.910232543945312, 523.9833374023438, 275.51336669921875, 933.8975219726562, -86.51957702636719, 6.463768005371094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000371.npy"}
|
||||
{"epoch": 0.5447870778267254, "step": 372, "batch_size": 64, "mean": 255.53970336914062, "std": 328.2554016113281, "min": -386.96002197265625, "p10": -131.11302261352537, "median": 183.35711669921875, "p90": 732.0615417480469, "max": 1299.8232421875, "pos_frac": 0.765625, "sample": [323.06658935546875, 311.214111328125, 732.9933471679688, -245.63978576660156, 46.43260192871094, 667.1011962890625, 518.7196655273438, 226.1856689453125, 301.4213562011719, -15.942413330078125, 1299.8232421875, 310.9601135253906, -17.17266845703125, 164.89535522460938, -38.349273681640625, 107.3897933959961, 66.95442199707031, 88.61092376708984, 761.1295166015625, -203.93795776367188, 131.7937774658203, 175.45127868652344, 729.8873291015625, -151.36483764648438, 511.4277038574219, 150.3231201171875, 104.9088363647461, 179.96218872070312, 186.75204467773438, 150.75286865234375, 177.78265380859375, 344.53466796875, 364.08203125, -125.52489471435547, 173.90997314453125, -124.3689193725586, 367.77642822265625, 739.3311157226562, 95.7076187133789, 1060.918701171875, 634.4934692382812, 351.196533203125, 187.8010711669922, -152.47042846679688, 487.6734619140625, 485.3784484863281, 717.7850952148438, 234.75462341308594, -10.032913208007812, 329.57098388671875, 159.76541137695312, 268.4546813964844, 100.64468383789062, -70.67186737060547, 773.6585693359375, 150.55137634277344, -23.452791213989258, 208.61474609375, -133.5079345703125, 875.1115112304688, -204.22122192382812, -386.96002197265625, 531.4111938476562, 189.09304809570312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000372.npy"}
|
||||
{"epoch": 0.5462555066079295, "step": 373, "batch_size": 64, "mean": 242.35186767578125, "std": 282.3642272949219, "min": -497.9678649902344, "p10": -70.8711776733398, "median": 207.74065399169922, "p90": 627.711328125, "max": 812.9537963867188, "pos_frac": 0.84375, "sample": [114.18387603759766, -164.12538146972656, -215.465576171875, 258.3079833984375, 57.21270751953125, 812.9537963867188, 456.7564392089844, 49.73866271972656, 245.36648559570312, 164.85223388671875, 683.292724609375, 190.5975799560547, -31.024078369140625, 271.6952209472656, 391.7835388183594, 58.79712677001953, 24.934627532958984, 749.9470825195312, 342.1835021972656, 12.078544616699219, -395.5986022949219, -33.972869873046875, 229.16537475585938, -497.9678649902344, 104.89239501953125, 58.12694549560547, 163.29525756835938, 132.5430908203125, 596.6473388671875, 443.8326416015625, 207.8041534423828, 486.19219970703125, 337.75830078125, 600.3876342773438, 461.4230041503906, -87.69711303710938, -179.30374145507812, 631.207763671875, 275.1749267578125, -3.8444671630859375, 778.1350708007812, 130.58712768554688, 392.60186767578125, 190.2716064453125, 388.0401916503906, 488.61700439453125, 284.5907897949219, 257.46282958984375, 564.7015380859375, 777.5294189453125, 163.36212158203125, 53.429107666015625, 111.73382568359375, 207.67715454101562, 0.2546539306640625, 283.77734375, 105.85182189941406, 126.81939697265625, 747.7547607421875, 404.62841796875, 440.6748962402344, 619.552978515625, -86.68473815917969, 75.01793670654297], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000373.npy"}
|
||||
{"epoch": 0.5477239353891337, "step": 374, "batch_size": 64, "mean": 218.37730407714844, "std": 358.6478576660156, "min": -701.4334106445312, "p10": -113.41287689208983, "median": 142.73760223388672, "p90": 578.4334106445314, "max": 1318.2310791015625, "pos_frac": 0.734375, "sample": [-144.15353393554688, 134.93075561523438, -32.48487854003906, 1318.2310791015625, 328.66693115234375, 1198.518310546875, 243.31939697265625, 600.25244140625, 449.69940185546875, -17.50818634033203, 140.36660766601562, 331.1541442871094, 30.978912353515625, 202.41940307617188, 66.30322265625, -48.58747100830078, 530.6968383789062, 52.210113525390625, 588.8627319335938, 358.5140075683594, -62.0830078125, 8.15389633178711, 425.79473876953125, 60.08003234863281, -87.26338195800781, 893.0155029296875, -5.133474349975586, 84.22975158691406, -234.11143493652344, 337.5664367675781, 255.29196166992188, 273.0031433105469, -118.93002319335938, 959.2194213867188, -482.6185302734375, 146.9589080810547, -6.9329986572265625, 545.6192016601562, 119.22077178955078, -39.873565673828125, 145.1085968017578, 82.03235626220703, 65.1303482055664, 101.35415649414062, 285.8255310058594, 238.39187622070312, -164.9590301513672, 521.6366577148438, 305.36187744140625, 459.28973388671875, 105.07511901855469, 554.0983276367188, -100.53953552246094, 415.6943359375, 296.5132751464844, -32.565086364746094, -150.00177001953125, 223.25823974609375, 6.7643280029296875, 79.71345520019531, 1167.2642822265625, -701.4334106445312, 163.34759521484375, 506.1877136230469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000374.npy"}
|
||||
{"epoch": 0.5491923641703378, "step": 375, "batch_size": 64, "mean": 210.68402099609375, "std": 308.8144226074219, "min": -534.0509033203125, "p10": -153.57788772583, "median": 174.32685089111328, "p90": 584.2585327148439, "max": 1031.4248046875, "pos_frac": 0.765625, "sample": [649.7489013671875, 325.79510498046875, -13.9429931640625, 282.2173156738281, -99.38050842285156, -175.7362060546875, 482.69635009765625, 228.3541259765625, 260.34295654296875, -24.451587677001953, -10.083627700805664, -43.47456359863281, 19.727378845214844, -534.0509033203125, 140.70254516601562, 905.8597412109375, 391.21002197265625, 49.33123779296875, 329.87481689453125, 132.46624755859375, 24.779876708984375, 242.15390014648438, -470.1048583984375, 28.085365295410156, 465.818603515625, -234.0392608642578, 597.2655029296875, 75.41748046875, 140.60296630859375, 76.25028228759766, -337.1936950683594, 99.07133483886719, 553.908935546875, 651.6192016601562, 31.628345489501953, 265.5318603515625, 321.0325927734375, 493.752197265625, 544.479248046875, 684.339111328125, 407.2757873535156, 186.20762634277344, 162.44607543945312, -351.08673095703125, 453.01263427734375, 467.0486145019531, 289.4651794433594, -243.85769653320312, 1031.4248046875, 520.5957641601562, 364.65576171875, 0.6989707946777344, -1.636627197265625, 358.9908752441406, 128.36074829101562, 515.4788208007812, 107.46662902832031, 620.0242919921875, -101.8751449584961, 159.48162841796875, 424.873046875, 366.9378356933594, 97.452880859375, -31.270153045654297], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000375.npy"}
|
||||
{"epoch": 0.5506607929515418, "step": 376, "batch_size": 64, "mean": 263.78704833984375, "std": 319.98492431640625, "min": -564.4385986328125, "p10": -125.00766525268554, "median": 231.02755737304688, "p90": 678.4385559082032, "max": 859.479248046875, "pos_frac": 0.796875, "sample": [-122.6146011352539, 754.2797241210938, 237.70455932617188, 41.56057357788086, 859.479248046875, 88.04414367675781, 496.7021484375, -247.0748748779297, 22.633163452148438, -4.5548553466796875, 389.6141357421875, -0.022144317626953125, 649.30712890625, 120.19096374511719, -243.62051391601562, 500.8436279296875, 582.4832153320312, -211.99386596679688, -111.35592651367188, 176.4796600341797, 89.31260681152344, 12.757917404174805, -126.03326416015625, 842.1654663085938, 750.9877319335938, 139.2799072265625, 442.259521484375, 312.3701171875, 508.523681640625, -90.21275329589844, 686.4537353515625, 521.4011840820312, 141.42250061035156, 587.4425659179688, 125.24871826171875, 591.7324829101562, 518.0418701171875, 326.9480285644531, 458.65478515625, 266.297119140625, 229.5517120361328, 538.7024536132812, -126.66911315917969, 246.91436767578125, 637.3565673828125, 95.30015563964844, 847.85888671875, 15.457740783691406, -154.45213317871094, 47.85974884033203, 26.681137084960938, 112.16329193115234, 641.848388671875, 682.3639526367188, 304.38153076171875, 669.279296875, 164.84117126464844, 304.678466796875, 232.50340270996094, 192.314208984375, -90.4361801147461, 99.5538330078125, 645.618896484375, -564.4385986328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000376.npy"}
|
||||
{"epoch": 0.5521292217327459, "step": 377, "batch_size": 64, "mean": 256.0513916015625, "std": 302.17694091796875, "min": -424.00677490234375, "p10": -87.80182266235352, "median": 216.34427642822266, "p90": 689.5483337402346, "max": 919.81640625, "pos_frac": 0.765625, "sample": [386.0150146484375, 919.81640625, 106.280029296875, -80.677734375, 320.8782653808594, -40.0347900390625, 238.55441284179688, -161.38925170898438, 275.19952392578125, 54.69397735595703, 850.3357543945312, 743.04345703125, -37.81499481201172, 903.6669921875, 41.880149841308594, 215.96365356445312, 208.10557556152344, 496.2308654785156, 211.94622802734375, -44.00811767578125, 242.0296630859375, 627.7178344726562, 457.1817932128906, -22.826705932617188, 584.8834228515625, 881.6124877929688, 416.6796569824219, -198.31199645996094, 623.0616455078125, 179.54763793945312, 335.864013671875, 216.7248992919922, 55.76726531982422, 147.36801147460938, 58.66006088256836, 467.05303955078125, 114.48088073730469, 332.4678039550781, 456.75115966796875, 637.99609375, 254.01759338378906, -53.0723876953125, -424.00677490234375, 154.96185302734375, -116.30984497070312, 87.4527816772461, 492.78192138671875, 401.608154296875, -224.0575714111328, 354.84423828125, 131.4658660888672, 711.6421508789062, -73.94490051269531, -32.1806640625, -132.95184326171875, 570.6139526367188, 199.16207885742188, 737.8721923828125, 378.188232421875, 14.437644958496094, 341.0670166015625, 174.85507202148438, 306.30206298828125, -90.8550033569336], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000377.npy"}
|
||||
{"epoch": 0.55359765051395, "step": 378, "batch_size": 64, "mean": 267.5641174316406, "std": 287.4216613769531, "min": -544.346435546875, "p10": -60.18887100219727, "median": 245.7113037109375, "p90": 636.6937744140625, "max": 1116.73193359375, "pos_frac": 0.84375, "sample": [730.643310546875, 326.75872802734375, 635.87841796875, 645.97021484375, 476.34747314453125, 122.03677368164062, 241.14031982421875, 366.21771240234375, 148.177734375, 202.06787109375, 342.19281005859375, 384.49273681640625, 653.689453125, 182.09254455566406, 125.15463256835938, -33.72740173339844, 266.49127197265625, 712.3019409179688, 242.11294555664062, 304.8567810058594, 195.60211181640625, -73.76026153564453, -58.125667572021484, 58.03620147705078, 249.30966186523438, 212.1263427734375, -69.09080505371094, 513.3075561523438, 59.487754821777344, 496.7477722167969, 518.3287353515625, 499.1545104980469, 276.3764953613281, 85.7857666015625, 3.9350357055664062, 485.8636169433594, 210.5810546875, 1116.73193359375, 49.83470153808594, -296.29412841796875, 280.35064697265625, -71.36736297607422, 201.3553924560547, 610.66162109375, 158.1201171875, -58.18182373046875, 470.1217956542969, 328.7126159667969, 502.0570983886719, 359.5914001464844, 104.43357849121094, 301.0455322265625, 466.2216796875, 269.09576416015625, -544.346435546875, 196.1608428955078, 234.27987670898438, 513.2183837890625, -61.049034118652344, 797.9596557617188, 99.24546813964844, -428.38641357421875, 148.92599487304688, 637.043212890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000378.npy"}
|
||||
{"epoch": 0.5550660792951542, "step": 379, "batch_size": 64, "mean": 259.21221923828125, "std": 330.3843994140625, "min": -525.1060791015625, "p10": -116.05961990356442, "median": 221.10037994384766, "p90": 678.0278442382813, "max": 1335.815673828125, "pos_frac": 0.8125, "sample": [244.00332641601562, 490.0196533203125, 18.487640380859375, 222.3914794921875, -73.41796112060547, 96.85285186767578, 213.26150512695312, 691.0101318359375, 197.95626831054688, 127.7989501953125, 398.0243835449219, 878.6036376953125, 5.7932586669921875, 499.9867248535156, 343.41156005859375, 216.89599609375, -525.1060791015625, 969.6152954101562, 43.39793395996094, 294.2813720703125, 542.1021118164062, 418.6430358886719, 339.47216796875, 13.355060577392578, 127.22207641601562, -73.42648315429688, -306.30780029296875, 148.2393035888672, 23.66657257080078, 440.16900634765625, 771.8004150390625, 396.45465087890625, -287.05206298828125, 94.45635986328125, 487.4359436035156, 755.885009765625, 337.61090087890625, 384.5936279296875, 647.73583984375, 122.74783325195312, -93.12726593017578, 587.4072265625, 241.39697265625, -44.64850997924805, 465.34625244140625, -137.37249755859375, 180.39785766601562, 418.3856506347656, 299.84979248046875, 219.8092803955078, -125.88777160644531, 411.9046630859375, 763.4208374023438, 211.54408264160156, 1335.815673828125, 303.6401062011719, 31.20519256591797, 512.153564453125, 116.24272918701172, -252.9632568359375, 49.24568176269531, 602.2413330078125, -181.5408935546875, -62.95707702636719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000379.npy"}
|
||||
{"epoch": 0.5565345080763583, "step": 380, "batch_size": 64, "mean": 284.94622802734375, "std": 329.4243469238281, "min": -822.4442138671875, "p10": -73.90530471801756, "median": 303.6613006591797, "p90": 671.7466064453125, "max": 1180.853271484375, "pos_frac": 0.859375, "sample": [140.95538330078125, 108.70301818847656, 83.46730041503906, 1180.853271484375, 64.51900482177734, 99.0707015991211, 146.96139526367188, 358.99493408203125, 481.08245849609375, 345.08905029296875, 126.76409149169922, 427.3838195800781, -52.574317932128906, 616.7847290039062, 855.660400390625, 467.66583251953125, 383.0420837402344, 467.538330078125, 516.587646484375, 175.86474609375, 438.6531066894531, -262.88385009765625, 236.1798095703125, 241.666015625, 83.58517456054688, -80.04222869873047, 481.0125732421875, 362.8634033203125, 130.84426879882812, 668.4896240234375, 378.74468994140625, 668.5750732421875, 620.29248046875, 658.1673583984375, 673.1058349609375, 404.0108642578125, 109.80610656738281, -59.5858154296875, 285.1138610839844, 41.85393524169922, 813.211181640625, 65.27619934082031, -254.8602294921875, 194.50592041015625, -262.82684326171875, 214.86392211914062, 322.208740234375, 687.5350952148438, 63.37518310546875, -353.170654296875, 444.02252197265625, 22.618131637573242, 747.7872314453125, 354.48211669921875, 277.45623779296875, -98.76969146728516, 514.3594360351562, 368.3463134765625, 186.1463165283203, 106.96685791015625, -822.4442138671875, 344.63531494140625, 844.493896484375, 381.47344970703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000380.npy"}
|
||||
{"epoch": 0.5580029368575624, "step": 381, "batch_size": 64, "mean": 199.41497802734375, "std": 334.237548828125, "min": -688.20458984375, "p10": -163.53164825439453, "median": 200.0733871459961, "p90": 593.6789855957032, "max": 1055.070556640625, "pos_frac": 0.75, "sample": [263.9048767089844, 12.512603759765625, 54.8121337890625, 637.724853515625, 486.8897705078125, 689.1968383789062, 513.5513305664062, -31.367233276367188, 127.62928009033203, 555.89599609375, 424.46148681640625, 114.28196716308594, 405.9099426269531, 276.447021484375, 1019.1429443359375, -180.67535400390625, 445.147705078125, 361.45635986328125, 689.4347534179688, 213.20437622070312, 1004.1549682617188, 599.7503051757812, -232.70162963867188, -156.2824249267578, -129.19703674316406, 131.79269409179688, 579.5125732421875, 60.86277389526367, -297.4479064941406, -93.48859405517578, 139.12266540527344, 216.038818359375, 97.57780456542969, -41.799678802490234, 258.5462646484375, -104.01937866210938, 255.68417358398438, 113.01343536376953, 214.55166625976562, -77.18399047851562, 13.485435485839844, 109.37384033203125, 1055.070556640625, 223.14137268066406, 386.79345703125, 408.819091796875, 49.20965576171875, -166.63845825195312, 444.4517822265625, -108.21876525878906, 29.451040267944336, 369.57879638671875, -86.07766723632812, 233.59657287597656, 113.72087860107422, -250.05514526367188, 63.962158203125, 338.9273681640625, 401.82080078125, 235.62107849121094, 186.94239807128906, -688.20458984375, -600.254638671875, 379.992919921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000381.npy"}
|
||||
{"epoch": 0.5594713656387665, "step": 382, "batch_size": 64, "mean": 247.65719604492188, "std": 257.2524719238281, "min": -289.37298583984375, "p10": -76.29117355346679, "median": 199.0723876953125, "p90": 630.066912841797, "max": 970.65380859375, "pos_frac": 0.8125, "sample": [970.65380859375, 686.3150024414062, 151.23715209960938, 136.43417358398438, 153.9752655029297, 719.119384765625, -76.7417984008789, 113.1079330444336, 299.3489990234375, -167.76397705078125, 151.52955627441406, 248.24783325195312, 396.74114990234375, 423.8822021484375, 485.13677978515625, 510.1117858886719, 215.80409240722656, 77.19276428222656, 443.1980285644531, 123.55104064941406, 377.1105041503906, -77.76359558105469, 34.57337951660156, 354.0921936035156, 747.914794921875, -101.35762786865234, 356.63739013671875, -75.23971557617188, 549.9948120117188, 271.1094055175781, 658.2052001953125, -93.09783172607422, -28.39468002319336, 332.7884521484375, 438.6501770019531, -289.37298583984375, -141.20388793945312, 410.55926513671875, -15.626861572265625, 327.14813232421875, 180.028076171875, 254.8277587890625, 361.38726806640625, 87.21337127685547, 189.96449279785156, 84.86100769042969, 123.60023498535156, 231.73385620117188, 208.18028259277344, 138.1802215576172, 664.8914794921875, 586.2080078125, 308.22845458984375, 11.889307022094727, 184.3524169921875, 648.8635864257812, 72.45106506347656, -12.86227798461914, 481.10479736328125, 579.17138671875, 153.0309295654297, 106.64153289794922, -15.125015258789062, 123.4306640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000382.npy"}
|
||||
{"epoch": 0.5609397944199707, "step": 383, "batch_size": 64, "mean": 239.6513671875, "std": 339.3735046386719, "min": -892.0908203125, "p10": -108.47313461303709, "median": 243.4184341430664, "p90": 724.4600341796876, "max": 1001.7495727539062, "pos_frac": 0.734375, "sample": [-10.541221618652344, -76.20556640625, 104.60549926757812, -299.46539306640625, 543.1724243164062, -215.33973693847656, -125.14019012451172, 568.435791015625, 338.90032958984375, 48.01994705200195, 287.3232421875, 185.0607147216797, 87.114013671875, -44.47724914550781, -345.3433837890625, 255.73956298828125, 921.48291015625, 6.504798889160156, 267.8844909667969, 313.4036865234375, 781.0933837890625, -145.10842895507812, -9.863826751708984, 693.9224243164062, 181.62322998046875, -89.43701934814453, 730.9598388671875, 89.42451477050781, -79.12054443359375, 295.401123046875, 1001.7495727539062, 205.86634826660156, 709.2938232421875, 235.739990234375, 406.39837646484375, 413.34063720703125, 455.4978942871094, 69.13241577148438, 238.2721710205078, 286.837158203125, -51.719146728515625, -23.46868896484375, 253.46524047851562, 73.71937561035156, 343.1187438964844, 783.5463256835938, -116.6314697265625, 547.0075073242188, 19.891250610351562, 316.8260498046875, 731.6793823242188, 97.90409088134766, -86.20658874511719, 314.3375549316406, 407.86761474609375, -1.9034957885742188, 812.1634521484375, 563.1771240234375, 248.564697265625, 596.95556640625, -892.0908203125, 131.20809936523438, 466.36669921875, 519.75048828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000383.npy"}
|
||||
{"epoch": 0.5624082232011748, "step": 384, "batch_size": 64, "mean": 189.2479248046875, "std": 235.35971069335938, "min": -377.0197448730469, "p10": -88.67387008666992, "median": 187.73779296875, "p90": 458.44370117187503, "max": 889.911376953125, "pos_frac": 0.8125, "sample": [188.94671630859375, 47.865203857421875, 889.911376953125, -154.76988220214844, 3.512847900390625, 95.09355163574219, -210.24607849121094, -33.50071716308594, 731.3986206054688, 179.38287353515625, 468.6391296386719, 463.735595703125, 176.4945831298828, -232.91595458984375, 384.3327941894531, 69.46673583984375, -91.33332824707031, 19.232952117919922, 355.55023193359375, 368.9561767578125, 186.52886962890625, 270.9286804199219, 72.09907531738281, 405.02593994140625, 297.7718505859375, 148.88246154785156, 149.31463623046875, -238.34796142578125, -377.0197448730469, 72.69640350341797, 299.4001770019531, 140.7339630126953, 50.94215393066406, 208.88388061523438, 191.0664825439453, -75.08219146728516, 394.8312683105469, 267.80987548828125, 340.10772705078125, 66.4442138671875, 13.227123260498047, 237.9697723388672, 340.90081787109375, 528.322509765625, 436.6484680175781, 272.3250732421875, 306.0452880859375, 280.83050537109375, 234.96804809570312, -170.7821044921875, 254.43060302734375, -54.600975036621094, 131.9688720703125, 482.96893310546875, 114.8021240234375, -82.46846771240234, -63.385372161865234, 446.095947265625, 429.10919189453125, 115.77314758300781, 590.9354248046875, 320.7110290527344, 51.91046142578125, 300.3902587890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000384.npy"}
|
||||
{"epoch": 0.5638766519823789, "step": 385, "batch_size": 64, "mean": 237.38702392578125, "std": 239.79306030273438, "min": -343.0481872558594, "p10": 2.738681030273439, "median": 208.0983123779297, "p90": 584.7101196289062, "max": 788.698486328125, "pos_frac": 0.90625, "sample": [40.59161376953125, 12.794807434082031, 66.61632537841797, 261.1031799316406, 446.3179931640625, 312.5791931152344, 443.4898681640625, 206.22781372070312, -149.19332885742188, 495.5787353515625, 119.97792053222656, 19.188016891479492, 116.1703872680664, 185.279541015625, 506.334228515625, 162.96517944335938, 643.3267822265625, 311.01177978515625, 87.76317596435547, 137.63345336914062, 366.5035705566406, 151.13783264160156, -162.24610900878906, 300.35137939453125, -208.15072631835938, 240.5872802734375, 201.59471130371094, 585.77392578125, 110.94660949707031, 60.7073974609375, 348.77392578125, 251.25218200683594, 152.514404296875, 759.546630859375, 788.698486328125, 201.6394805908203, 204.35397338867188, 2.0371856689453125, 582.2279052734375, 267.36236572265625, 4.3755035400390625, 116.00904846191406, 445.8387756347656, 145.8434295654297, 222.1680908203125, 660.2741088867188, 387.1573181152344, -88.89483642578125, 641.78759765625, 325.249755859375, -343.0481872558594, 33.51808166503906, 160.38803100585938, -248.0113525390625, 213.95928955078125, 296.28277587890625, 482.1225891113281, 92.24286651611328, 308.45355224609375, 698.8436279296875, 109.12117004394531, 345.4481506347656, 209.96881103515625, 342.3018493652344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000385.npy"}
|
||||
{"epoch": 0.5653450807635829, "step": 386, "batch_size": 64, "mean": 248.9279022216797, "std": 302.6310119628906, "min": -470.534423828125, "p10": -130.249641418457, "median": 188.51953125, "p90": 667.7709838867189, "max": 967.8319702148438, "pos_frac": 0.828125, "sample": [-90.14752197265625, 389.5399475097656, -62.92571258544922, 388.6624450683594, 678.7308349609375, 202.88938903808594, 69.2785415649414, 480.0964050292969, 146.21493530273438, 427.78863525390625, 385.453857421875, 949.4689331054688, -279.985595703125, -36.386749267578125, 641.3968505859375, 341.6405029296875, 311.3150939941406, 411.4570617675781, 53.07438659667969, 113.85279083251953, 168.59286499023438, 258.97247314453125, 174.3392333984375, 171.30844116210938, 301.4281311035156, 265.4710998535156, 97.5892105102539, -155.66943359375, 967.8319702148438, 642.197998046875, -67.99078369140625, 128.82144165039062, 148.87759399414062, -178.44256591796875, 194.44129943847656, 795.821533203125, 931.5291137695312, 304.13214111328125, 706.8428955078125, 156.72166442871094, 234.40786743164062, 574.8775024414062, -230.5191192626953, 182.59776306152344, 43.370147705078125, 141.65045166015625, 174.38929748535156, 173.3975067138672, -147.43626403808594, 365.34613037109375, -470.534423828125, 754.8136596679688, 52.68592071533203, 84.1245346069336, 95.2716064453125, 161.9370880126953, 508.81451416015625, 364.1797180175781, -326.23114013671875, 466.9902038574219, 496.47479248046875, 242.67848205566406, 350.3615417480469, 103.50664520263672], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000386.npy"}
|
||||
{"epoch": 0.566813509544787, "step": 387, "batch_size": 64, "mean": 209.68357849121094, "std": 258.1988830566406, "min": -412.9181823730469, "p10": -65.83155364990235, "median": 203.55902862548828, "p90": 503.2237579345703, "max": 880.962890625, "pos_frac": 0.796875, "sample": [309.4371337890625, -34.744468688964844, 121.27146911621094, 207.34361267089844, 232.078125, 880.962890625, 199.77444458007812, 119.99555969238281, 393.8467102050781, 428.40655517578125, -412.9181823730469, 306.58221435546875, 130.43951416015625, 28.666576385498047, -363.11236572265625, 217.24032592773438, 503.426025390625, 567.4124145507812, -175.95266723632812, 105.30642700195312, 828.1380004882812, 61.7862663269043, 218.36526489257812, 707.01953125, 391.9757080078125, 141.53244018554688, 138.98373413085938, 233.76077270507812, 66.02849578857422, 166.9505157470703, 48.096778869628906, 174.46131896972656, 470.9453430175781, 175.87452697753906, 501.0801696777344, 150.74041748046875, 340.5789794921875, 102.13291931152344, 279.2355041503906, 360.5885314941406, 69.24376678466797, -6.7341718673706055, 635.8150634765625, 502.7518005371094, 361.8861999511719, 109.111572265625, 370.8709411621094, -49.099857330322266, 538.4710693359375, -66.19960021972656, 390.3143310546875, -148.96499633789062, 13.772941589355469, 338.20355224609375, -218.00479125976562, 249.1416778564453, -10.000190734863281, 394.45941162109375, -64.9727783203125, -29.353530883789062, 287.61236572265625, -274.4200134277344, 278.8827209472656, 423.22418212890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000387.npy"}
|
||||
{"epoch": 0.5682819383259912, "step": 388, "batch_size": 64, "mean": 281.1998291015625, "std": 330.2901306152344, "min": -527.3045043945312, "p10": -29.57113800048826, "median": 218.10914611816406, "p90": 693.3907226562502, "max": 1533.1505126953125, "pos_frac": 0.84375, "sample": [1533.1505126953125, 567.5638427734375, -85.27165222167969, 517.2579345703125, 551.2025146484375, 805.0408325195312, 165.4611358642578, -256.3410949707031, 136.0897216796875, 296.4772644042969, 153.96710205078125, 864.2305908203125, 520.6636352539062, 246.9364013671875, -527.3045043945312, 59.42719268798828, 283.021484375, 203.5260772705078, 436.677978515625, -5.679893493652344, 152.68798828125, 735.2926635742188, 164.84176635742188, -69.03998565673828, 136.3265838623047, 237.09970092773438, 78.2639389038086, -9.674148559570312, 104.92868041992188, 273.5552673339844, 103.76354217529297, 146.34532165527344, 283.64642333984375, 59.276947021484375, 487.100341796875, 191.6710205078125, 147.48155212402344, 81.57489776611328, 551.1841430664062, 107.29105377197266, 432.4689025878906, 74.81678771972656, 404.0882568359375, 150.77774047851562, 187.83331298828125, 27.805458068847656, 444.4593505859375, 331.11578369140625, -233.81326293945312, 374.4888610839844, 334.24334716796875, 1170.326416015625, -138.03541564941406, 366.86065673828125, 244.05909729003906, 29.502029418945312, -38.098419189453125, 547.9444580078125, 717.1830444335938, 788.77783203125, 637.8753051757812, 488.7724914550781, 232.6922149658203, -9.07151985168457], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000388.npy"}
|
||||
{"epoch": 0.5697503671071953, "step": 389, "batch_size": 64, "mean": 243.48245239257812, "std": 315.5850524902344, "min": -373.575927734375, "p10": -87.1248046875, "median": 242.57070922851562, "p90": 605.4242919921876, "max": 1009.7823486328125, "pos_frac": 0.71875, "sample": [55.9061279296875, 534.0128784179688, 412.7095642089844, -332.7928161621094, 157.57748413085938, 308.31317138671875, 216.8977813720703, 594.7637329101562, 419.77239990234375, 240.1168212890625, 272.6637268066406, -84.78882598876953, 331.1214599609375, -213.07473754882812, 146.623046875, 502.9269714355469, 132.45718383789062, 435.86370849609375, 436.5173034667969, -77.70718383789062, -242.36953735351562, -34.382137298583984, 609.9931030273438, -34.902652740478516, 327.1502380371094, 660.8145751953125, -373.575927734375, -7.8697967529296875, 129.61990356445312, 148.70953369140625, 367.3108825683594, -56.014251708984375, 139.59361267089844, 306.5485534667969, -320.9870910644531, 293.55810546875, 224.64952087402344, 466.4237365722656, 412.28546142578125, 988.4205322265625, 489.329345703125, -65.95408630371094, 465.7940368652344, 437.61248779296875, 233.99664306640625, 386.5852966308594, -7.051973342895508, 139.01829528808594, 503.0564270019531, 183.26016235351562, 54.501197814941406, 1009.7823486328125, -76.86134338378906, -88.12593841552734, 245.02459716796875, 816.519287109375, -28.1781005859375, 958.2681884765625, 366.4888610839844, -48.81284713745117, 276.0933532714844, -275.70263671875, 269.79559326171875, 843.5813598632812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000389.npy"}
|
||||
{"epoch": 0.5712187958883994, "step": 390, "batch_size": 64, "mean": 241.33123779296875, "std": 392.37335205078125, "min": -662.3434448242188, "p10": -146.04956970214843, "median": 262.4533386230469, "p90": 565.7798278808594, "max": 2276.023193359375, "pos_frac": 0.78125, "sample": [265.7914733886719, 330.9603271484375, 614.4610595703125, 526.9127197265625, -662.3434448242188, -164.31375122070312, -263.0664367675781, 134.7674102783203, 409.492431640625, 259.1152038574219, 468.6695251464844, 14.385086059570312, 303.11669921875, 406.2097473144531, 221.37603759765625, 23.381683349609375, -107.93138885498047, 2276.023193359375, -141.86087036132812, 448.26007080078125, 128.82839965820312, 549.8502197265625, 560.0068359375, 118.86259460449219, 193.3622283935547, -94.099853515625, 124.82547760009766, -74.52963256835938, 166.9630584716797, 317.69012451171875, 183.3067626953125, -39.704437255859375, -147.8447265625, -117.98673248291016, 157.7218475341797, 568.2539672851562, 835.6634521484375, 577.0773315429688, -447.95635986328125, 404.6697692871094, 545.4137573242188, 640.1200561523438, 448.4581604003906, 143.59634399414062, 581.2738037109375, 179.99220275878906, 166.8238525390625, 101.85816955566406, 528.4368896484375, 361.9198913574219, 504.5764465332031, 293.7860107421875, 302.6939392089844, 325.13641357421875, 342.8270568847656, -46.633140563964844, 2.953948974609375, -318.06634521484375, -535.4544067382812, 285.99346923828125, 494.4248046875, 83.984619140625, 320.73394775390625, 361.9834289550781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000390.npy"}
|
||||
{"epoch": 0.5726872246696035, "step": 391, "batch_size": 64, "mean": 139.651611328125, "std": 288.9215393066406, "min": -628.4801635742188, "p10": -171.6012298583984, "median": 109.63602828979492, "p90": 468.2785949707031, "max": 1052.62109375, "pos_frac": 0.703125, "sample": [-46.38252639770508, 10.691123962402344, 105.75257110595703, -57.05317687988281, 176.31817626953125, 354.68048095703125, 42.33307647705078, 140.81146240234375, 35.20280838012695, 414.1448669433594, -95.23583221435547, 146.9530792236328, -221.19528198242188, -94.94139862060547, 28.302963256835938, 35.16602325439453, 396.17236328125, 118.73019409179688, 41.53412628173828, -141.79412841796875, 186.6715850830078, 470.04876708984375, 555.3619384765625, 464.148193359375, -74.0901870727539, 96.99613952636719, 1052.62109375, -233.38746643066406, -114.11642456054688, 310.539306640625, 358.4842529296875, -100.09249114990234, 82.31626892089844, -2.7690696716308594, -82.55779266357422, 231.91737365722656, 105.71802520751953, -62.308982849121094, 478.8665466308594, 154.57879638671875, 76.37505340576172, -194.22811889648438, 163.52688598632812, 165.42514038085938, 726.9627685546875, -326.4729919433594, 599.2673950195312, -75.91387176513672, -514.7798461914062, 26.593467712402344, 56.38067626953125, 281.1735534667969, -184.37570190429688, 404.0414123535156, 416.1945495605469, 113.51948547363281, 805.5078735351562, 359.80072021484375, 328.70501708984375, -628.4801635742188, 259.0635681152344, 247.0269775390625, 308.56671142578125, 254.68626403808594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000391.npy"}
|
||||
{"epoch": 0.5741556534508077, "step": 392, "batch_size": 64, "mean": 286.5428771972656, "std": 289.2859802246094, "min": -204.1536102294922, "p10": -62.56724395751948, "median": 232.6835479736328, "p90": 632.4061462402344, "max": 1248.43603515625, "pos_frac": 0.859375, "sample": [-0.12270355224609375, 182.00466918945312, -150.4317169189453, 56.245296478271484, 229.11781311035156, -111.97147369384766, 640.813232421875, 324.1934814453125, 365.55938720703125, 360.53131103515625, 612.7896118164062, -204.1536102294922, 303.97540283203125, 149.85165405273438, 178.73838806152344, 31.780973434448242, 656.231201171875, 802.0707397460938, 575.5274047851562, 221.62144470214844, -112.40550994873047, 419.2510986328125, 511.5806579589844, 45.17939758300781, 362.77069091796875, 406.688720703125, 219.268798828125, 1248.43603515625, -148.051025390625, 215.1832275390625, 236.24928283691406, 269.7291259765625, 437.50836181640625, 186.2528076171875, 558.4568481445312, 247.9419403076172, 377.21685791015625, 180.32130432128906, 211.9496307373047, 142.03375244140625, 396.6776123046875, 441.7107238769531, 61.67694091796875, -15.624198913574219, 534.3338623046875, 910.8653564453125, 441.10302734375, 469.3602600097656, 395.95343017578125, -173.5148162841797, 138.13694763183594, 117.75547790527344, 91.87554931640625, 328.43212890625, 170.78036499023438, 138.68743896484375, 310.46697998046875, 34.36906051635742, 1052.8690185546875, 50.10823059082031, -82.6856918334961, 79.60610961914062, 407.7310485839844, 798.134033203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000392.npy"}
|
||||
{"epoch": 0.5756240822320118, "step": 393, "batch_size": 64, "mean": 223.21453857421875, "std": 358.58782958984375, "min": -298.7071228027344, "p10": -203.97174377441405, "median": 242.853759765625, "p90": 617.4684753417971, "max": 1671.9049072265625, "pos_frac": 0.703125, "sample": [253.9451446533203, 755.0984497070312, -2.7195167541503906, -110.15015411376953, 421.7265625, 518.7972412109375, -281.5785217285156, 415.9654541015625, 290.9287414550781, 244.27236938476562, 83.40283203125, 268.4793701171875, 189.88275146484375, 347.6634521484375, 352.11395263671875, 78.35590362548828, 1072.04052734375, 1003.4053955078125, 709.447021484375, -6.717964172363281, 241.43515014648438, 554.0103149414062, -273.4547119140625, -273.648193359375, -125.36079406738281, -22.725902557373047, -222.5099639892578, 264.8493347167969, 287.74444580078125, 306.7161865234375, 172.64207458496094, 444.80743408203125, 477.688232421875, 201.03781127929688, 569.6692504882812, 347.781494140625, 251.14730834960938, 142.22695922851562, 416.46014404296875, -104.63447570800781, 6.2047882080078125, 154.9403839111328, 637.953857421875, -180.30856323242188, -244.025634765625, 506.8775634765625, 304.1127624511719, -212.72055053710938, 29.52594757080078, 174.21768188476562, 141.603515625, 251.6863250732422, -173.2974090576172, -135.04605102539062, -183.557861328125, 380.3001403808594, 418.6398620605469, -298.7071228027344, 307.43634033203125, 43.408729553222656, -121.46366119384766, 1671.9049072265625, 651.3096923828125, -105.50674438476562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000393.npy"}
|
||||
{"epoch": 0.5770925110132159, "step": 394, "batch_size": 64, "mean": 233.43197631835938, "std": 315.04718017578125, "min": -366.4601135253906, "p10": -144.96865615844723, "median": 170.14122009277344, "p90": 657.9972045898439, "max": 1118.88671875, "pos_frac": 0.8125, "sample": [106.49180603027344, 346.41973876953125, -197.7609100341797, 369.4381408691406, 173.30319213867188, -194.99179077148438, 42.0595588684082, 113.85417175292969, 525.8363037109375, 74.20751953125, 614.4022216796875, -26.99805450439453, -366.4601135253906, 40.41758728027344, -50.77703857421875, -229.82949829101562, -119.65677642822266, 797.1328125, 676.0738525390625, -199.73033142089844, 527.33154296875, -238.47271728515625, 276.61529541015625, 330.0809631347656, 195.11488342285156, 407.93109130859375, 148.09535217285156, 7.9072418212890625, -24.981224060058594, 613.0214233398438, 615.818359375, 791.6774291992188, -155.8166046142578, 374.816162109375, 126.0479736328125, 48.32069396972656, 150.66397094726562, 34.40953826904297, 136.37799072265625, 679.3433837890625, 34.31336212158203, 534.487060546875, 18.814380645751953, 191.2201385498047, 159.06333923339844, 30.6650390625, 507.6979064941406, 956.7437744140625, 266.07586669921875, 166.979248046875, 471.360107421875, 296.78863525390625, 1118.88671875, 980.9712524414062, 347.7559814453125, 42.124900817871094, 90.22850036621094, -98.10012817382812, 248.99612426757812, 353.9927673339844, 217.65255737304688, 1.7462749481201172, 174.304931640625, 289.14288330078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000394.npy"}
|
||||
{"epoch": 0.57856093979442, "step": 395, "batch_size": 64, "mean": 252.64047241210938, "std": 343.5081787109375, "min": -531.4727783203125, "p10": -135.5839431762695, "median": 209.6084747314453, "p90": 761.6410522460939, "max": 1074.7318115234375, "pos_frac": 0.734375, "sample": [479.1500549316406, -273.830322265625, 195.49468994140625, 60.26277160644531, 270.43603515625, 334.767578125, 388.1055603027344, 237.21542358398438, -221.790283203125, 288.33795166015625, -152.7000274658203, 491.579833984375, 476.8879089355469, -51.702171325683594, 115.6126937866211, -27.566028594970703, 721.9281005859375, 286.5360412597656, 968.7171630859375, -54.71116638183594, 628.46826171875, 16.474403381347656, 490.48968505859375, -194.11924743652344, 247.23342895507812, -11.562347412109375, 175.40286254882812, 449.63275146484375, -104.1974105834961, 5.111289978027344, 874.4866943359375, 980.3643188476562, 542.9449462890625, 137.72781372070312, 346.57940673828125, 44.362335205078125, 152.04269409179688, 387.811767578125, 968.20703125, 155.12890625, 547.345703125, 517.1116943359375, -531.4727783203125, -38.80384826660156, -203.38247680664062, 354.038330078125, -91.80088806152344, 844.6130981445312, -9.383718490600586, -109.44215393066406, 84.333984375, -146.78756713867188, 8.481069564819336, 1074.7318115234375, 778.660888671875, 223.72225952148438, 300.36907958984375, 535.4404907226562, 158.5110626220703, 480.7515563964844, 441.4140625, 66.69889068603516, 103.2699203491211, -44.75117874145508], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000395.npy"}
|
||||
{"epoch": 0.580029368575624, "step": 396, "batch_size": 64, "mean": 223.44097900390625, "std": 269.5262756347656, "min": -572.9200439453125, "p10": -44.61759910583493, "median": 162.15258026123047, "p90": 580.7717254638675, "max": 978.7274169921875, "pos_frac": 0.828125, "sample": [152.63963317871094, 628.5917358398438, 88.07463836669922, 193.48367309570312, 109.64273071289062, 610.4725952148438, 139.85574340820312, 229.32733154296875, 130.25958251953125, 262.46478271484375, 360.9987487792969, 301.32647705078125, 223.539306640625, 412.2607727050781, -133.66275024414062, 169.56349182128906, 210.61703491210938, 14.241714477539062, 64.26347351074219, 125.96556854248047, 77.54801177978516, 403.9941711425781, 882.7412109375, 321.0607604980469, 79.58920288085938, 476.6116027832031, 314.65716552734375, 129.02386474609375, 184.61871337890625, 647.5913696289062, -63.1528205871582, 391.18310546875, 141.04879760742188, 284.6595153808594, 63.7856559753418, 186.72413635253906, 468.208984375, -198.24859619140625, 479.54754638671875, 709.7294921875, 422.5369873046875, -12.45574951171875, -76.0059814453125, 511.4696960449219, -1.83929443359375, -58.401248931884766, 501.299560546875, -10.836326599121094, 43.640106201171875, 260.5312194824219, 911.5062866210938, 91.25406646728516, 14.974365234375, -104.87432861328125, -11.990127563476562, 302.353515625, 269.3258361816406, 138.63900756835938, 154.74166870117188, -572.9200439453125, 90.98818969726562, 978.7274169921875, 50.90643310546875, 131.80355834960938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000396.npy"}
|
||||
{"epoch": 0.5814977973568282, "step": 397, "batch_size": 64, "mean": 294.6309509277344, "std": 310.2337951660156, "min": -315.20025634765625, "p10": -17.836246871948227, "median": 239.31919860839844, "p90": 732.7612304687501, "max": 1100.436279296875, "pos_frac": 0.875, "sample": [38.494571685791016, 545.5816650390625, 7.580108642578125, 366.1274108886719, 155.13739013671875, 81.11717224121094, 269.2242431640625, 380.63671875, 520.6385498046875, 197.1071014404297, 757.5185546875, 4.9642791748046875, 607.0662841796875, -117.91143798828125, 158.33953857421875, 391.1460876464844, 228.49502563476562, -232.50326538085938, 442.1328125, 715.1552734375, 250.14337158203125, 740.306640625, 326.677490234375, 438.7029113769531, 1100.436279296875, -1.7455902099609375, 707.9205322265625, 447.82354736328125, -178.03121948242188, 147.73912048339844, 553.8992919921875, 269.35418701171875, 108.78329467773438, 951.1329345703125, 465.3045349121094, 196.60128784179688, 56.2352180480957, 381.12957763671875, 171.9725341796875, 168.97300720214844, -315.20025634765625, -186.97879028320312, 84.34886932373047, -268.6643371582031, 483.4177551269531, 128.9097442626953, 61.94788360595703, 120.46666717529297, -24.732242584228516, 526.6650390625, 46.6694450378418, 767.023681640625, 178.33505249023438, 955.0327758789062, 137.08169555664062, 1008.1690673828125, 128.32594299316406, 189.70703125, 536.368408203125, 152.00582885742188, 294.4625244140625, 320.975830078125, 412.5562438964844, 300.07977294921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000397.npy"}
|
||||
{"epoch": 0.5829662261380323, "step": 398, "batch_size": 64, "mean": 269.42486572265625, "std": 354.5132141113281, "min": -374.10101318359375, "p10": -126.43796234130858, "median": 171.44021606445312, "p90": 670.9688049316406, "max": 1393.1993408203125, "pos_frac": 0.78125, "sample": [121.81124877929688, 62.54380798339844, 456.1236877441406, 700.7810668945312, -130.54249572753906, 78.37061309814453, 318.70635986328125, -307.9384765625, 92.97990417480469, -131.15887451171875, 551.9505615234375, -374.10101318359375, 673.40625, 407.06072998046875, 488.9301452636719, 1049.5216064453125, 165.6509246826172, 64.76165008544922, 618.8248291015625, 305.91827392578125, 523.19140625, 157.30850219726562, 618.2608032226562, 466.8486633300781, 1222.838134765625, -12.82244873046875, 236.67669677734375, 177.22950744628906, 665.2814331054688, 401.8486633300781, 40.43408203125, 186.37738037109375, -116.8607177734375, -24.169021606445312, 350.8211975097656, 96.06849670410156, 85.11580657958984, 385.23211669921875, 47.126441955566406, 441.20391845703125, 101.29701232910156, 476.64056396484375, 136.43515014648438, 63.80670928955078, 141.70431518554688, -50.537757873535156, 398.43670654296875, 4.473358154296875, -33.48603820800781, 53.73724365234375, 210.4791259765625, 1393.1993408203125, 610.7047119140625, 787.4501342773438, -352.8045654296875, 543.0606689453125, 435.009033203125, 562.0365600585938, 867.43505859375, 164.24705505371094, -54.36705017089844, -35.47314453125, -140.7406005859375, -201.16558837890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000398.npy"}
|
||||
{"epoch": 0.5844346549192364, "step": 399, "batch_size": 64, "mean": 237.1878662109375, "std": 330.4715881347656, "min": -453.79248046875, "p10": -197.9011688232421, "median": 203.0102081298828, "p90": 641.5745605468751, "max": 1088.0712890625, "pos_frac": 0.75, "sample": [137.57180786132812, 338.63726806640625, 93.27274322509766, -284.42962646484375, 611.968994140625, -49.98015594482422, 647.7149658203125, 37.25083541870117, 147.2471923828125, -39.489501953125, 272.3608093261719, -261.1718444824219, 679.186279296875, -34.28271484375, 627.2469482421875, 186.9298553466797, 317.72930908203125, -86.13488006591797, 18.676355361938477, 90.84559631347656, 792.7872314453125, 140.66612243652344, 384.22332763671875, 220.76637268066406, 605.9594116210938, 801.9561767578125, 203.14801025390625, 382.1923828125, -308.23095703125, 239.10885620117188, -16.719161987304688, 419.2611389160156, 202.87240600585938, 139.1441192626953, 442.5179138183594, -234.01034545898438, 1088.0712890625, 503.9341125488281, 607.31298828125, -295.05364990234375, 387.6853942871094, 510.1524353027344, 192.8419647216797, -453.79248046875, -44.78609848022461, 409.08203125, 70.368896484375, 800.1270751953125, 18.861534118652344, -113.64642333984375, 465.6021423339844, 966.3667602539062, 511.27325439453125, 122.25120544433594, 564.1085815429688, 229.23658752441406, 178.25466918945312, 246.3355712890625, -358.7386779785156, 263.4276428222656, 442.1190490722656, -90.11674499511719, 191.73626708984375, -99.78597259521484], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000399.npy"}
|
||||
{"epoch": 0.5859030837004405, "step": 400, "batch_size": 64, "mean": 338.11065673828125, "std": 315.2834167480469, "min": -499.58160400390625, "p10": -80.38878707885739, "median": 339.76690673828125, "p90": 721.0635009765625, "max": 1158.21533203125, "pos_frac": 0.8125, "sample": [274.6685485839844, 478.17816162109375, 454.07952880859375, 256.9183654785156, 578.3345947265625, -101.78768920898438, 473.0855712890625, 231.01199340820312, -103.56609344482422, 596.1454467773438, 217.96438598632812, 81.7364273071289, 507.1319274902344, 295.861083984375, -499.58160400390625, 755.3762817382812, 681.1170654296875, 179.63580322265625, 419.806640625, 392.49847412109375, 793.414306640625, 587.2797241210938, 76.11917114257812, 376.0484619140625, 100.90636444091797, 353.34490966796875, 76.44090270996094, 538.8584594726562, 362.59808349609375, 379.92694091796875, 410.8214416503906, 174.114990234375, -95.45459747314453, 405.85369873046875, 264.5882873535156, -242.2035369873047, -10.225349426269531, 778.3825073242188, 326.18890380859375, 551.484619140625, -22.286331176757812, 192.35821533203125, 325.15447998046875, 1015.8079833984375, -16.149534225463867, 609.373779296875, 324.6605224609375, 176.01002502441406, -6.002025604248047, 1158.21533203125, 708.9314575195312, -95.88784790039062, 243.02227783203125, 469.2120361328125, 319.3391418457031, 668.3526611328125, -272.98870849609375, 725.8497314453125, 651.5064697265625, 389.264404296875, 252.47113037109375, 709.8956298828125, 781.104248046875, -45.2352294921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000400.npy"}
|
||||
{"epoch": 0.5873715124816447, "step": 401, "batch_size": 64, "mean": 192.7608642578125, "std": 395.2124328613281, "min": -816.3862915039062, "p10": -215.9897262573242, "median": 119.4109878540039, "p90": 776.7494018554689, "max": 1144.490966796875, "pos_frac": 0.6875, "sample": [111.43016052246094, 527.596923828125, -44.81599044799805, 232.37152099609375, 551.397705078125, 70.3274154663086, -359.6578369140625, -371.9735412597656, -816.3862915039062, -74.66069030761719, 71.81664276123047, 239.63571166992188, -223.515869140625, 85.41405487060547, -27.94573974609375, -28.4982967376709, 374.6968688964844, 263.9377746582031, 127.39181518554688, 11.753372192382812, -71.85562133789062, -191.15733337402344, -58.22547149658203, 1006.0535888671875, 40.847755432128906, 669.711181640625, 946.5023803710938, 463.19677734375, 795.6967163085938, 204.19239807128906, 100.48585510253906, 294.9466247558594, -58.218990325927734, 49.30817413330078, -92.74811553955078, -497.4604797363281, 1000.93359375, -67.13166809082031, 243.8336944580078, 551.4302978515625, 195.38597106933594, 523.5400390625, 316.21502685546875, -40.412899017333984, 278.874267578125, 191.36279296875, 181.16200256347656, -192.7491912841797, 1144.490966796875, -507.85302734375, 650.81884765625, 483.10467529296875, 370.8598937988281, 88.43657684326172, 912.1082763671875, 882.2730712890625, 53.71186065673828, 341.84649658203125, 28.566009521484375, 59.384395599365234, 128.60955810546875, 732.5390014648438, -337.8073425292969, -198.42872619628906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000401.npy"}
|
||||
{"epoch": 0.5888399412628488, "step": 402, "batch_size": 64, "mean": 350.63134765625, "std": 317.7698669433594, "min": -262.6744689941406, "p10": -1.5566047668457004, "median": 281.19447326660156, "p90": 821.0599975585938, "max": 1423.74951171875, "pos_frac": 0.890625, "sample": [468.379150390625, 28.1163387298584, 1.1936264038085938, 436.416259765625, 225.4127197265625, 426.92327880859375, -7.5193023681640625, 275.0831604003906, -42.156578063964844, 682.3616333007812, 381.35546875, 579.9024658203125, 681.9989013671875, 195.39942932128906, -90.49908447265625, 596.1932373046875, -2.7352752685546875, 267.7616271972656, 211.59449768066406, 431.19158935546875, 563.841796875, 205.81478881835938, 601.0399780273438, -262.6744689941406, 356.1004943847656, 510.20928955078125, 52.177093505859375, 946.5162353515625, 49.63157653808594, 898.30126953125, 210.21804809570312, 357.26983642578125, 198.56187438964844, 454.2655029296875, 129.52615356445312, 259.04718017578125, 879.95458984375, 99.70925903320312, 180.511474609375, 726.011962890625, 8.224777221679688, -120.35443115234375, 810.0391845703125, 945.333984375, 231.96224975585938, 473.9577941894531, 825.783203125, 152.26039123535156, 319.2220153808594, 1423.74951171875, 464.02435302734375, 574.4124755859375, 48.59202575683594, 919.7720947265625, 218.4840087890625, 325.99798583984375, 333.0675048828125, 274.7002258300781, 156.92318725585938, -168.98251342773438, 359.2623291015625, 193.17019653320312, 221.09085083007812, 287.3057861328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000402.npy"}
|
||||
{"epoch": 0.5903083700440529, "step": 403, "batch_size": 64, "mean": 268.3475341796875, "std": 307.86566162109375, "min": -344.90106201171875, "p10": -77.46739959716794, "median": 258.1399383544922, "p90": 667.5409179687501, "max": 1165.1241455078125, "pos_frac": 0.8125, "sample": [225.11199951171875, 20.484413146972656, 83.28958892822266, 210.86529541015625, 378.8893127441406, 40.09953689575195, 250.79547119140625, 375.0314025878906, -44.15587615966797, -200.82862854003906, 145.97357177734375, -29.252044677734375, -344.90106201171875, 534.738525390625, 125.41619873046875, 864.0804443359375, 273.2659912109375, 22.520925521850586, 420.4881591796875, 295.4149169921875, 357.97576904296875, 76.07241821289062, 741.47265625, -19.70227813720703, -91.74376678466797, 576.7831420898438, 524.3794555664062, 682.508544921875, 265.4844055175781, 921.3526611328125, -139.6262664794922, 479.5031433105469, 614.6871337890625, 25.774715423583984, -18.52124786376953, 530.19775390625, -119.29427337646484, 275.7547607421875, 632.616455078125, 511.9295654296875, -174.01719665527344, 294.3140563964844, 1165.1241455078125, 20.53888511657715, 530.9653930664062, 130.77745056152344, 283.7168884277344, 764.1170654296875, 366.6388854980469, 945.20849609375, 296.16412353515625, 486.5464172363281, 366.1875305175781, 121.06269836425781, -42.4117431640625, 18.233474731445312, 51.172996520996094, 348.1732482910156, 4.2194671630859375, 99.11949157714844, 355.41070556640625, 229.3226318359375, -139.47171020507812, 178.19479370117188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000403.npy"}
|
||||
{"epoch": 0.591776798825257, "step": 404, "batch_size": 64, "mean": 263.1905212402344, "std": 367.5498352050781, "min": -581.5407104492188, "p10": -209.05089416503904, "median": 251.0316619873047, "p90": 733.0750488281253, "max": 1286.637939453125, "pos_frac": 0.75, "sample": [-298.51263427734375, 284.09375, 55.01439666748047, 616.640380859375, 596.3072509765625, 182.30931091308594, 195.08485412597656, 1087.9166259765625, 137.66217041015625, 797.5580444335938, 1286.637939453125, 237.02291870117188, 291.6031494140625, 872.0501708984375, -106.12085723876953, -360.7832946777344, 888.8303833007812, 100.57665252685547, -303.8775634765625, 129.68980407714844, -80.98379516601562, -18.753929138183594, -102.99214172363281, 435.85009765625, 236.37017822265625, 308.97784423828125, -224.39413452148438, 459.8589782714844, 256.18060302734375, 337.0788269042969, 538.7968139648438, -266.0541687011719, 399.61712646484375, 150.19412231445312, 582.7069091796875, 112.95793914794922, -160.28651428222656, 168.7943572998047, 308.5280456542969, 489.911376953125, -581.5407104492188, 414.8524169921875, 94.01327514648438, 607.7596435546875, 245.88272094726562, -256.20574951171875, -173.25, 542.541748046875, 619.2855224609375, -103.61613464355469, 400.5937805175781, -5.037635803222656, 385.27093505859375, 394.257080078125, 507.84796142578125, 763.2520751953125, 317.1148986816406, 48.98573303222656, 2.9203243255615234, -97.00331115722656, 895.2898559570312, 139.79254150390625, 662.6619873046875, 396.46295166015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000404.npy"}
|
||||
{"epoch": 0.593245227606461, "step": 405, "batch_size": 64, "mean": 215.46173095703125, "std": 391.1243896484375, "min": -532.3126220703125, "p10": -288.88966979980466, "median": 167.55264282226562, "p90": 787.842077636719, "max": 1109.4901123046875, "pos_frac": 0.703125, "sample": [872.1931762695312, 804.448974609375, 243.83645629882812, -202.0233917236328, -487.64556884765625, -41.402862548828125, -458.3470458984375, 129.46060180664062, 401.4983215332031, -254.40301513671875, 52.582725524902344, 1108.689208984375, 546.4450073242188, 120.33370971679688, 82.29390716552734, 1109.4901123046875, -47.166046142578125, -102.77518463134766, 65.10476684570312, -149.65115356445312, 139.59521484375, 254.67262268066406, 114.17478942871094, 491.7491455078125, 169.30117797851562, 150.04893493652344, -156.27149963378906, 569.5997314453125, -387.15655517578125, -71.21422576904297, 427.5127258300781, -20.481645584106445, 422.63922119140625, 9.067144393920898, 416.478271484375, 165.80410766601562, 276.9541320800781, -299.84375, 25.557714462280273, 69.37435913085938, 521.1126708984375, 201.93466186523438, 749.0926513671875, 380.0829162597656, -33.28982925415039, 178.11068725585938, 603.2662353515625, -288.8740539550781, 251.11691284179688, 283.82855224609375, 375.43292236328125, -288.8963623046875, 229.93675231933594, 945.34423828125, 91.6680679321289, -532.3126220703125, 475.88031005859375, 329.8471374511719, -50.290069580078125, 989.773681640625, 857.3580322265625, -328.0238037109375, 550.3795776367188, 736.547607421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000405.npy"}
|
||||
{"epoch": 0.5947136563876652, "step": 406, "batch_size": 64, "mean": 283.2893981933594, "std": 337.6618347167969, "min": -351.65081787109375, "p10": -88.09148178100584, "median": 221.9390869140625, "p90": 704.5681274414063, "max": 1229.801513671875, "pos_frac": 0.8125, "sample": [447.8423156738281, 145.60997009277344, 278.2334899902344, 84.13882446289062, 233.28643798828125, 211.507568359375, 186.35281372070312, -323.0037536621094, 1197.2091064453125, 170.15374755859375, 487.1400451660156, 244.3678436279297, 1229.801513671875, 574.9722900390625, 308.0594482421875, -11.247119903564453, 528.6973266601562, 307.2388610839844, 547.2990112304688, -97.00940704345703, 867.5145263671875, 355.8169250488281, 347.3258056640625, -110.66578674316406, 111.4843521118164, 130.2452850341797, 35.59765625, -67.28298950195312, 525.5972290039062, -5.918487548828125, -116.84254455566406, 196.6025848388672, 467.8813781738281, -313.52764892578125, -159.62017822265625, 513.1633911132812, 833.3294677734375, 199.0682373046875, 156.61163330078125, -13.875608444213867, 171.34268188476562, 199.34384155273438, 429.0802307128906, -38.347023010253906, 4.813385009765625, 71.49185943603516, 719.9649658203125, 346.8013610839844, 35.5540771484375, 232.37060546875, 672.2633666992188, 681.026611328125, 442.0760192871094, 21.202621459960938, -351.65081787109375, 12.052375793457031, 714.6573486328125, 554.3174438476562, 1074.270751953125, 436.8966064453125, 297.055419921875, 562.5230712890625, 97.22752380371094, 41.030982971191406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000406.npy"}
|
||||
{"epoch": 0.5961820851688693, "step": 407, "batch_size": 64, "mean": 171.37548828125, "std": 324.82891845703125, "min": -873.1190795898438, "p10": -261.7927963256836, "median": 158.5663299560547, "p90": 580.8947753906251, "max": 849.3638305664062, "pos_frac": 0.75, "sample": [-313.7042236328125, 556.8092041015625, 510.5126647949219, -352.3411560058594, -290.2933654785156, -91.6382064819336, 266.89129638671875, -137.68228149414062, -329.2121887207031, 278.70526123046875, 395.2393798828125, 122.71168518066406, 352.6322021484375, 686.2410278320312, 597.0001831054688, 30.121797561645508, -45.96975326538086, 478.98687744140625, 72.60370635986328, -90.11820983886719, 355.2977600097656, 104.03692626953125, 102.75935363769531, 110.13717651367188, 79.23686218261719, 508.9378662109375, 213.2528076171875, 224.27590942382812, 112.37825012207031, 148.89378356933594, -277.3645935058594, 188.21139526367188, 12.7587890625, -116.0595474243164, 37.433982849121094, 114.04093933105469, 513.0505981445312, 755.3618774414062, 62.40315628051758, -873.1190795898438, 714.78173828125, 385.4932861328125, 426.88580322265625, 8.893836975097656, 390.193603515625, 168.23887634277344, 205.2099609375, 849.3638305664062, 381.1499328613281, 287.6253662109375, 623.7909545898438, -204.2164306640625, -86.45986938476562, 548.8045654296875, 6.063713073730469, 86.3484878540039, 173.0283660888672, 523.3776245117188, -225.45860290527344, -367.6953125, 591.2171630859375, 306.13128662109375, -179.67189025878906, 281.5140075683594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000407.npy"}
|
||||
{"epoch": 0.5976505139500734, "step": 408, "batch_size": 64, "mean": 199.35397338867188, "std": 262.37542724609375, "min": -281.5787353515625, "p10": -160.31171722412108, "median": 222.6162567138672, "p90": 551.3729675292971, "max": 820.1580810546875, "pos_frac": 0.765625, "sample": [44.22380065917969, 673.7825317382812, 820.1580810546875, -26.48968505859375, 424.415771484375, 359.86395263671875, -101.381103515625, 145.37135314941406, -214.15423583984375, -148.94407653808594, -26.02161979675293, -281.5787353515625, 84.47077178955078, 11.3223876953125, 19.655067443847656, -229.9952850341797, 153.20115661621094, 333.86590576171875, 286.340087890625, 92.32295227050781, 212.79771423339844, 366.81329345703125, -106.61273193359375, 415.2051086425781, -122.52421569824219, 276.7889404296875, -22.022958755493164, 263.8956604003906, 78.27295684814453, 682.4100341796875, 190.1891326904297, 254.06973266601562, 519.43798828125, 20.782424926757812, -266.83489990234375, -177.50100708007812, 280.19366455078125, 269.0115966796875, 76.21672058105469, 226.70370483398438, 348.6253967285156, 391.3998718261719, 348.55517578125, 436.572998046875, 218.52880859375, -245.16567993164062, 716.4244995117188, -8.166879653930664, 31.269428253173828, 68.00973510742188, 154.15499877929688, 565.0593872070312, 267.9480895996094, -165.18356323242188, 335.35284423828125, 311.9005432128906, 48.61157989501953, 424.47393798828125, 238.15513610839844, 322.1846008300781, 665.7130126953125, 609.3946533203125, 476.03594970703125, 341.0777893066406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000408.npy"}
|
||||
{"epoch": 0.5991189427312775, "step": 409, "batch_size": 64, "mean": 197.1689453125, "std": 284.3892822265625, "min": -375.3445129394531, "p10": -113.63123474121092, "median": 152.73265838623047, "p90": 554.8817016601564, "max": 939.827880859375, "pos_frac": 0.734375, "sample": [-39.148162841796875, -360.309814453125, 275.1625061035156, 211.21517944335938, 209.88677978515625, 326.0477294921875, -96.43924713134766, -91.885986328125, -55.665283203125, 139.64671325683594, 501.2615966796875, 504.5274353027344, -284.5538635253906, 153.34951782226562, 405.0556640625, 134.28067016601562, 219.48709106445312, 278.8916320800781, 170.95986938476562, 791.273193359375, 307.76580810546875, -120.99922943115234, 328.00823974609375, 61.00645065307617, 634.7122192382812, -131.95819091796875, -4.688819885253906, 21.070587158203125, -41.910980224609375, 152.1157989501953, 421.68243408203125, 728.9144287109375, 526.6717529296875, -375.3445129394531, 58.897682189941406, 939.827880859375, -1.8951644897460938, 96.9169921875, -336.50909423828125, 114.31156158447266, 479.3385314941406, -6.475547790527344, 566.9716796875, 101.80342102050781, 240.47705078125, 264.2730407714844, 80.94062042236328, 146.8148193359375, 802.3175659179688, 518.5272216796875, -34.35120391845703, 188.07666015625, -160.51124572753906, 204.75267028808594, 197.64576721191406, 445.4736022949219, 358.1229248046875, 319.9856872558594, 114.98721313476562, 69.00715637207031, 766.2042236328125, -24.255599975585938, 68.21160888671875, 138.8350830078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000409.npy"}
|
||||
{"epoch": 0.6005873715124816, "step": 410, "batch_size": 64, "mean": 228.97573852539062, "std": 282.6621398925781, "min": -582.3479614257812, "p10": -92.02460708618163, "median": 231.5849609375, "p90": 598.9407226562502, "max": 871.841064453125, "pos_frac": 0.796875, "sample": [208.71292114257812, 292.20361328125, -119.7322998046875, 72.54120635986328, 498.6546630859375, 341.7688903808594, 515.848388671875, 484.3288269042969, 424.0709228515625, 15.912897109985352, 150.48255920410156, 229.12741088867188, 12.050643920898438, 469.5980224609375, -97.88434600830078, -129.40777587890625, 625.3052978515625, -418.9066162109375, -8.84006118774414, 369.0472412109375, 412.3575439453125, 707.2344970703125, -78.35188293457031, 83.19996643066406, 618.7451171875, 642.4994506835938, 431.50616455078125, 19.145381927490234, 364.531494140625, 871.841064453125, 291.5876770019531, 26.135013580322266, 356.3816833496094, 60.098175048828125, 382.04296875, -30.755889892578125, -54.171485900878906, -66.48969268798828, 124.225341796875, -150.7796630859375, 178.65126037597656, 440.92034912109375, 162.30816650390625, 403.3916015625, 203.16653442382812, -582.3479614257812, 234.04251098632812, 247.3255157470703, 685.8773803710938, 347.77716064453125, 109.2735595703125, -3.1500492095947266, 418.04046630859375, 538.6793212890625, 293.14227294921875, 725.6133422851562, 163.92176818847656, 347.3605041503906, -348.66314697265625, 111.75529479980469, 244.16183471679688, 29.59474754333496, 552.73046875, 205.0089569091797], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000410.npy"}
|
||||
{"epoch": 0.6020558002936858, "step": 411, "batch_size": 64, "mean": 230.6470947265625, "std": 336.9100341796875, "min": -545.6041259765625, "p10": -82.0435432434082, "median": 167.1031723022461, "p90": 648.199383544922, "max": 1370.7257080078125, "pos_frac": 0.734375, "sample": [247.113525390625, 18.087059020996094, -169.723876953125, 83.21456146240234, 564.8941650390625, 91.48812103271484, 74.29931640625, 261.4012145996094, 409.04461669921875, 386.96563720703125, 146.041259765625, -33.161048889160156, 180.329833984375, -54.610958099365234, 31.927160263061523, -13.891481399536133, 466.81427001953125, 281.0556945800781, 82.98577880859375, 421.1095886230469, 867.9778442382812, 346.6117858886719, -184.6480712890625, -23.078039169311523, 140.20785522460938, 160.04112243652344, -3.2692108154296875, 273.716064453125, 114.55447387695312, 1054.056640625, 129.46444702148438, 434.4113464355469, 861.0021362304688, -72.0785140991211, 520.5014038085938, 18.876739501953125, 376.7003479003906, 620.5962524414062, 692.9749145507812, 177.00833129882812, 28.61549949645996, 369.80511474609375, 148.232421875, 312.030029296875, 154.97256469726562, 331.3791809082031, -28.614391326904297, -545.6041259765625, 1370.7257080078125, 262.2427673339844, 176.1421661376953, 377.68804931640625, 584.3035888671875, 657.39990234375, 626.7315063476562, -198.57901000976562, -21.46228790283203, -440.97882080078125, 174.16522216796875, -86.31427001953125, 786.0245361328125, -162.78965759277344, -24.944778442382812, -70.76884460449219], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000411.npy"}
|
||||
{"epoch": 0.6035242290748899, "step": 412, "batch_size": 64, "mean": 243.07411193847656, "std": 318.76434326171875, "min": -477.32220458984375, "p10": -130.01181182861328, "median": 220.8287353515625, "p90": 599.2802490234376, "max": 1179.9510498046875, "pos_frac": 0.765625, "sample": [-158.27012634277344, 177.36895751953125, 382.7421875, 490.5201416015625, 380.5064697265625, -134.91415405273438, 733.2644653320312, -60.52442169189453, 208.12557983398438, 563.389404296875, 104.89566040039062, 382.8505554199219, 70.1719970703125, 334.79083251953125, -3.7786426544189453, 515.1806640625, 319.71771240234375, 66.65335845947266, -477.32220458984375, 232.75082397460938, 39.44672393798828, 40.982154846191406, 622.7360229492188, 203.26963806152344, 85.41773986816406, 412.4625549316406, -10.554534912109375, 377.0055236816406, -160.42611694335938, 212.01080322265625, -50.22783660888672, 43.81372833251953, 229.64666748046875, 295.9822998046875, 184.43260192871094, -50.03111267089844, -308.5654296875, -149.36512756347656, 114.39800262451172, 946.529541015625, 8.7999267578125, -434.18511962890625, 333.05438232421875, 554.8623657226562, 703.3291015625, -118.57301330566406, 538.2920532226562, 794.8870239257812, 48.076324462890625, 517.7313232421875, 354.9781494140625, -68.00656127929688, 110.49085998535156, 582.308837890625, 581.3128662109375, 1179.9510498046875, 18.076915740966797, 388.80511474609375, 415.45880126953125, -82.60403442382812, 467.65838623046875, 326.7456970214844, 606.5537109375, 521.6556396484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000412.npy"}
|
||||
{"epoch": 0.604992657856094, "step": 413, "batch_size": 64, "mean": 238.66033935546875, "std": 300.4703369140625, "min": -719.5596313476562, "p10": -132.94041671752922, "median": 246.29509735107422, "p90": 598.6456909179689, "max": 1012.4501342773438, "pos_frac": 0.859375, "sample": [67.4180908203125, 62.84218215942383, 142.92681884765625, 378.6451721191406, 147.27032470703125, 83.69859313964844, 210.25161743164062, 237.07003784179688, 50.259132385253906, 201.19052124023438, 663.1674194335938, 927.8065185546875, -56.62860870361328, 264.32366943359375, 31.955726623535156, -26.79835319519043, 374.66461181640625, 140.05712890625, 51.96748352050781, 781.2077026367188, 90.87196350097656, 462.4417724609375, 357.4571838378906, -719.5596313476562, 78.178955078125, 255.52015686035156, 280.58599853515625, 287.9378662109375, 628.1552734375, 1012.4501342773438, 357.983642578125, 354.6357421875, 560.3837890625, 342.8978271484375, -165.64547729492188, 560.2416381835938, 326.66522216796875, 161.2135009765625, 321.16961669921875, 318.0616455078125, 116.64958190917969, 158.5724639892578, 568.6671142578125, -295.1612548828125, 611.49365234375, 369.6800537109375, 204.16366577148438, -289.0309753417969, -285.0711669921875, -393.61846923828125, 200.90528869628906, 411.67742919921875, 445.093994140625, 641.022216796875, 19.439876556396484, 422.58660888671875, 307.7952575683594, 481.3161315917969, 188.7069549560547, 295.2352294921875, 114.18658447265625, 217.02027893066406, 399.60186767578125, -241.61383056640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000413.npy"}
|
||||
{"epoch": 0.6064610866372981, "step": 414, "batch_size": 64, "mean": 262.623779296875, "std": 312.8013916015625, "min": -688.9818115234375, "p10": -39.92876396179199, "median": 262.8604736328125, "p90": 673.5169311523438, "max": 987.6622314453125, "pos_frac": 0.796875, "sample": [349.039794921875, 266.2633361816406, 403.6090087890625, 266.36212158203125, -6.140041351318359, 900.2952270507812, 190.40399169921875, -688.9818115234375, 25.480026245117188, 201.2352294921875, 327.9910888671875, 266.9580383300781, 656.96533203125, 883.192138671875, 262.6720275878906, -89.10786437988281, 360.0267333984375, 92.89413452148438, -79.58162689208984, 161.7309112548828, 256.35113525390625, 514.55810546875, -34.9576301574707, 140.04043579101562, 473.1497802734375, 277.06353759765625, -192.88125610351562, -10.43931770324707, 447.870361328125, 86.04353332519531, 680.6104736328125, 487.52972412109375, -31.588603973388672, 339.55206298828125, 703.656494140625, 55.41451644897461, 395.87164306640625, -42.05924987792969, -411.7901611328125, 263.0489196777344, 329.08575439453125, 222.86190795898438, 590.2991333007812, -271.1608581542969, 74.41436767578125, 488.9073486328125, 31.074981689453125, 617.0107421875, 106.6562271118164, -5.505359649658203, 339.8614501953125, 179.1416473388672, 147.06692504882812, 123.9037857055664, 259.54833984375, 307.81982421875, 837.509033203125, -0.515960693359375, 449.0989990234375, 943.9207153320312, 987.6622314453125, 395.29888916015625, 150.40151977539062, 355.20703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000414.npy"}
|
||||
{"epoch": 0.6079295154185022, "step": 415, "batch_size": 64, "mean": 198.6478271484375, "std": 307.19818115234375, "min": -870.1644287109375, "p10": -131.20554428100584, "median": 202.99459075927734, "p90": 589.886767578125, "max": 965.5198974609375, "pos_frac": 0.75, "sample": [292.6638488769531, -121.0565185546875, 362.01678466796875, 774.4862670898438, -41.90464782714844, 745.4720458984375, 598.2584228515625, 166.2056121826172, -31.80596160888672, 220.4238739013672, 516.3147583007812, 377.5472106933594, 477.45184326171875, 418.77447509765625, -127.09093475341797, -132.9689483642578, 202.45291137695312, -291.5783386230469, 324.20416259765625, 214.90708923339844, 6.561796188354492, 32.29827880859375, 6.093841552734375, 668.5350341796875, -193.86978149414062, 127.66352081298828, 736.472900390625, -42.515235900878906, 83.69782257080078, 305.87957763671875, -41.5523681640625, 392.8213806152344, 278.3681945800781, 63.77850341796875, 965.5198974609375, -306.0286865234375, -110.17671203613281, 123.40936279296875, 461.73785400390625, 738.192138671875, 114.71861267089844, 354.2250671386719, 159.137451171875, 93.1400146484375, 136.10650634765625, 203.53627014160156, -9.924657821655273, 268.99481201171875, 420.13623046875, -136.4527587890625, 204.707763671875, 436.8247375488281, 343.6966247558594, 14.392730712890625, 338.021728515625, -18.10041618347168, 224.99844360351562, 3.652801513671875, 453.91522216796875, -200.47390747070312, 570.3529052734375, 112.96824645996094, -870.1644287109375, 253.38986206054688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000415.npy"}
|
||||
{"epoch": 0.6093979441997063, "step": 416, "batch_size": 64, "mean": 318.10662841796875, "std": 295.9786071777344, "min": -174.4176025390625, "p10": -57.99577674865722, "median": 260.36253356933594, "p90": 729.4890197753906, "max": 1036.03076171875, "pos_frac": 0.859375, "sample": [862.2131958007812, 143.32427978515625, 183.832763671875, 420.01507568359375, -104.83345794677734, 571.1655883789062, 232.17327880859375, 110.77916717529297, 482.31781005859375, 729.8838500976562, -60.388282775878906, 6.634124755859375, 62.941200256347656, 280.9063720703125, 254.34927368164062, 541.0580444335938, 331.9503173828125, 15.111656188964844, 622.2493896484375, 173.4408721923828, 455.89837646484375, 547.436279296875, 63.3376579284668, 184.53024291992188, 594.5239868164062, 266.37579345703125, 347.9117126464844, 838.7980346679688, 137.45370483398438, 728.5677490234375, 217.5502471923828, 586.6007690429688, 1001.920654296875, -174.4176025390625, -154.61593627929688, 84.0592269897461, 532.6575317382812, 494.19769287109375, 24.50902557373047, 860.500732421875, -20.60938262939453, 1036.03076171875, 271.6890563964844, 205.94190979003906, 481.42694091796875, 553.5386352539062, 326.25604248046875, 604.135009765625, 231.24945068359375, 113.92267608642578, 14.707866668701172, 218.04652404785156, -113.86343383789062, 767.713623046875, 342.5980529785156, -52.706024169921875, 324.99566650390625, -60.262813568115234, 228.52210998535156, -68.40922546386719, 122.54528045654297, 244.81021118164062, 428.2255554199219, 661.3988037109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000416.npy"}
|
||||
{"epoch": 0.6108663729809104, "step": 417, "batch_size": 64, "mean": 259.7749328613281, "std": 270.2167663574219, "min": -260.778076171875, "p10": -81.41023635864256, "median": 221.3822250366211, "p90": 657.9479064941406, "max": 973.9407958984375, "pos_frac": 0.84375, "sample": [30.29218101501465, 17.794692993164062, -7.510747909545898, 286.67657470703125, 7.378124237060547, 567.2843017578125, -9.306442260742188, 214.48745727539062, -146.34939575195312, 661.4649047851562, 55.16342544555664, 618.2613525390625, 414.50872802734375, 127.20806884765625, 188.3666229248047, -84.95439147949219, -94.21867370605469, 708.942626953125, 703.4375, 62.681365966796875, 634.6029052734375, 299.6952209472656, 757.6536254882812, 213.9947509765625, 577.832275390625, 973.9407958984375, 241.98941040039062, 226.62246704101562, 156.54330444335938, 336.02935791015625, 82.4317398071289, 135.5421905517578, -125.53189849853516, 112.34992218017578, 327.745849609375, 600.7731323242188, 28.58157730102539, 291.2170715332031, 378.267822265625, 250.42604064941406, 91.10960388183594, 316.85894775390625, 216.14198303222656, 331.7861633300781, -122.0628662109375, 254.8885498046875, 130.30731201171875, 138.20938110351562, 524.6685180664062, 297.71759033203125, 466.1448974609375, -260.778076171875, 209.42337036132812, 408.9775085449219, 2.8169326782226562, -182.00302124023438, 234.47003173828125, 131.47869873046875, 699.777099609375, 475.81756591796875, 649.7415771484375, 167.2653045654297, 693.6636962890625, -73.14054107666016], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000417.npy"}
|
||||
{"epoch": 0.6123348017621145, "step": 418, "batch_size": 64, "mean": 274.78253173828125, "std": 379.2420654296875, "min": -456.14959716796875, "p10": -148.7391632080078, "median": 201.94451904296875, "p90": 707.0582336425782, "max": 1325.1104736328125, "pos_frac": 0.75, "sample": [217.91510009765625, -138.88308715820312, 219.03915405273438, -14.702091217041016, 109.29606628417969, 1000.3069458007812, 66.66834259033203, 62.565189361572266, 713.6193237304688, 565.5133056640625, -7.75750732421875, -35.720855712890625, 385.3734130859375, -5.342155456542969, 1164.605224609375, 624.049072265625, 326.73028564453125, -165.44105529785156, 544.598388671875, 53.27069091796875, -311.5377502441406, 691.7490234375, -117.90530395507812, 546.1982421875, 229.4366455078125, 1.2489166259765625, 303.2669677734375, 630.7600708007812, 572.935546875, 185.97393798828125, 546.0245971679688, 68.30879211425781, 97.69226837158203, 834.6673583984375, -456.14959716796875, -58.730491638183594, -195.16244506835938, 456.51019287109375, 227.1468963623047, 484.4761962890625, 1325.1104736328125, -55.236366271972656, 71.417236328125, 146.74142456054688, 304.6390380859375, 149.11875915527344, 170.61041259765625, 138.53317260742188, -317.2598876953125, -282.4730224609375, 370.0955810546875, 77.22242736816406, 609.7135009765625, -103.13401794433594, 607.4625854492188, 304.71990966796875, 618.088134765625, 1116.237060546875, 138.98007202148438, 34.87888717651367, 593.9385986328125, 975.7083740234375, 321.3188171386719, -152.96319580078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000418.npy"}
|
||||
{"epoch": 0.6138032305433186, "step": 419, "batch_size": 64, "mean": 301.0840759277344, "std": 337.7168273925781, "min": -519.9669189453125, "p10": -121.15608062744133, "median": 303.91773986816406, "p90": 791.5593994140626, "max": 974.0504150390625, "pos_frac": 0.84375, "sample": [802.4600830078125, -421.76531982421875, 381.2976989746094, 568.5526123046875, 80.38782501220703, 865.969970703125, -29.62191390991211, -47.9073371887207, 26.90838623046875, 695.77001953125, 945.393798828125, 168.24429321289062, 314.9159851074219, 557.5628662109375, 460.9675598144531, 544.3795776367188, 226.12533569335938, 138.85076904296875, -519.9669189453125, 57.061973571777344, -152.54840087890625, 776.447021484375, -259.3453063964844, 292.91949462890625, 611.0562133789062, -218.6505126953125, 168.91629028320312, 124.4585952758789, 462.99639892578125, 916.4842529296875, 401.9894714355469, 95.904052734375, 424.6117248535156, 324.552734375, 432.78948974609375, 88.00371551513672, 121.93269348144531, 23.38677978515625, 521.532470703125, 356.31158447265625, 622.546630859375, 188.441162109375, 798.0361328125, 43.29740524291992, 88.5453872680664, 64.39994812011719, 62.31915283203125, -180.16702270507812, 704.4232177734375, 595.7628784179688, 608.2877197265625, 422.7348937988281, 99.248291015625, 974.0504150390625, 410.859130859375, 423.1433410644531, 490.30078125, 231.34173583984375, 119.44082641601562, 804.3206176757812, -209.92234802246094, -5.989288330078125, 169.67373657226562, 414.9504699707031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000419.npy"}
|
||||
{"epoch": 0.6152716593245228, "step": 420, "batch_size": 64, "mean": 175.04345703125, "std": 404.20941162109375, "min": -922.3924560546875, "p10": -260.60480346679685, "median": 141.72257232666016, "p90": 631.7746154785157, "max": 1244.71484375, "pos_frac": 0.640625, "sample": [-166.4317626953125, 203.75625610351562, -15.587928771972656, -12.65286636352539, 634.4232177734375, -87.1158676147461, -281.7257995605469, -17.46331787109375, 210.9411163330078, 604.392578125, -6.63325309753418, -56.09654998779297, -46.2691535949707, 182.94224548339844, -21.85662841796875, 108.09009552001953, 608.3553466796875, 261.16229248046875, 84.20869445800781, 210.48892211914062, 15.217710494995117, 239.21560668945312, -211.32247924804688, 417.1786193847656, -105.57382202148438, 139.14492797851562, -79.80024719238281, 443.41534423828125, 163.846435546875, -91.25955200195312, 475.8403625488281, 475.12811279296875, 472.2303466796875, -328.90936279296875, 106.34285736083984, 187.1236572265625, -306.17181396484375, 22.43329620361328, 703.0238037109375, 770.91162109375, -13.510213851928711, -922.3924560546875, 641.1181640625, 446.2230529785156, 625.5945434570312, 152.028076171875, 560.318115234375, 436.0422668457031, 376.61016845703125, 348.09490966796875, 33.69055938720703, -485.6843566894531, 285.1578369140625, 49.72146987915039, -66.54972839355469, 1221.1636962890625, -770.4302978515625, 80.21717834472656, 144.3002166748047, 216.884033203125, 1244.71484375, -108.2766342163086, -417.1904602050781, 1219.9921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000420.npy"}
|
||||
{"epoch": 0.6167400881057269, "step": 421, "batch_size": 64, "mean": 206.85391235351562, "std": 321.661865234375, "min": -895.900390625, "p10": -186.62778472900385, "median": 255.9329376220703, "p90": 602.7158142089844, "max": 870.4821166992188, "pos_frac": 0.75, "sample": [335.35595703125, 91.90191650390625, 132.49278259277344, -76.28687286376953, 271.5566711425781, 438.269775390625, 870.4821166992188, 1.954833984375, 101.09373474121094, 75.61000061035156, 285.13787841796875, -4.864048004150391, 590.437744140625, 662.0996704101562, 502.63751220703125, 659.0267333984375, 316.431396484375, -209.34922790527344, 115.82105255126953, 159.92257690429688, 775.77001953125, 477.949951171875, 119.75933837890625, -242.39859008789062, 234.1837921142578, 258.937255859375, 440.9156494140625, 285.09442138671875, -13.399253845214844, 212.84405517578125, 635.09228515625, 378.6275939941406, 607.9778442382812, 252.92861938476562, 339.7785339355469, -532.373291015625, 497.17218017578125, 344.1861572265625, -424.6907653808594, 555.5707397460938, 359.14300537109375, -20.417593002319336, 350.718017578125, 869.8255615234375, 259.6699523925781, 301.00799560546875, 162.57691955566406, -29.28361701965332, 30.83329200744629, 478.97418212890625, 105.49020385742188, -7.119682312011719, 285.468017578125, -895.900390625, -251.3241729736328, 126.20619201660156, -66.515625, -133.611083984375, -96.27099609375, 270.44854736328125, 190.98728942871094, -272.0531311035156, 320.9631652832031, 375.1748046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000421.npy"}
|
||||
{"epoch": 0.618208516886931, "step": 422, "batch_size": 64, "mean": 241.48162841796875, "std": 346.12890625, "min": -782.71875, "p10": -134.23046875, "median": 230.62425231933594, "p90": 682.5755920410156, "max": 1202.6287841796875, "pos_frac": 0.78125, "sample": [312.0087890625, 682.7091674804688, 771.1266479492188, 513.7540893554688, 357.08892822265625, -187.21595764160156, -620.9985961914062, 783.1788330078125, -54.05516815185547, 1202.6287841796875, -147.7425079345703, 178.26551818847656, 248.4519500732422, 48.61083984375, 405.3426513671875, 190.15243530273438, 63.20049285888672, -128.12277221679688, -44.68716049194336, 186.3659210205078, 21.475601196289062, 808.7027587890625, 338.6158447265625, 138.23687744140625, 447.5605163574219, 168.56332397460938, 188.04534912109375, 683.1207275390625, 262.9815368652344, 583.7330322265625, 757.3829956054688, 347.97589111328125, 407.7019958496094, 212.7965545654297, 360.0581359863281, 347.7673034667969, 318.78515625, -428.4002685546875, 291.7359313964844, 40.05169677734375, -46.4642219543457, 389.96514892578125, 210.633544921875, -782.71875, -72.63711547851562, 595.0206298828125, 488.12567138671875, 29.76704216003418, 682.263916015625, -109.50462341308594, 408.66461181640625, 182.10523986816406, 20.95855712890625, 70.83558654785156, 643.6079711914062, 461.7234191894531, 380.70635986328125, -64.65217590332031, 509.0077819824219, 495.7387390136719, -136.84805297851562, 143.1200408935547, 189.37677001953125, -290.92498779296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000422.npy"}
|
||||
{"epoch": 0.6196769456681351, "step": 423, "batch_size": 64, "mean": 272.1937561035156, "std": 310.9986572265625, "min": -431.39471435546875, "p10": -86.6031585693359, "median": 217.64097595214844, "p90": 715.9066833496096, "max": 1097.23779296875, "pos_frac": 0.78125, "sample": [273.6288146972656, 92.75792694091797, 182.31724548339844, 36.48382568359375, -99.97140502929688, -331.0894470214844, 798.2338256835938, 213.68869018554688, 638.423095703125, 475.0898132324219, 731.8250122070312, -136.91070556640625, 909.4207763671875, 293.43731689453125, 503.0030822753906, -31.99545669555664, 797.435546875, 812.367919921875, 794.7188720703125, 678.763916015625, -21.25262451171875, -50.5589599609375, 403.90838623046875, 198.90676879882812, 593.2373657226562, 254.66815185546875, 98.95587921142578, 60.876861572265625, -431.39471435546875, -131.58230590820312, 371.5634765625, -128.3828582763672, 307.7250671386719, 324.8765869140625, 647.2735595703125, 216.34149169921875, -2.9723987579345703, 508.7198486328125, -139.96009826660156, 137.07208251953125, 314.383544921875, 528.2266845703125, 218.94046020507812, 341.3523254394531, 1097.23779296875, 190.76255798339844, 454.7349548339844, -28.943649291992188, 443.92535400390625, 18.056472778320312, 59.41267013549805, 165.0417938232422, 509.892578125, 435.1011657714844, 172.92156982421875, 184.2457733154297, -55.41058349609375, 219.6685791015625, 173.778564453125, -42.938507080078125, 577.2544555664062, 96.70792388916016, 124.69247436523438, 371.7063903808594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000423.npy"}
|
||||
{"epoch": 0.6211453744493393, "step": 424, "batch_size": 64, "mean": 246.94146728515625, "std": 335.9071350097656, "min": -346.24932861328125, "p10": -190.9647735595703, "median": 227.43800354003906, "p90": 631.04404296875, "max": 1261.65380859375, "pos_frac": 0.796875, "sample": [-256.3470764160156, 341.2500915527344, 56.54417419433594, -24.45193099975586, -56.39727783203125, 531.5452880859375, 435.2151184082031, 299.4146423339844, 82.95377349853516, -346.24932861328125, 258.6968688964844, 273.84576416015625, 22.140275955200195, -236.51097106933594, 265.5362243652344, 683.2979736328125, 469.6280212402344, 888.0587158203125, 133.0167694091797, 423.4003601074219, 517.8180541992188, 176.47666931152344, 633.332275390625, 625.704833984375, 322.53314208984375, -129.63491821289062, 230.6537322998047, 95.73722076416016, -212.26763916015625, -194.57012939453125, 119.98332977294922, 1125.13232421875, -182.55227661132812, 302.8516540527344, 405.3380126953125, -124.5113525390625, 353.47283935546875, -282.01025390625, 234.56719970703125, 79.96560668945312, -83.80797576904297, 138.46347045898438, -325.70855712890625, 66.49371337890625, 270.5622863769531, 299.4455261230469, 211.47537231445312, 83.84809112548828, 336.2838439941406, 1261.65380859375, 365.1329650878906, 612.4549560546875, 376.4134826660156, 181.5653076171875, 374.74896240234375, 90.34736633300781, 813.6777954101562, 164.72793579101562, 166.7066650390625, 1087.6466064453125, 467.75970458984375, 224.22227478027344, 129.5416717529297, 147.990478515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000424.npy"}
|
||||
{"epoch": 0.6226138032305433, "step": 425, "batch_size": 64, "mean": 190.22943115234375, "std": 327.501953125, "min": -832.8908081054688, "p10": -142.7957015991211, "median": 196.04537963867188, "p90": 594.6785522460938, "max": 811.0615234375, "pos_frac": 0.765625, "sample": [-91.33145141601562, 21.942487716674805, 428.1379089355469, -336.84429931640625, -832.8908081054688, 189.58438110351562, 184.8005828857422, 275.09991455078125, 125.54391479492188, 192.99977111816406, -75.45134735107422, 520.6321411132812, -771.5028076171875, -60.83449172973633, 282.0426940917969, 198.03656005859375, 121.31632232666016, 151.4298095703125, 598.7151489257812, 24.894729614257812, 356.33258056640625, 195.80706787109375, -1.6959495544433594, 238.90357971191406, 643.637939453125, -43.50530242919922, 477.2652587890625, 71.41828918457031, -78.34757995605469, 791.2275390625, 136.82168579101562, 585.2598266601562, 616.5952758789062, -214.9292449951172, 176.60047912597656, 259.2447814941406, 678.9827270507812, 163.0972137451172, 83.43536376953125, 751.666259765625, -481.53875732421875, 396.99578857421875, 100.19929504394531, -347.1521301269531, 489.4547424316406, 357.82122802734375, 446.8091735839844, 226.81683349609375, 6.14617919921875, 368.26239013671875, 196.28369140625, 233.8297119140625, -143.4524688720703, -141.26324462890625, 258.9889221191406, 355.4774169921875, 492.511962890625, 811.0615234375, 479.7212829589844, 263.8235168457031, 481.50799560546875, -128.18142700195312, 259.8353271484375, 156.58612060546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000425.npy"}
|
||||
{"epoch": 0.6240822320117474, "step": 426, "batch_size": 64, "mean": 240.19671630859375, "std": 305.95257568359375, "min": -649.3045654296875, "p10": -65.49773178100583, "median": 183.42970275878906, "p90": 687.3767822265627, "max": 855.2794799804688, "pos_frac": 0.796875, "sample": [259.48126220703125, 105.62689971923828, 106.9115982055664, 172.624267578125, 230.98153686523438, 74.62117004394531, 104.94891357421875, 223.87744140625, 742.2926025390625, 159.16798400878906, 165.10462951660156, 237.40737915039062, 11.208797454833984, -0.17346763610839844, 638.1505126953125, -241.86749267578125, 64.32127380371094, 207.55160522460938, 499.0206604003906, 166.71295166015625, -78.05020904541016, 768.6681518554688, -138.68389892578125, 194.23513793945312, 126.28953552246094, 111.45455932617188, -113.47737121582031, 358.9857482910156, 780.8685302734375, 387.9369812011719, 226.7650909423828, 616.98095703125, 68.98114776611328, 348.1797180175781, 165.97401428222656, 825.049560546875, -36.2086181640625, -356.19183349609375, 117.82454681396484, 493.667236328125, -35.98187255859375, 343.10205078125, 855.2794799804688, 75.65868377685547, 449.18597412109375, 52.292686462402344, 348.7048645019531, 543.7017211914062, -21.2967529296875, 102.72114562988281, -290.38140869140625, 354.6189880371094, -11.714698791503906, 435.56903076171875, 558.0777587890625, 308.215576171875, -16.59795570373535, 466.248779296875, 829.3765869140625, -649.3045654296875, 584.3741455078125, 708.4737548828125, 47.301605224609375, 537.7457275390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000426.npy"}
|
||||
{"epoch": 0.6255506607929515, "step": 427, "batch_size": 64, "mean": 180.07781982421875, "std": 286.99896240234375, "min": -452.6756591796875, "p10": -160.45197906494138, "median": 154.61355590820312, "p90": 443.8719970703126, "max": 956.353759765625, "pos_frac": 0.765625, "sample": [332.7876281738281, 55.31534957885742, 320.97662353515625, 410.6332092285156, 818.9610595703125, 82.70931243896484, -51.870338439941406, 12.037452697753906, 386.8540954589844, 246.35202026367188, -22.863189697265625, 60.68513488769531, 118.33151245117188, 154.13050842285156, 155.0966033935547, 359.306396484375, 417.2903137207031, 888.8665771484375, 34.122596740722656, -237.89175415039062, 37.307212829589844, 133.13543701171875, 306.92547607421875, 325.0092468261719, 107.8171615600586, 305.1351318359375, 269.8572998046875, 902.0559692382812, 29.094100952148438, 170.7053680419922, -220.6746826171875, 487.8512268066406, 423.2862548828125, -114.8899917602539, -92.65388488769531, 280.63897705078125, 338.00567626953125, -57.334896087646484, 452.6944580078125, 189.54458618164062, 224.35955810546875, 115.63475036621094, 134.23550415039062, 141.13925170898438, -452.6756591796875, 270.0388488769531, 414.80511474609375, 395.4725036621094, -350.347900390625, -304.31524658203125, 483.76416015625, 219.96160888671875, 393.0436096191406, 2.9090805053710938, 956.353759765625, -119.99362182617188, 62.0302734375, 312.3968811035156, 364.0513610839844, 106.0516586303711, -140.0765380859375, -324.6298522949219, -25.385887145996094, -169.18431091308594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000427.npy"}
|
||||
{"epoch": 0.6270190895741556, "step": 428, "batch_size": 64, "mean": 253.51040649414062, "std": 369.8224792480469, "min": -682.4998168945312, "p10": -152.75807266235347, "median": 238.04273986816406, "p90": 739.6875732421875, "max": 1103.1668701171875, "pos_frac": 0.765625, "sample": [-47.02299118041992, -196.83297729492188, 19.256555557250977, 420.3139953613281, 821.05419921875, 407.64947509765625, 112.47679138183594, 538.272216796875, 418.4053955078125, 1103.1668701171875, -224.65127563476562, 55.04779052734375, 510.7420959472656, 215.02694702148438, 442.7959899902344, -98.66424560546875, -585.969482421875, 740.7425537109375, 465.38189697265625, 563.8134765625, 484.20111083984375, 401.93115234375, -79.82119750976562, 26.25320053100586, 474.7798767089844, 752.5039672851562, 378.47210693359375, 210.275634765625, -96.5399169921875, 274.4470520019531, -58.13677215576172, 368.52471923828125, 892.50146484375, -88.7422103881836, 261.05853271484375, 29.54680061340332, 564.3763427734375, 494.8524169921875, 26.781299591064453, 198.90325927734375, 96.69308471679688, 153.1824951171875, 3.6314010620117188, -117.99662017822266, -591.3901977539062, -76.86238098144531, 68.53977966308594, 126.44049835205078, 1086.1376953125, 168.70321655273438, 451.9906005859375, 450.42596435546875, 549.729736328125, -682.4998168945312, 553.6224975585938, 277.3660888671875, 737.2259521484375, 769.0225830078125, 369.54498291015625, 179.3059539794922, 682.2373046875, -167.6558380126953, 126.06565856933594, -185.96810913085938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000428.npy"}
|
||||
{"epoch": 0.6284875183553598, "step": 429, "batch_size": 64, "mean": 192.28273010253906, "std": 349.7599182128906, "min": -621.39306640625, "p10": -232.1340148925781, "median": 193.67237854003906, "p90": 609.4571044921876, "max": 1030.1611328125, "pos_frac": 0.734375, "sample": [70.0045394897461, 157.658447265625, -380.669189453125, 319.6271057128906, -175.19070434570312, 619.2205810546875, -122.85692596435547, 124.62964630126953, 164.54656982421875, 362.60272216796875, -406.81707763671875, 179.50076293945312, 57.34862518310547, 132.17372131347656, -278.17559814453125, 342.3856506347656, 129.76480102539062, -233.44593811035156, 715.6298828125, -218.38771057128906, 1030.1611328125, 954.036865234375, 62.3221435546875, 374.1121520996094, 227.14675903320312, 38.08249282836914, 333.25897216796875, 8.180095672607422, 650.5851440429688, 519.394775390625, 240.25860595703125, 389.75665283203125, 382.97015380859375, -602.6761474609375, -78.4372329711914, -229.07286071777344, -240.56283569335938, 216.34388732910156, 236.5827178955078, -111.4657211303711, 573.329833984375, 370.77166748046875, 281.48834228515625, -25.671478271484375, 811.2740478515625, 294.2845458984375, 555.0064697265625, -210.90078735351562, 137.78904724121094, -178.47531127929688, 333.3249816894531, 156.2998809814453, 498.65570068359375, 207.843994140625, -1.476144790649414, -621.39306640625, 5.2016754150390625, 290.14825439453125, 56.76068878173828, 586.6756591796875, 389.7723083496094, 909.1142578125, 487.17694091796875, 438.56597900390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000429.npy"}
|
||||
{"epoch": 0.6299559471365639, "step": 430, "batch_size": 64, "mean": 200.9507293701172, "std": 279.0693664550781, "min": -364.5477294921875, "p10": -93.379630279541, "median": 154.52639770507812, "p90": 616.9558837890628, "max": 1026.72705078125, "pos_frac": 0.765625, "sample": [232.25811767578125, 210.1974334716797, -32.512229919433594, -79.33395385742188, 712.398193359375, 415.2826232910156, 19.404815673828125, 253.50970458984375, 158.60733032226562, 517.1475219726562, 61.04121780395508, -32.76799774169922, 87.35262298583984, -98.9593276977539, 120.37083435058594, 711.7987670898438, -276.7811584472656, 401.93646240234375, 77.52079772949219, 84.17007446289062, 191.2287139892578, 99.25711822509766, 145.3650665283203, 150.44546508789062, -21.202966690063477, 85.83158874511719, 383.53302001953125, 334.7643737792969, -80.36033630371094, 195.87562561035156, 84.78282165527344, -150.6365203857422, 1026.72705078125, -15.210220336914062, 108.3761978149414, 549.0935668945312, 42.39147186279297, -364.5477294921875, 23.843931198120117, 228.8636016845703, 454.6043701171875, 750.0943603515625, 441.9651184082031, 133.0791015625, 268.21563720703125, 700.298583984375, -188.4542999267578, -215.87457275390625, 495.2910461425781, 162.02195739746094, 78.31340026855469, -56.16953659057617, 436.4301452636719, 227.44065856933594, -4.25897216796875, 277.28106689453125, 348.173828125, -226.38986206054688, 338.3731689453125, 791.4745483398438, 646.0397338867188, 224.94473266601562, 29.23011016845703, 187.65904235839844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000430.npy"}
|
||||
{"epoch": 0.631424375917768, "step": 431, "batch_size": 64, "mean": 296.3401794433594, "std": 311.6828918457031, "min": -431.9054870605469, "p10": -105.97272109985347, "median": 258.3608093261719, "p90": 654.9005432128907, "max": 995.7406005859375, "pos_frac": 0.84375, "sample": [-231.79771423339844, 175.2014923095703, -144.46775817871094, 476.0992126464844, 238.73367309570312, 99.14701080322266, 785.0052490234375, 154.62469482421875, 345.9182434082031, 522.3546142578125, 256.8866882324219, 449.7737121582031, 103.2664794921875, -54.24300765991211, -135.31692504882812, 154.30601501464844, 32.490020751953125, -404.95306396484375, 539.4158935546875, -237.7196502685547, 620.3228759765625, 805.1655883789062, 995.7406005859375, 858.138671875, 150.68040466308594, 624.7637939453125, 408.8958740234375, 439.035888671875, 250.34446716308594, 287.71148681640625, 664.490966796875, 360.88934326171875, 242.702392578125, 83.67604064941406, 441.38330078125, -431.9054870605469, 976.3909912109375, 7.3450164794921875, 259.8349304199219, -65.72258758544922, 486.0613708496094, 228.52027893066406, 871.7593994140625, -3.5916481018066406, 169.4008331298828, 184.00216674804688, 424.09722900390625, 31.010324478149414, 271.7071838378906, 330.556884765625, 399.4628601074219, 626.4011840820312, 632.5228881835938, 351.32257080078125, 526.993408203125, 429.7541809082031, 172.090576171875, 118.07575225830078, -123.2227783203125, 236.14939880371094, 595.9600219726562, 179.0113525390625, 174.4451141357422, 548.671142578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000431.npy"}
|
||||
{"epoch": 0.6328928046989721, "step": 432, "batch_size": 64, "mean": 277.9825134277344, "std": 291.83416748046875, "min": -301.68914794921875, "p10": -9.546617507934561, "median": 232.50311279296875, "p90": 687.4619567871093, "max": 1293.360107421875, "pos_frac": 0.875, "sample": [35.89079284667969, 167.42324829101562, 199.7931365966797, 244.27923583984375, 193.10035705566406, 205.19505310058594, 143.06756591796875, 10.307422637939453, 170.645263671875, 3.5677032470703125, 33.338077545166016, 22.483287811279297, 653.301513671875, 420.3561096191406, -13.153766632080078, 1293.360107421875, 581.2919921875, 279.7154846191406, 4.9843902587890625, 220.72698974609375, 175.92758178710938, 68.56613159179688, 273.3524169921875, 45.574188232421875, 497.67919921875, 295.1812744140625, 685.2906494140625, 173.69834899902344, 435.244384765625, 50.40655517578125, 254.26495361328125, -19.561317443847656, -180.53929138183594, 247.2772979736328, -159.38616943359375, 613.9800415039062, 156.6365203857422, -151.5294952392578, 354.8279724121094, 140.2055206298828, 188.73892211914062, -131.8951416015625, 312.34027099609375, 454.51434326171875, 269.1627197265625, 387.280517578125, 612.0333862304688, 432.2349853515625, 406.7939453125, 349.4140625, 152.35292053222656, 535.6626586914062, 18.487686157226562, 822.9622802734375, 268.3855895996094, 119.57302856445312, 751.6953735351562, 342.1283264160156, 688.3925170898438, 834.0179443359375, -301.68914794921875, 755.4327392578125, -1.1299362182617188, 697.220703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000432.npy"}
|
||||
{"epoch": 0.6343612334801763, "step": 433, "batch_size": 64, "mean": 185.67514038085938, "std": 313.65447998046875, "min": -781.3613891601562, "p10": -189.79500122070309, "median": 196.85953521728516, "p90": 559.5354431152344, "max": 701.102294921875, "pos_frac": 0.765625, "sample": [31.533050537109375, 531.0285034179688, 502.7533264160156, 475.5074462890625, 418.8401794433594, -118.07252502441406, 289.2899169921875, -472.489013671875, 499.2437438964844, 570.60107421875, -781.3613891601562, 38.91923522949219, 562.4290771484375, 321.8430480957031, 89.55340576171875, 12.723766326904297, 189.92649841308594, 307.9010009765625, 615.7094116210938, -735.6954345703125, -18.5462646484375, 552.7836303710938, 562.9437866210938, 295.19580078125, 219.27810668945312, 189.60513305664062, 47.51277160644531, 203.79257202148438, -41.66032409667969, 392.1481628417969, 28.226531982421875, 701.102294921875, -298.6877136230469, 432.74505615234375, 156.3175506591797, 431.08660888671875, 500.27459716796875, -229.98001098632812, 344.230224609375, 8.667793273925781, 678.1493530273438, 491.0878601074219, 268.8655090332031, -129.16717529296875, 188.8275146484375, 57.354591369628906, 350.4879455566406, 14.147804260253906, -204.94874572753906, -266.78533935546875, 592.0054321289062, 421.4702453613281, 172.5316162109375, -33.96128463745117, 118.60018920898438, -154.43626403808594, 9.034400939941406, 363.0152282714844, -76.88200378417969, -0.13443756103515625, 307.51220703125, 414.09088134765625, 185.60389709472656, 289.5189208984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000433.npy"}
|
||||
{"epoch": 0.6358296622613803, "step": 434, "batch_size": 64, "mean": 224.49386596679688, "std": 348.83428955078125, "min": -282.6473388671875, "p10": -95.26909027099607, "median": 165.888427734375, "p90": 650.6037536621096, "max": 1978.857177734375, "pos_frac": 0.71875, "sample": [459.2978210449219, 201.2190399169922, 1978.857177734375, 86.85194396972656, -117.21156311035156, 43.327423095703125, 192.17404174804688, 439.4589538574219, -5.451833724975586, -58.55821228027344, 236.8166961669922, -79.15348815917969, 86.3728256225586, 671.4127197265625, -37.5266227722168, 115.8128662109375, -115.7853012084961, -282.6473388671875, 488.5182189941406, 602.0494995117188, 812.7061767578125, -162.21640014648438, 127.94790649414062, 231.22357177734375, 293.6058349609375, 191.29710388183594, -11.339040756225586, -40.1368408203125, 227.0673370361328, 236.83944702148438, 82.7540283203125, 210.46633911132812, 407.1806945800781, -101.39128112792969, 35.54029846191406, 423.5848693847656, 117.20852661132812, 101.56179809570312, 196.61553955078125, 165.04116821289062, -53.49505615234375, 386.2742614746094, 596.484375, 75.06352233886719, 166.73568725585938, 260.47650146484375, 422.2157897949219, 131.1014862060547, 184.03729248046875, 806.2904663085938, 290.04949951171875, -42.14849853515625, 701.0137939453125, 7.423572540283203, 137.4336395263672, -20.612762451171875, 849.2034912109375, 189.3912353515625, -80.98397827148438, -184.850341796875, 847.2916259765625, 472.7655029296875, -5.0756683349609375, -219.86961364746094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000434.npy"}
|
||||
{"epoch": 0.6372980910425844, "step": 435, "batch_size": 64, "mean": 244.86376953125, "std": 302.4591064453125, "min": -548.207763671875, "p10": -40.08277587890624, "median": 227.8274688720703, "p90": 624.2052856445314, "max": 992.9705810546875, "pos_frac": 0.828125, "sample": [951.7783203125, 399.379638671875, -548.207763671875, 361.55865478515625, 515.0526733398438, 237.39517211914062, 312.9373779296875, 512.9464721679688, 340.8703308105469, 100.41645812988281, 523.5924682617188, 179.02366638183594, 85.6324462890625, 264.3952331542969, -44.33290100097656, 590.913330078125, 992.9705810546875, 849.0405883789062, 260.806640625, -131.48826599121094, -11.371490478515625, 5.30316162109375, -30.165817260742188, 37.49125671386719, -114.29196166992188, 100.35360717773438, 248.27992248535156, 226.55670166015625, -120.18250274658203, 213.35699462890625, 255.21038818359375, -15.84321403503418, 157.46405029296875, 334.9220275878906, 451.42535400390625, 328.2316589355469, 440.9908447265625, -464.528076171875, 483.5765686035156, -355.6251220703125, 217.17828369140625, 229.09823608398438, 638.4732666015625, 1.49285888671875, 104.0873031616211, 50.90114212036133, 528.9202880859375, 557.3208618164062, 18.30405044555664, -16.445497512817383, 823.57421875, 44.30171203613281, 237.14205932617188, 49.9337158203125, 178.61672973632812, 709.2234497070312, 649.4585571289062, 270.5333557128906, 213.59495544433594, 473.021728515625, 153.02609252929688, 3.3448333740234375, 397.903564453125, 212.44161987304688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000435.npy"}
|
||||
{"epoch": 0.6387665198237885, "step": 436, "batch_size": 64, "mean": 213.5735321044922, "std": 277.6088562011719, "min": -596.918212890625, "p10": -139.2433006286621, "median": 184.8743133544922, "p90": 524.2713806152344, "max": 887.57666015625, "pos_frac": 0.828125, "sample": [-85.63920593261719, 169.09056091308594, -167.5062713623047, 60.260643005371094, 517.9721069335938, 260.98284912109375, 326.8407287597656, 777.6779174804688, 516.438720703125, -163.28726196289062, 151.93016052246094, 804.859619140625, -11.354888916015625, -154.7533721923828, 267.7130126953125, 241.25439453125, -147.12339782714844, 44.332489013671875, 872.3900146484375, -220.17977905273438, 419.177490234375, 365.3528747558594, 267.12030029296875, 380.18194580078125, 526.9710693359375, 113.54107666015625, 425.432861328125, 570.9832153320312, 102.68757629394531, 53.79255676269531, 887.57666015625, 45.99237823486328, 478.2041320800781, 160.5799102783203, 276.4778137207031, 186.30191040039062, 59.46153259277344, 48.190773010253906, 249.36376953125, 130.21435546875, 425.4653015136719, 63.290103912353516, 181.482421875, 120.12317657470703, 138.6888885498047, 296.1444396972656, 234.0612335205078, -172.3565673828125, 183.44671630859375, 194.02374267578125, 280.54132080078125, 212.41555786132812, 223.80575561523438, 458.3995361328125, 827.6874389648438, 32.75053405761719, 84.01593780517578, 28.320663452148438, -596.918212890625, 224.3633575439453, -120.85640716552734, 109.71969604492188, 448.4774169921875, -17.889190673828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000436.npy"}
|
||||
{"epoch": 0.6402349486049926, "step": 437, "batch_size": 64, "mean": 220.84902954101562, "std": 268.0671691894531, "min": -455.6229248046875, "p10": -83.92538604736328, "median": 217.6727523803711, "p90": 535.9862426757813, "max": 968.8397827148438, "pos_frac": 0.78125, "sample": [116.50404357910156, -254.15414428710938, 211.9887237548828, 462.4299011230469, 386.9308776855469, 18.477407455444336, 603.6633911132812, 181.67034912109375, 343.26397705078125, -30.441253662109375, -455.6229248046875, 214.67498779296875, -276.6971130371094, 447.2334899902344, -46.103363037109375, 203.89309692382812, 166.01797485351562, -240.29403686523438, 167.04681396484375, 541.70458984375, 44.35540008544922, 405.47564697265625, 220.67051696777344, 1.2797908782958984, -133.4515380859375, 614.876708984375, 178.65744018554688, 112.6337661743164, 386.80517578125, 366.0675048828125, 769.552490234375, 343.65643310546875, 711.460205078125, -87.15592956542969, 968.8397827148438, -76.387451171875, -37.408531188964844, 17.475584030151367, 265.1935119628906, 243.59829711914062, 17.387718200683594, 27.150543212890625, 276.949462890625, 321.635009765625, 386.69427490234375, 162.80743408203125, 460.6252746582031, 438.5615234375, 437.97650146484375, 522.6434326171875, 178.3894500732422, 441.9559631347656, 401.20318603515625, -48.822265625, -101.2232894897461, 565.6619262695312, 298.6283874511719, 390.3963623046875, -69.18913269042969, 277.42333984375, 38.024539947509766, 416.7113952636719, 227.27066040039062, -12.905693054199219], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000437.npy"}
|
||||
{"epoch": 0.6417033773861968, "step": 438, "batch_size": 64, "mean": 254.470458984375, "std": 280.3930358886719, "min": -390.41815185546875, "p10": -64.91844482421874, "median": 241.19740295410156, "p90": 643.617724609375, "max": 941.7203369140625, "pos_frac": 0.8125, "sample": [90.872314453125, 29.42533302307129, 393.3481140136719, -97.10731506347656, 157.21133422851562, 121.46846008300781, 801.663818359375, -44.49726867675781, 155.76547241210938, -50.30908203125, 283.29437255859375, 390.5615234375, 3.2240867614746094, 236.52761840820312, 382.61651611328125, 445.50604248046875, 302.89715576171875, 941.7203369140625, 119.89497375488281, 325.4915771484375, 155.84774780273438, 579.8436889648438, 389.92071533203125, 628.8485107421875, 165.61532592773438, 191.0257568359375, 43.84980010986328, 276.8838806152344, -330.1214294433594, -44.1634521484375, 9.965696334838867, -3.451292037963867, 444.3481140136719, 676.4754028320312, 581.0574951171875, 338.4203186035156, 258.88934326171875, -303.2053527832031, 551.178955078125, 299.3300476074219, 701.632568359375, 191.17269897460938, -89.83790588378906, 716.5970458984375, -55.02154541015625, 245.8671875, 526.59033203125, 386.055419921875, 472.57635498046875, 134.84100341796875, 85.96575927734375, 329.9692687988281, 705.74609375, 93.98826599121094, -69.15997314453125, 121.29806518554688, 193.85302734375, -390.41815185546875, -78.81452178955078, 600.1888427734375, 123.42533874511719, 649.9473876953125, 331.4766845703125, 458.0364074707031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000438.npy"}
|
||||
{"epoch": 0.6431718061674009, "step": 439, "batch_size": 64, "mean": 209.87954711914062, "std": 323.9346923828125, "min": -518.1369018554688, "p10": -84.1396415710449, "median": 157.5280532836914, "p90": 699.7507263183594, "max": 1040.1805419921875, "pos_frac": 0.75, "sample": [257.4119567871094, 743.2926025390625, 102.50942993164062, 311.8546142578125, 90.042236328125, -65.57339477539062, 700.4603881835938, 50.62965393066406, 169.45309448242188, 47.30352020263672, -365.5030517578125, 701.446044921875, 87.60649108886719, -41.62706756591797, -6.302696228027344, 291.4482116699219, 320.9125061035156, 524.712158203125, 654.4495849609375, 195.98007202148438, 87.95521545410156, 908.4019775390625, 233.36270141601562, -50.249237060546875, 182.33709716796875, -207.3492431640625, 145.60301208496094, -14.559406280517578, -3.4623489379882812, 43.46758270263672, 313.7989196777344, -206.10226440429688, 248.30259704589844, 430.6458435058594, 1040.1805419921875, 349.56268310546875, 566.4623413085938, 236.23370361328125, 100.88313293457031, 285.77880859375, 134.7952880859375, -88.66915130615234, -394.75653076171875, -46.870697021484375, 40.750099182128906, 215.19622802734375, 136.23617553710938, 260.98040771484375, -371.8164978027344, 584.00146484375, 698.0948486328125, 33.807247161865234, 254.14707946777344, 738.3046875, 81.94953155517578, -71.56004333496094, 479.15582275390625, 384.9121398925781, 106.1964111328125, 87.52684020996094, 991.3967895507812, 308.46038818359375, -518.1369018554688, -73.57078552246094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000439.npy"}
|
||||
{"epoch": 0.644640234948605, "step": 440, "batch_size": 64, "mean": 237.09425354003906, "std": 338.2253112792969, "min": -269.9552307128906, "p10": -111.87857055664062, "median": 141.69517517089844, "p90": 712.3460571289063, "max": 1421.0328369140625, "pos_frac": 0.78125, "sample": [310.39459228515625, 346.2265625, 362.6537170410156, 329.979248046875, -118.62042236328125, 44.91362762451172, 622.2859497070312, 117.86322784423828, 58.74019241333008, 821.0775756835938, 38.648521423339844, 108.91085815429688, 264.0843505859375, 122.15371704101562, 746.1121826171875, -197.011474609375, 131.22439575195312, -188.8848419189453, 28.365447998046875, 6.532676696777344, 178.65451049804688, 590.5460815429688, 103.8123779296875, 44.03260040283203, 385.58880615234375, 233.2590789794922, -113.22075653076172, 267.4451904296875, 402.87481689453125, 97.40786743164062, 121.66714477539062, 696.8355712890625, -100.01699829101562, 266.98455810546875, 18.051454544067383, -57.74969482421875, 1189.8101806640625, -269.9552307128906, 207.84637451171875, -7.423883438110352, 839.2601318359375, 267.3398742675781, 938.696533203125, 495.56591796875, 215.62612915039062, 65.9223403930664, -131.91574096679688, 152.18760681152344, 420.1643371582031, 508.25128173828125, 152.16595458984375, 718.993408203125, -28.422706604003906, 113.7334976196289, 13.149654388427734, 532.1976318359375, 40.34714126586914, -40.55804443359375, -108.7468032836914, 166.91217041015625, 1421.0328369140625, 501.2826843261719, -265.0733642578125, -26.18096923828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000440.npy"}
|
||||
{"epoch": 0.6461086637298091, "step": 441, "batch_size": 64, "mean": 242.307861328125, "std": 422.6373291015625, "min": -758.6912231445312, "p10": -224.913833618164, "median": 218.89728546142578, "p90": 728.9767150878907, "max": 1949.5408935546875, "pos_frac": 0.703125, "sample": [261.6409912109375, 163.05677795410156, 222.4073486328125, 527.698486328125, 432.7228698730469, 780.564208984375, -61.33282470703125, 29.131820678710938, 145.86239624023438, -385.1640319824219, -144.0116424560547, -47.61113739013672, 693.4464111328125, 966.3817138671875, 330.0970458984375, -356.9550476074219, 136.8452911376953, 750.29150390625, 112.67236328125, -18.709136962890625, 41.45164489746094, 106.52293395996094, -9.868011474609375, 741.0672607421875, 755.0481567382812, 120.77310180664062, 243.91929626464844, 397.6943664550781, 377.23663330078125, 1949.5408935546875, 58.84009552001953, -489.9412536621094, 298.030029296875, 592.77099609375, -478.29693603515625, -164.0416259765625, 120.5118179321289, 282.6739501953125, 627.72216796875, -385.3050537109375, -80.49166870117188, 499.874267578125, -68.92631530761719, -758.6912231445312, 443.01873779296875, 462.75421142578125, 363.5408020019531, 446.2137756347656, 359.3819274902344, -55.92678451538086, 215.38722229003906, -251.00192260742188, -39.40716552734375, 406.251953125, 338.64813232421875, 101.02166748046875, 700.7654418945312, 382.50091552734375, 519.1722412109375, 1072.838623046875, -24.266239166259766, -62.498252868652344, 142.3424072265625, 669.8145751953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000441.npy"}
|
||||
{"epoch": 0.6475770925110133, "step": 442, "batch_size": 64, "mean": 288.12567138671875, "std": 379.1205749511719, "min": -467.2045593261719, "p10": -68.66317634582518, "median": 178.54246520996094, "p90": 771.5019775390625, "max": 1239.7232666015625, "pos_frac": 0.796875, "sample": [-77.25972747802734, 172.7056121826172, 528.7990112304688, 45.18359375, 301.36688232421875, 149.68392944335938, -23.25298309326172, 223.6863555908203, 64.03373718261719, 60.106197357177734, 5.545787811279297, 413.66937255859375, -8.504371643066406, 430.351806640625, 384.6461181640625, 643.2684326171875, 902.2628784179688, 425.247314453125, -13.7484130859375, 184.3793182373047, 170.16656494140625, 511.09381103515625, 48.20362854003906, 299.8768005371094, -206.4717254638672, 239.1339111328125, 759.1015014648438, 320.48321533203125, 954.0906982421875, 197.1710205078125, 109.42666625976562, -467.2045593261719, 141.45217895507812, -86.01322937011719, -4.435127258300781, 591.0211181640625, -48.604557037353516, 486.8692321777344, 102.24425506591797, 1215.86328125, -227.8431396484375, 324.4539794921875, 776.8164672851562, 41.39617156982422, 54.50469970703125, 1239.7232666015625, 754.73779296875, 270.5860900878906, 628.7326049804688, 108.61495208740234, 63.2620849609375, -352.6156311035156, 30.027191162109375, 76.27552795410156, -25.527297973632812, -221.26829528808594, 342.1467590332031, 480.462646484375, 1203.456298828125, 686.4143676757812, 1115.3587646484375, 751.0132446289062, 124.24507141113281, 49.42994689941406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000442.npy"}
|
||||
{"epoch": 0.6490455212922174, "step": 443, "batch_size": 64, "mean": 181.2572021484375, "std": 407.65924072265625, "min": -828.9405517578125, "p10": -247.88780517578124, "median": 179.47354125976562, "p90": 629.2391906738285, "max": 1395.349365234375, "pos_frac": 0.765625, "sample": [1229.2459716796875, 77.80950164794922, 886.222900390625, 19.854019165039062, 280.9000549316406, 120.44512939453125, -63.699867248535156, -104.83613586425781, -408.67828369140625, 108.85201263427734, 55.11149597167969, 28.119003295898438, 176.6466522216797, 222.20452880859375, 64.18545532226562, -783.1419067382812, 127.05030822753906, -216.24537658691406, -41.488731384277344, 13.553586959838867, 1173.0128173828125, 369.1768798828125, 547.1105346679688, 304.9827575683594, -594.1082153320312, -630.7301635742188, 195.74826049804688, 234.93251037597656, 661.7214965820312, -828.9405517578125, 389.26068115234375, 376.7913818359375, 294.7744140625, 135.24024963378906, 119.60157775878906, -127.04895782470703, 143.38092041015625, 429.9681396484375, 455.6659240722656, 184.6569061279297, -248.76683044433594, -59.60581970214844, 426.7402038574219, -270.1935119628906, 390.16119384765625, 126.79508972167969, 308.2156982421875, 1395.349365234375, 300.9069519042969, 132.571533203125, 553.4471435546875, 104.92742919921875, 196.3126983642578, 182.30043029785156, -180.2158966064453, 283.9382019042969, -245.8367462158203, 246.4922637939453, 166.6702880859375, 218.57965087890625, 809.0203857421875, 752.0997314453125, 194.3543701171875, 188.88870239257812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000443.npy"}
|
||||
{"epoch": 0.6505139500734214, "step": 444, "batch_size": 64, "mean": 291.16546630859375, "std": 382.1694030761719, "min": -600.3107299804688, "p10": -109.14409484863279, "median": 243.67723846435547, "p90": 756.6373046875004, "max": 1450.9932861328125, "pos_frac": 0.78125, "sample": [345.627197265625, -329.63336181640625, 83.06415557861328, 595.219482421875, 32.04308319091797, 326.4784851074219, -177.66114807128906, 1450.9932861328125, 839.6063232421875, -47.99495315551758, -91.20956420898438, 220.14102172851562, 185.58642578125, 561.1853637695312, 618.9464111328125, 154.31121826171875, 798.6385498046875, 458.85791015625, -154.81967163085938, 124.39979553222656, 78.60279083251953, -16.324411392211914, 401.0252380371094, 845.5009765625, 87.28797149658203, 370.13067626953125, 329.4105224609375, -35.111083984375, 243.73350524902344, -401.73577880859375, -18.41106605529785, 1401.490966796875, 285.8908996582031, 269.13616943359375, 308.0478515625, 462.60491943359375, 1043.859130859375, 429.1774597167969, 526.4725952148438, 226.34512329101562, -116.830322265625, 1000.4182739257812, 624.320068359375, 69.63423919677734, 555.4668579101562, 560.3212280273438, 616.8084716796875, -0.9400844573974609, 182.32679748535156, 148.88587951660156, 248.9389190673828, 199.78977966308594, 263.40972900390625, 213.77401733398438, 481.3269958496094, 243.6209716796875, 17.008943557739258, -39.93745422363281, -288.1499938964844, 67.02658081054688, 658.6343994140625, -600.3107299804688, 205.32644653320312, 492.80609130859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000444.npy"}
|
||||
{"epoch": 0.6519823788546255, "step": 445, "batch_size": 64, "mean": 275.9429931640625, "std": 352.4450988769531, "min": -421.99224853515625, "p10": -113.23955383300779, "median": 213.82263946533203, "p90": 687.4292785644532, "max": 1289.794189453125, "pos_frac": 0.796875, "sample": [232.69161987304688, 57.21794891357422, 3.4404220581054688, 876.9952392578125, 17.913528442382812, 555.2823486328125, 488.63751220703125, 125.82894134521484, 41.126922607421875, 177.98838806152344, 118.6009521484375, 599.202880859375, 21.664077758789062, 419.1983947753906, 209.08047485351562, -421.99224853515625, 81.98812866210938, 466.2181091308594, 416.7038269042969, 218.56480407714844, 412.1383056640625, 377.13116455078125, 523.0631103515625, 681.1966552734375, 54.5457649230957, 133.96441650390625, 1289.794189453125, 472.2721862792969, 534.0497436523438, 638.1159057617188, 702.0958251953125, 673.7062377929688, 174.29884338378906, 692.3230590820312, 14.650009155273438, -192.1153564453125, 205.64498901367188, 314.2275695800781, -123.37486267089844, 549.8582153320312, 30.643089294433594, 1004.1171875, 498.5564880371094, -273.9095458984375, 690.1004028320312, 315.77606201171875, 139.13702392578125, -234.9840087890625, -5.225639343261719, -64.49739074707031, 273.0466003417969, -47.81909942626953, -88.81370544433594, 415.71099853515625, 119.9598388671875, 373.0858154296875, -31.324310302734375, -234.97787475585938, 1214.2554931640625, -89.59049987792969, 599.1041259765625, 174.5264892578125, 380.8966064453125, -331.3621520996094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000445.npy"}
|
||||
{"epoch": 0.6534508076358296, "step": 446, "batch_size": 64, "mean": 297.735595703125, "std": 318.5719299316406, "min": -617.8701782226562, "p10": -27.297586822509757, "median": 258.38414764404297, "p90": 756.0079772949218, "max": 1146.9598388671875, "pos_frac": 0.828125, "sample": [19.08460235595703, 397.46807861328125, 27.088638305664062, 760.08837890625, 856.3079223632812, 349.8673400878906, 16.314075469970703, -617.8701782226562, 223.17164611816406, 687.7974853515625, 843.4495239257812, 401.39105224609375, -3.7780990600585938, 718.7744140625, 486.5478820800781, 592.380126953125, 607.42431640625, 363.15216064453125, 172.83349609375, 391.41064453125, -51.01568603515625, 164.43048095703125, 136.2218017578125, 218.03746032714844, 236.54859924316406, -401.30914306640625, 129.2909393310547, 154.79556274414062, 508.275634765625, 282.2353515625, 468.0987243652344, -99.93595886230469, -17.634674072265625, 193.53688049316406, 36.862953186035156, 198.03927612304688, -12.415023803710938, 461.32354736328125, 295.25787353515625, 757.013427734375, 389.30816650390625, 182.3120574951172, 753.6619262695312, 1146.9598388671875, 445.6221923828125, -31.43883514404297, 855.149658203125, 461.9782409667969, 335.21453857421875, 225.24102783203125, 431.2973327636719, -1.0995025634765625, -89.6258544921875, 142.57186889648438, 113.69542694091797, 215.542724609375, 126.8541259765625, 407.332275390625, 757.97216796875, 211.87005615234375, -249.12689208984375, 660.677001953125, 332.3265075683594, 280.2196960449219], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000446.npy"}
|
||||
{"epoch": 0.6549192364170338, "step": 447, "batch_size": 64, "mean": 312.77081298828125, "std": 389.6471252441406, "min": -478.48187255859375, "p10": -117.48008422851562, "median": 280.9343566894531, "p90": 724.7666015625001, "max": 2055.42138671875, "pos_frac": 0.828125, "sample": [304.9451904296875, 94.62284088134766, 371.96337890625, 256.1649169921875, 28.233306884765625, -478.48187255859375, 462.1097106933594, 228.00076293945312, 327.4022216796875, -41.322479248046875, 29.33578872680664, 699.7735595703125, 265.5856628417969, 220.4945831298828, 770.0047607421875, 600.9479370117188, 789.4556274414062, 34.661773681640625, 2055.42138671875, 784.9448852539062, 278.45989990234375, 1140.65771484375, 462.859375, 422.54193115234375, -280.4287109375, 283.4088134765625, 558.1398315429688, -119.38677978515625, 318.9519348144531, 212.4800262451172, -166.76461791992188, -19.211715698242188, 0.6203384399414062, -122.86630249023438, 420.2314758300781, -113.0311279296875, 206.7392578125, 589.6704711914062, 95.9063720703125, 33.09436798095703, 569.5608520507812, 122.02327728271484, 139.68202209472656, -349.519287109375, 735.4779052734375, 508.42462158203125, 471.0036926269531, -223.8035888671875, 216.40975952148438, 28.14444923400879, 458.2203369140625, 433.7544860839844, 397.89208984375, 351.8436584472656, 148.9390869140625, 601.5785522460938, 372.0653076171875, 106.1604995727539, 1022.0177001953125, 638.689208984375, 94.99260711669922, 660.7198486328125, 537.6854858398438, -30.965606689453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000447.npy"}
|
||||
{"epoch": 0.6563876651982379, "step": 448, "batch_size": 64, "mean": 295.4989929199219, "std": 342.8846435546875, "min": -377.94903564453125, "p10": -118.21312713623044, "median": 252.69781494140625, "p90": 686.3301269531252, "max": 1380.341064453125, "pos_frac": 0.84375, "sample": [40.63127899169922, 19.810543060302734, -319.9833068847656, 165.49713134765625, 607.63427734375, 30.1495361328125, 128.1474151611328, -52.014625549316406, 313.35028076171875, -377.94903564453125, 491.6910400390625, 84.74417877197266, 707.0372314453125, 92.06777954101562, 253.27593994140625, 567.3194580078125, 41.82286834716797, -20.46461296081543, 622.6646728515625, -150.37289428710938, -131.6215057373047, 107.12652587890625, 712.3829345703125, 462.5648193359375, 250.52688598632812, 338.0111999511719, 237.4969482421875, 146.29437255859375, 477.36346435546875, 885.115234375, 299.09417724609375, 594.8101196289062, 0.8700160980224609, 368.63079833984375, 1380.341064453125, 11.11298942565918, 85.00996398925781, 191.4657440185547, 92.1098403930664, 342.2417907714844, 613.433349609375, 615.282470703125, 280.59967041015625, -211.6065673828125, 1075.626953125, 362.80120849609375, 252.11968994140625, -86.92691040039062, 480.8432922363281, 121.41909790039062, 301.864013671875, -191.6577606201172, 960.761962890625, -169.423583984375, 632.3016967773438, 877.68359375, 55.12506103515625, 355.18621826171875, 121.8019790649414, 325.81121826171875, 560.787841796875, 638.0135498046875, 599.1480102539062, 244.93218994140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000448.npy"}
|
||||
{"epoch": 0.657856093979442, "step": 449, "batch_size": 64, "mean": 298.13922119140625, "std": 441.2258605957031, "min": -861.931884765625, "p10": -170.46488189697266, "median": 245.79324340820312, "p90": 804.9815734863282, "max": 2228.80517578125, "pos_frac": 0.78125, "sample": [146.2692108154297, 32.432044982910156, 323.843017578125, 31.393417358398438, 703.9310913085938, 696.4794921875, 253.4423828125, 90.82488250732422, 244.6217041015625, 246.96478271484375, 898.92626953125, -231.48487854003906, -55.23145294189453, 230.23770141601562, 659.8994140625, 26.380550384521484, 457.62127685546875, 136.57272338867188, 113.95256042480469, -295.24151611328125, 824.8173828125, 294.67108154296875, 546.6625366210938, 785.135009765625, 365.11749267578125, 711.9011840820312, -278.8285217285156, 92.60829162597656, 405.0994873046875, 430.5421142578125, 170.5636749267578, 220.32452392578125, 52.34564208984375, 629.453857421875, 574.1015014648438, 730.7128295898438, -121.55461883544922, 93.58967590332031, -166.521240234375, -172.15501403808594, 2228.80517578125, 584.1475830078125, -131.78749084472656, 309.84564208984375, 429.86309814453125, 162.71817016601562, -88.68478393554688, -861.931884765625, -48.865726470947266, 1106.2060546875, 283.0150451660156, 848.083251953125, 813.4872436523438, -225.8180694580078, 490.40313720703125, 150.46365356445312, 456.22857666015625, 1020.156982421875, 288.21600341796875, -55.09318542480469, -221.64540100097656, 53.745849609375, 155.3922576904297, 433.5375671386719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000449.npy"}
|
||||
{"epoch": 0.6593245227606461, "step": 450, "batch_size": 64, "mean": 325.58074951171875, "std": 322.12158203125, "min": -703.2261352539062, "p10": 14.25559577941895, "median": 303.1432342529297, "p90": 751.0333007812502, "max": 1221.704345703125, "pos_frac": 0.90625, "sample": [381.9990234375, 663.5286254882812, 777.02978515625, 369.6996765136719, 73.142333984375, 685.6118774414062, 1221.704345703125, -71.26829528808594, 369.7063293457031, 12.377204895019531, 595.9802856445312, 581.0457153320312, 706.9932861328125, 376.7872619628906, 389.5497131347656, 780.080810546875, 79.7186279296875, 64.1314697265625, 223.30126953125, 88.54345703125, 397.37384033203125, 609.3881225585938, 530.0556640625, 769.9075927734375, 199.3820037841797, -33.22677230834961, 86.78117370605469, 902.7052001953125, 344.170166015625, 660.548828125, 76.24375915527344, 372.6268310546875, 250.2053680419922, 253.7841033935547, 302.2659606933594, 18.638507843017578, 212.58226013183594, 400.1438903808594, 239.4822998046875, 127.44792175292969, 874.5175170898438, -231.77137756347656, 125.08818817138672, 436.8830261230469, 148.86053466796875, 316.90423583984375, 56.34136199951172, 683.0179443359375, 229.24752807617188, 54.68784713745117, 501.56048583984375, -63.490020751953125, 587.2192993164062, 334.36126708984375, 77.0911636352539, -703.2261352539062, 347.08685302734375, 304.0205078125, 59.00049591064453, 226.6348419189453, 186.32452392578125, 1070.6400146484375, -36.69390106201172, 162.69287109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000450.npy"}
|
||||
{"epoch": 0.6607929515418502, "step": 451, "batch_size": 64, "mean": 233.91940307617188, "std": 429.757080078125, "min": -1278.791748046875, "p10": -230.60373077392572, "median": 265.2984161376953, "p90": 701.6096923828126, "max": 1188.33447265625, "pos_frac": 0.78125, "sample": [65.74514770507812, -134.97332763671875, -3.615936279296875, 323.68743896484375, 532.1426391601562, 267.4071960449219, 374.116943359375, 681.7728271484375, -36.78179168701172, 263.18963623046875, 515.3369140625, 1188.33447265625, 508.29486083984375, 823.55810546875, 139.65338134765625, 219.53787231445312, 130.1534423828125, -1278.791748046875, -671.0040893554688, -271.44964599609375, 174.15594482421875, 65.3156509399414, 472.6271057128906, 584.892333984375, 740.18115234375, 279.1002197265625, 427.307861328125, 197.9944305419922, 366.4124450683594, 617.1717529296875, 179.003662109375, 1125.0506591796875, 924.8400268554688, 363.4613342285156, 499.8690490722656, 515.0665283203125, 460.5085754394531, -359.07086181640625, 269.0024108886719, 343.72027587890625, -45.929771423339844, -145.6978759765625, 425.1796569824219, -1032.435302734375, 176.0077667236328, 178.85702514648438, 184.8754425048828, 167.2042236328125, 281.3406982421875, 65.21671295166016, 668.80615234375, -110.10411071777344, -255.22195434570312, 780.2260131835938, 541.38720703125, 96.50625610351562, 398.7322998046875, -495.6972351074219, 199.6905517578125, 10.682266235351562, 710.1112060546875, -173.1612091064453, 309.0564880371094, 152.28353881835938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000451.npy"}
|
||||
{"epoch": 0.6622613803230544, "step": 452, "batch_size": 64, "mean": 281.9278564453125, "std": 296.2518310546875, "min": -373.6860656738281, "p10": -102.18248443603515, "median": 280.74534606933594, "p90": 687.0710876464844, "max": 922.7520141601562, "pos_frac": 0.84375, "sample": [880.71337890625, -259.6435852050781, 538.4014282226562, 671.68310546875, -103.0362777709961, 486.8250732421875, 442.3407287597656, 504.9695739746094, 566.337890625, -100.19029998779297, 511.61798095703125, 267.8137512207031, 142.3609161376953, 170.60333251953125, 385.963134765625, 299.6015930175781, 673.240234375, 293.67694091796875, 459.63079833984375, 38.23051452636719, 412.37982177734375, 8.802619934082031, 399.18328857421875, 78.01386260986328, 307.32403564453125, 692.9985961914062, 88.00527954101562, -269.4757995605469, 549.2059326171875, 201.06674194335938, -74.687255859375, 393.50408935546875, 113.76791381835938, 184.59866333007812, 697.2726440429688, 96.98855590820312, 264.8067932128906, 663.7733154296875, 112.16204071044922, 195.88140869140625, 314.29193115234375, -256.10198974609375, 234.99209594726562, 760.6439208984375, 217.49392700195312, 402.358154296875, 131.4632568359375, 776.0594482421875, 922.7520141601562, 83.11436462402344, 114.87840270996094, 392.88250732421875, 384.9749755859375, 10.899715423583984, -373.6860656738281, 323.141357421875, -141.24676513671875, 495.2157287597656, 461.71282958984375, 726.1737060546875, -109.07292938232422, 230.43618774414062, -59.00389099121094, 12.298151016235352], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000452.npy"}
|
||||
{"epoch": 0.6637298091042585, "step": 453, "batch_size": 64, "mean": 266.5997314453125, "std": 360.1244812011719, "min": -357.9365234375, "p10": -144.18380279541014, "median": 196.67718505859375, "p90": 697.9859985351563, "max": 1648.50390625, "pos_frac": 0.75, "sample": [584.9031372070312, 586.5772094726562, -150.80279541015625, 115.72814178466797, 227.70028686523438, 23.666336059570312, 330.7523193359375, 165.28636169433594, 138.98570251464844, 8.6527099609375, 203.74932861328125, 549.8754272460938, 544.6217651367188, 648.8798217773438, 15.23282241821289, 112.89283752441406, 330.569580078125, 212.16204833984375, -121.27461242675781, 76.54750061035156, 590.4981079101562, -183.8529815673828, 226.15060424804688, 352.3062744140625, 669.0709228515625, -196.52301025390625, -128.73948669433594, 710.378173828125, -357.9365234375, 329.1837158203125, -51.59593200683594, 173.08743286132812, 555.1791381835938, 849.81396484375, 887.0062255859375, 16.236804962158203, -68.14518737792969, 142.73944091796875, 1648.50390625, 787.0469970703125, 433.0538024902344, -115.90511322021484, 854.8602905273438, -16.43195343017578, 269.09417724609375, 102.66169738769531, 494.25054931640625, 216.4041748046875, 383.73687744140625, 441.71600341796875, -196.2606964111328, 494.12127685546875, -152.21240234375, 588.33349609375, 47.405029296875, -66.14289093017578, -176.35948181152344, 189.60504150390625, 1019.595947265625, 441.705322265625, -23.435779571533203, -22.22180938720703, 139.94094848632812, 159.75515747070312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000453.npy"}
|
||||
{"epoch": 0.6651982378854625, "step": 454, "batch_size": 64, "mean": 301.10711669921875, "std": 382.0713195800781, "min": -683.4032592773438, "p10": -43.76784362792965, "median": 248.8004608154297, "p90": 738.7660034179688, "max": 1517.018310546875, "pos_frac": 0.875, "sample": [243.66085815429688, 10.6378173828125, 1020.8978881835938, 283.72906494140625, 513.546875, 51.26496887207031, 708.820068359375, 355.79937744140625, 3.0687599182128906, 225.6746063232422, -616.411376953125, 623.10107421875, 360.9559326171875, 319.3846435546875, 179.03927612304688, 253.9400634765625, 393.7707214355469, 411.77752685546875, 17.73767852783203, -683.4032592773438, 71.96397399902344, 572.4288940429688, -70.76083374023438, 790.1731567382812, -352.8545227050781, -59.860809326171875, 751.5999755859375, 70.53507232666016, 93.03321838378906, 243.2723388671875, 499.6870422363281, 139.0299530029297, 384.7255859375, 89.09297943115234, 433.4318542480469, 137.67855834960938, 217.74349975585938, 204.55239868164062, 282.9793701171875, 140.3090057373047, 429.529052734375, 461.1978454589844, 111.75166320800781, 383.5776672363281, -6.21759033203125, 685.56005859375, 1451.4677734375, 367.791015625, 37.14722442626953, 167.74688720703125, 1517.018310546875, 1032.3865966796875, 302.6607360839844, 508.00775146484375, 89.11894989013672, 447.1076354980469, 667.7526245117188, -86.78195190429688, 413.3758850097656, -116.30912780761719, 165.2631072998047, 68.35353088378906, 827.6546630859375, 29.940906524658203], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000454.npy"}
|
||||
{"epoch": 0.6666666666666666, "step": 455, "batch_size": 64, "mean": 344.6717224121094, "std": 409.9918518066406, "min": -677.155029296875, "p10": -53.10468177795406, "median": 240.952392578125, "p90": 1022.7577819824221, "max": 1350.63720703125, "pos_frac": 0.875, "sample": [127.83683776855469, 134.8743896484375, 776.7949829101562, 1041.311767578125, -485.2024230957031, 331.2119445800781, 663.8856811523438, -68.69879150390625, 1149.8643798828125, -213.26303100585938, 1060.757080078125, 239.61561584472656, 979.4651489257812, -248.42404174804688, 1142.3045654296875, -111.04930877685547, 235.2249755859375, 712.0243530273438, 210.04867553710938, 188.0291290283203, 62.089324951171875, 476.1331481933594, 243.0299530029297, 25.478403091430664, 415.0732421875, 1121.771484375, 685.1383666992188, -168.73602294921875, 245.51487731933594, 343.60986328125, 241.34320068359375, 577.9974365234375, 240.56158447265625, 200.47634887695312, 137.7725830078125, 179.01953125, 216.50369262695312, 273.6405944824219, 82.69229125976562, -677.155029296875, 382.7703857421875, 84.14727783203125, 256.351806640625, 585.5364990234375, 45.1483154296875, 84.9660415649414, 0.5251178741455078, 477.14996337890625, 842.5054931640625, 212.66751098632812, 680.3314208984375, 76.2491455078125, 418.350830078125, 444.5119934082031, 63.10816192626953, 114.68400573730469, -16.718425750732422, 846.939208984375, 462.300048828125, 440.6396789550781, 1177.048583984375, 208.50686645507812, 32.06635665893555, 1350.63720703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000455.npy"}
|
||||
{"epoch": 0.6681350954478708, "step": 456, "batch_size": 64, "mean": 337.71661376953125, "std": 345.2431640625, "min": -182.8037109375, "p10": -8.60530185699462, "median": 260.7631530761719, "p90": 838.9914367675783, "max": 1412.659912109375, "pos_frac": 0.875, "sample": [164.24237060546875, 228.0537567138672, 765.0875854492188, 74.26651000976562, -25.4049072265625, 306.7081604003906, 96.92379760742188, 62.118255615234375, 251.9866943359375, 778.4912719726562, 360.953857421875, 26.259889602661133, 112.62532043457031, 1412.659912109375, 294.43359375, -105.11200714111328, 11.068008422851562, 315.5289611816406, 317.69500732421875, 428.0281066894531, 269.53961181640625, 63.444664001464844, -70.73928833007812, 1064.511474609375, -82.63299560546875, 404.12066650390625, 1323.198974609375, 372.8123779296875, 610.5302734375, 76.53903198242188, 311.5205078125, 513.1767578125, 576.6102905273438, 185.62045288085938, 176.32418823242188, 234.16026306152344, -0.10200309753417969, 459.556640625, 926.0693969726562, 94.19503784179688, 785.0233764648438, 424.19732666015625, 320.1085205078125, 71.23589324951172, -182.8037109375, 343.7315673828125, 121.49146270751953, -47.585113525390625, 862.12060546875, 118.94893646240234, 876.3235473632812, 1062.96337890625, 330.1416320800781, -12.24957275390625, 233.04861450195312, 100.07585144042969, 406.1325378417969, 116.4180908203125, 472.8011474609375, 110.13660430908203, 164.20639038085938, 571.1737060546875, 778.130615234375, 203.0207977294922], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000456.npy"}
|
||||
{"epoch": 0.6696035242290749, "step": 457, "batch_size": 64, "mean": 337.23883056640625, "std": 326.47406005859375, "min": -305.478515625, "p10": -40.06490287780761, "median": 364.5570373535156, "p90": 738.7369873046875, "max": 1125.901611328125, "pos_frac": 0.84375, "sample": [-41.78978729248047, 423.9132080078125, 436.62017822265625, 348.341064453125, 525.9949340820312, 733.6329345703125, 823.094970703125, 699.7778930664062, 63.663963317871094, 52.379180908203125, 1104.139404296875, 440.38861083984375, 56.78659439086914, 205.4412384033203, 106.14009094238281, 236.1082000732422, 513.4722290039062, 223.62448120117188, 120.29752349853516, 549.9127807617188, 740.9244384765625, -13.02950668334961, 169.0271759033203, 451.0578308105469, 477.77838134765625, 946.3378295898438, -302.7081298828125, -193.1418914794922, 617.8446655273438, 634.2575073242188, 56.27058410644531, 511.4823913574219, 491.6964416503906, 533.54345703125, 323.80029296875, -81.01112365722656, 648.486328125, 486.5537414550781, 109.05758666992188, 17.7574462890625, 343.31689453125, 860.4943237304688, 440.2421875, 119.79463195800781, 47.72067642211914, 150.14796447753906, 599.740234375, -23.97356414794922, 527.5955200195312, 380.77301025390625, -123.88109588623047, 646.2655029296875, 266.67877197265625, 803.138671875, -305.478515625, 386.115234375, 30.204269409179688, -148.93966674804688, 433.181640625, -36.0401725769043, 206.55523681640625, 525.9246215820312, 79.88115692138672, 1125.901611328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000457.npy"}
|
||||
{"epoch": 0.671071953010279, "step": 458, "batch_size": 64, "mean": 306.4158935546875, "std": 368.9295654296875, "min": -439.05279541015625, "p10": -123.0059455871582, "median": 303.4040222167969, "p90": 626.4427612304689, "max": 1778.630615234375, "pos_frac": 0.796875, "sample": [-439.05279541015625, 641.5076904296875, 446.89569091796875, 267.1802673339844, 538.907470703125, -43.32987976074219, 1778.630615234375, -205.7944793701172, 301.7501220703125, 493.7596740722656, 224.01834106445312, 302.5474853515625, 524.6517944335938, 287.0892639160156, 719.11767578125, -117.35455322265625, -130.6161651611328, -15.44331169128418, 244.29971313476562, 568.6135864257812, 470.7998352050781, 469.7337951660156, 86.75627899169922, 515.0906372070312, 360.61688232421875, 357.50714111328125, 385.6229248046875, 49.6527099609375, 381.86297607421875, 451.6101989746094, 313.6121520996094, 739.5633544921875, 484.6319274902344, 815.755126953125, 135.63040161132812, -24.2459716796875, 343.47747802734375, 74.96723937988281, 471.7281494140625, 304.26055908203125, 698.7413330078125, 365.9841613769531, -110.9011459350586, -340.6380310058594, 299.73370361328125, -264.258056640625, 180.0205078125, -125.42797088623047, 198.70831298828125, 1592.0283203125, 499.416015625, 222.11038208007812, 295.0863952636719, 85.07337951660156, 202.20501708984375, 591.291259765625, 324.7650146484375, 431.3565673828125, 253.6185760498047, 313.8272399902344, -113.74781036376953, 431.1087951660156, -257.1632385253906, 261.66583251953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000458.npy"}
|
||||
{"epoch": 0.6725403817914831, "step": 459, "batch_size": 64, "mean": 331.6836853027344, "std": 407.98236083984375, "min": -758.259765625, "p10": -147.27821502685543, "median": 306.6326141357422, "p90": 822.6238220214844, "max": 1475.5008544921875, "pos_frac": 0.828125, "sample": [70.71891784667969, 50.605133056640625, 435.0791015625, 165.97308349609375, 258.36041259765625, 385.00543212890625, -235.03543090820312, 156.8815155029297, 568.25390625, -299.4705810546875, 581.93994140625, 400.583740234375, 452.5840759277344, 527.6680908203125, 23.061460494995117, 460.96539306640625, -44.932830810546875, 607.3336181640625, 319.99810791015625, 889.8875732421875, 23.8104248046875, 867.1599731445312, 641.25390625, -110.97660827636719, 343.4040222167969, -238.4780731201172, 693.73291015625, -116.93901062011719, 120.51203918457031, 53.261878967285156, 245.5308380126953, -21.94994354248047, 829.73291015625, -758.259765625, 1475.5008544921875, 300.87469482421875, 1256.789794921875, 547.9690551757812, 227.24574279785156, -294.5423278808594, 195.29232788085938, 266.1169738769531, 481.23199462890625, 243.77630615234375, 67.274169921875, 278.70599365234375, -358.0869445800781, 179.40106201171875, 429.8945007324219, 635.0777587890625, 732.98193359375, 94.55717468261719, 379.2371826171875, 1229.8763427734375, 312.3905334472656, 90.67797088623047, 982.5320434570312, -160.28073120117188, 787.9330444335938, 772.994384765625, 806.0359497070312, 327.6905517578125, 23.990055084228516, 567.36181640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000459.npy"}
|
||||
{"epoch": 0.6740088105726872, "step": 460, "batch_size": 64, "mean": 357.0635070800781, "std": 388.44488525390625, "min": -301.51031494140625, "p10": -136.0722457885742, "median": 322.5360412597656, "p90": 887.1218811035156, "max": 1483.50927734375, "pos_frac": 0.8125, "sample": [766.6982421875, 657.16064453125, 508.6838073730469, -129.3579559326172, 106.48316955566406, 890.408935546875, 902.54736328125, 325.7181701660156, -13.352272033691406, 226.10475158691406, -138.94979858398438, 691.3106079101562, 125.80619812011719, 590.595458984375, 199.47715759277344, 270.76708984375, 290.0373229980469, -301.51031494140625, 174.6988067626953, 1338.849853515625, 1242.5220947265625, 210.6328125, 505.3775939941406, 29.026123046875, 952.484375, 306.9683532714844, 1483.50927734375, 407.4256591796875, -117.6096420288086, 26.10565185546875, 289.7858581542969, -161.28317260742188, 529.3480834960938, -188.3064727783203, 470.4658508300781, 569.0485229492188, -99.2113265991211, 436.72454833984375, 319.3539123535156, -0.5747299194335938, 868.44677734375, 632.8690185546875, 531.4008178710938, 345.7208557128906, 477.0113525390625, 127.66751098632812, 116.58921813964844, 102.71189880371094, 371.3390808105469, 253.3671417236328, -197.4661102294922, 76.58760833740234, 120.81719207763672, 403.7592468261719, -256.50079345703125, 572.0438232421875, 895.450439453125, 580.1681518554688, -213.16744995117188, 879.4520874023438, 549.2493286132812, 420.0487060546875, 64.91576385498047, 435.6119384765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000460.npy"}
|
||||
{"epoch": 0.6754772393538914, "step": 461, "batch_size": 64, "mean": 282.1562805175781, "std": 402.6361389160156, "min": -500.6500244140625, "p10": -167.44052124023432, "median": 210.50312042236328, "p90": 704.9762451171877, "max": 1599.95751953125, "pos_frac": 0.75, "sample": [114.39334106445312, -18.945877075195312, 653.252685546875, 343.7588195800781, -191.3558349609375, 534.3829345703125, -500.6500244140625, 1324.7166748046875, 85.07192993164062, 616.107666015625, 145.04598999023438, -48.56385040283203, 262.18499755859375, 636.2051391601562, 993.8883056640625, 458.9755859375, 587.0526733398438, -111.63812255859375, 208.9717559814453, 286.926513671875, 341.25384521484375, 648.3473510742188, 723.6695556640625, 1006.462158203125, -193.97816467285156, -47.78399658203125, 1155.914306640625, 1599.95751953125, 173.06076049804688, 381.197998046875, 437.4135437011719, 200.1927947998047, 237.780029296875, 29.38299560546875, 88.8602294921875, 275.29345703125, -271.6722412109375, -26.38787841796875, -100.99069213867188, 12.411041259765625, 821.2127075195312, -83.04100799560547, 657.0346069335938, 661.3585205078125, -236.6420135498047, 446.5386047363281, 205.9336700439453, 526.30322265625, 20.35211181640625, -95.18585205078125, 600.5068359375, 131.95614624023438, 102.85939025878906, 203.03533935546875, 517.8668212890625, -87.62818145751953, -266.8943176269531, 113.31582641601562, 293.47369384765625, 231.941162109375, 129.6741943359375, -400.03021240234375, 301.85955810546875, 212.03448486328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000461.npy"}
|
||||
{"epoch": 0.6769456681350955, "step": 462, "batch_size": 64, "mean": 230.16461181640625, "std": 419.6398620605469, "min": -644.7933959960938, "p10": -259.81098022460935, "median": 185.25746154785156, "p90": 655.8601806640628, "max": 1392.3331298828125, "pos_frac": 0.703125, "sample": [400.7628479003906, 58.363380432128906, 9.922950744628906, -186.67494201660156, 449.5776672363281, 583.9876708984375, 29.928239822387695, 493.0987854003906, 574.9708862304688, 814.4902954101562, 534.6349487304688, -211.81637573242188, -413.54461669921875, -16.88113784790039, 163.09326171875, 473.61883544921875, 526.3123779296875, 146.56277465820312, -247.65228271484375, 684.2672729492188, -270.0320739746094, 81.90573120117188, 333.1772766113281, -644.7933959960938, -124.57081604003906, 49.829864501953125, 589.5769653320312, -475.4342041015625, 22.289840698242188, 6.564258575439453, 1306.8165283203125, 401.5647277832031, -265.0218505859375, 258.1617431640625, 578.49755859375, 1014.3136596679688, -227.01358032226562, -60.43262481689453, 1392.3331298828125, 251.6544647216797, 145.44593811035156, -146.15904235839844, -178.3875274658203, 204.51483154296875, -281.7807922363281, -78.73707580566406, 368.0049743652344, 491.70782470703125, -304.1813049316406, 166.00009155273438, -78.07687377929688, 423.0684814453125, 1318.254638671875, 505.05517578125, 395.18804931640625, -145.94134521484375, 304.5865478515625, 505.6812438964844, 795.814208984375, 552.8329467773438, 74.80421447753906, 152.97726440429688, 246.52394104003906, 206.92779541015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000462.npy"}
|
||||
{"epoch": 0.6784140969162996, "step": 463, "batch_size": 64, "mean": 307.53863525390625, "std": 450.5167236328125, "min": -482.8091125488281, "p10": -226.43145599365232, "median": 270.4690856933594, "p90": 855.0407409667971, "max": 1515.783447265625, "pos_frac": 0.71875, "sample": [57.20598602294922, 570.483154296875, -17.906625747680664, 282.32183837890625, -140.72515869140625, 386.3114013671875, -165.80731201171875, -8.332527160644531, 461.0097351074219, 1.0801544189453125, 598.5361938476562, -237.83108520507812, 620.4573364257812, 661.8897705078125, 180.86483764648438, 384.74761962890625, 338.568115234375, 667.3374633789062, 790.2589111328125, -98.88243103027344, 21.865602493286133, 1515.783447265625, 395.2465515136719, 273.40716552734375, 1263.8868408203125, 665.7440185546875, 173.91677856445312, 292.2471008300781, -19.777069091796875, 187.50125122070312, -294.7873229980469, -21.957984924316406, -482.8091125488281, 77.10352325439453, 808.2019653320312, -420.53955078125, -73.05097961425781, -185.33441162109375, 405.1461486816406, -199.8323211669922, 589.74951171875, 321.47113037109375, 684.7131958007812, 254.72305297851562, 781.3396606445312, 1060.443359375, -22.425750732421875, 267.531005859375, 952.5778198242188, -339.39813232421875, 173.2831268310547, 1342.3310546875, 241.25209045410156, 875.114501953125, -338.6005859375, 14.580963134765625, 385.3993225097656, 259.8720703125, -468.33709716796875, 412.7036437988281, 471.2353820800781, 551.3339233398438, 1319.928955078125, 178.1019744873047], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000463.npy"}
|
||||
{"epoch": 0.6798825256975036, "step": 464, "batch_size": 64, "mean": 471.21575927734375, "std": 482.2331848144531, "min": -509.9890441894531, "p10": -47.25964775085446, "median": 444.02227783203125, "p90": 963.0181518554688, "max": 2129.5, "pos_frac": 0.875, "sample": [-252.3745880126953, 956.4931640625, 367.1675109863281, 226.22653198242188, 5.87652587890625, 991.2320556640625, 687.9369506835938, 138.4839630126953, 84.86642456054688, 479.69439697265625, 698.923583984375, 627.37548828125, -482.1011657714844, 798.219970703125, 302.6344909667969, 506.25726318359375, 705.330078125, 217.4087371826172, 370.92523193359375, 784.7013549804688, 793.3574829101562, 423.121826171875, 803.2158203125, -128.6420440673828, 1101.077880859375, 248.66409301757812, 592.7326049804688, 495.7630615234375, -509.9890441894531, 165.51898193359375, 844.6142578125, 210.265869140625, 494.43939208984375, 51.19961929321289, 363.9273376464844, 581.650634765625, 755.28564453125, 247.93531799316406, 464.9227294921875, 286.28125, 137.7470703125, -15.715633392333984, 893.3907470703125, 623.3681030273438, 91.99250793457031, 411.5663146972656, -123.29364776611328, 311.400146484375, 15.353965759277344, 644.0797119140625, 556.2887573242188, 1523.422119140625, 2129.5, 1249.810791015625, 422.33294677734375, 965.8145751953125, 836.464599609375, -60.77851104736328, 1767.8902587890625, 744.3651733398438, 582.373046875, -456.0389709472656, 310.94757080078125, 94.9059829711914], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000464.npy"}
|
||||
{"epoch": 0.6813509544787077, "step": 465, "batch_size": 64, "mean": 377.061279296875, "std": 517.4326171875, "min": -1484.7552490234375, "p10": -179.09581604003904, "median": 426.2876739501953, "p90": 873.3167846679688, "max": 1833.9925537109375, "pos_frac": 0.859375, "sample": [736.7205810546875, 1486.706787109375, 549.5875244140625, 230.7356414794922, -247.99102783203125, 7.735992431640625, 632.3499755859375, 357.5987548828125, 69.4009780883789, 9.960235595703125, 797.244873046875, 271.8990478515625, 294.2487487792969, 56.57823944091797, 586.563232421875, 641.3198852539062, 478.1617126464844, 427.1206970214844, 201.7582550048828, 59.83495330810547, -784.2081909179688, 1017.5836181640625, 476.43841552734375, 215.438232421875, 1833.9925537109375, -191.36952209472656, 563.3563842773438, 143.423095703125, 336.32958984375, 525.35693359375, 686.9523315429688, 458.15704345703125, -161.30569458007812, 150.52978515625, 235.53530883789062, 562.206787109375, 8.524269104003906, 325.70843505859375, -369.6532287597656, 738.1041259765625, 912.0997924804688, 221.40225219726562, 545.7821655273438, -940.2340698242188, 760.982177734375, 699.0929565429688, 170.8566436767578, 826.5651245117188, 100.29368591308594, 875.2276000976562, 774.2418212890625, -186.72015380859375, 1105.4759521484375, -1484.7552490234375, 849.8228759765625, 868.8582153320312, -92.08250427246094, 425.45465087890625, 664.6044921875, 473.9266662597656, 58.22630310058594, 423.55889892578125, 887.240234375, 773.3663330078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000465.npy"}
|
||||
{"epoch": 0.6828193832599119, "step": 466, "batch_size": 64, "mean": 341.78094482421875, "std": 496.98699951171875, "min": -1121.94140625, "p10": -315.6764465332031, "median": 297.3949890136719, "p90": 978.0736694335938, "max": 1435.2650146484375, "pos_frac": 0.75, "sample": [587.8617553710938, -418.48760986328125, 201.10784912109375, -285.05413818359375, 822.9441528320312, 317.3304443359375, 613.6589965820312, 135.7225799560547, 742.88916015625, 103.28147888183594, 210.84100341796875, 1182.94580078125, -450.33514404296875, 866.9312744140625, -40.62301254272461, 172.21978759765625, 334.14508056640625, -10.378215789794922, 463.9591369628906, 217.57843017578125, 579.0321044921875, 203.71441650390625, 456.3294677734375, 406.90252685546875, 909.3926391601562, 169.89462280273438, -34.234764099121094, 833.778076171875, 277.45953369140625, 583.5787963867188, 411.1136474609375, 160.65966796875, 865.150390625, 1435.2650146484375, -1121.94140625, 964.1693725585938, 415.9023132324219, 733.907470703125, 1114.55908203125, 427.63262939453125, 984.0326538085938, 6.142704010009766, 165.57061767578125, 222.81930541992188, 500.392578125, 1220.17138671875, -114.76678466796875, -491.8033752441406, -33.076576232910156, 87.6370849609375, 1142.385498046875, 275.3236999511719, 860.1741333007812, -83.7767105102539, 931.2314453125, 426.8447265625, -561.1829833984375, -0.12823486328125, 1037.0389404296875, -328.80029296875, 348.6645202636719, -119.97039031982422, -381.0810241699219, 219.33425903320312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000466.npy"}
|
||||
{"epoch": 0.684287812041116, "step": 467, "batch_size": 64, "mean": 359.0609130859375, "std": 682.378173828125, "min": -1274.7847900390625, "p10": -468.9119598388672, "median": 360.13197326660156, "p90": 1218.6074829101565, "max": 1946.6741943359375, "pos_frac": 0.71875, "sample": [726.9331665039062, 1817.610107421875, 741.3187866210938, 1341.7122802734375, -562.453125, -1274.7847900390625, 648.4400634765625, 480.5943908691406, 480.3026123046875, 339.7197570800781, 246.14608764648438, 470.46295166015625, 1838.1407470703125, 164.56712341308594, 507.69647216796875, 475.93927001953125, 116.19562530517578, 1124.8013916015625, 1542.3408203125, 436.215576171875, 380.544189453125, -94.44490051269531, 80.58807373046875, 894.4222412109375, -374.576416015625, -429.5444030761719, -753.4788208007812, 500.7770690917969, 1243.690185546875, 403.53826904296875, 1103.2122802734375, 162.1938018798828, 472.127685546875, -447.8422546386719, 1041.4119873046875, -13.842376708984375, 1248.5914306640625, 195.79827880859375, -132.81005859375, 961.4218139648438, 257.6162109375, -1116.7137451171875, 219.537109375, -321.4642333984375, 83.52064514160156, 445.37744140625, 29.105712890625, -317.16412353515625, -493.315673828125, 728.6107788085938, 846.5956420898438, 812.7276611328125, 1160.0811767578125, 324.6983947753906, 934.50634765625, -57.82068634033203, 645.2979125976562, -33.035919189453125, -477.94183349609375, 1946.6741943359375, -1029.60302734375, -27.96707534790039, 88.37694549560547, 228.519775390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000467.npy"}
|
||||
{"epoch": 0.6857562408223201, "step": 468, "batch_size": 64, "mean": 450.0086364746094, "std": 589.9862060546875, "min": -1007.7269287109375, "p10": -103.75178375244136, "median": 379.3933868408203, "p90": 1385.264721679688, "max": 2364.73291015625, "pos_frac": 0.859375, "sample": [487.3189392089844, 38.214622497558594, 1437.334716796875, 554.1893310546875, 1501.8809814453125, 984.9978637695312, 585.2847900390625, 641.5819091796875, 453.3036804199219, 519.1485595703125, 324.6730041503906, 659.8603515625, 142.66925048828125, 403.2370300292969, 743.86181640625, 96.65936279296875, 212.4217071533203, -180.17620849609375, 380.8321838378906, 255.35626220703125, 192.6102294921875, 13.237041473388672, 1.2679977416992188, 377.2906188964844, 395.7687072753906, 1485.8009033203125, 377.95458984375, 1558.620361328125, 1015.8695068359375, 95.96798706054688, -431.2123107910156, -12.813583374023438, 667.686767578125, 352.4720458984375, 445.0615539550781, 625.484375, 126.13288116455078, 113.38995361328125, 654.2996215820312, -1007.7269287109375, 1902.20849609375, 83.90785217285156, 443.8524169921875, 461.0275573730469, 368.7236328125, 130.30313110351562, -252.8262939453125, 466.8876647949219, 10.161361694335938, 1263.76806640625, 333.6167907714844, 2005.8262939453125, 210.98248291015625, 733.5145263671875, 3.0383377075195312, 452.5045471191406, 579.2252197265625, 2364.73291015625, 560.894287109375, -127.78216552734375, 274.04669189453125, -504.26593017578125, -47.68089294433594, -205.92596435546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000468.npy"}
|
||||
{"epoch": 0.6872246696035242, "step": 469, "batch_size": 64, "mean": 450.6948547363281, "std": 512.0138549804688, "min": -662.6998901367188, "p10": -131.16901473999016, "median": 465.92274475097656, "p90": 1174.4215820312502, "max": 1899.1708984375, "pos_frac": 0.828125, "sample": [1049.121826171875, 588.49462890625, 93.15294647216797, 550.2499389648438, 1899.1708984375, 558.7845458984375, 1155.927978515625, 64.15447235107422, -11.525131225585938, 208.1312255859375, 551.474365234375, 114.24021911621094, -521.5919799804688, 75.92398071289062, 1153.25439453125, 16.39604949951172, 500.9178161621094, 612.2191162109375, 487.29925537109375, 520.4967041015625, 1205.3035888671875, 230.23660278320312, 165.3335418701172, 184.0535125732422, 1140.355712890625, 1.4036865234375, 874.2189331054688, 522.3076171875, 1182.347412109375, -279.960205078125, 702.0090942382812, -190.1627960205078, 845.4375, 273.4076232910156, 681.1294555664062, 338.874755859375, -48.785316467285156, -162.43569946289062, 9.561981201171875, -438.65484619140625, 560.6546020507812, 852.7488403320312, 1206.8531494140625, 26.730663299560547, -390.76617431640625, 183.52969360351562, 251.83724975585938, 1205.677001953125, 349.63330078125, 124.96289825439453, 1086.958740234375, 912.4531860351562, 444.5462341308594, 1186.8878173828125, 1236.16455078125, 677.6077880859375, 598.4154052734375, 436.6893615722656, -20.337539672851562, -58.213417053222656, 196.17135620117188, 744.1047973632812, 791.5859375, -662.6998901367188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000469.npy"}
|
||||
{"epoch": 0.6886930983847284, "step": 470, "batch_size": 64, "mean": 398.8236083984375, "std": 538.6472778320312, "min": -1263.424560546875, "p10": -284.8109710693359, "median": 433.1428985595703, "p90": 1019.1655944824219, "max": 1773.924560546875, "pos_frac": 0.765625, "sample": [453.7706298828125, 471.9771728515625, -92.01025390625, 98.64795684814453, 238.64895629882812, -119.55184173583984, 294.548095703125, 113.48897552490234, -62.517059326171875, -643.85302734375, 531.7461547851562, 776.8467407226562, 582.0706787109375, 365.1039733886719, 647.6290283203125, 1014.9122314453125, -303.60968017578125, 763.9796752929688, -1263.424560546875, 724.0870971679688, -127.43869018554688, 809.7339477539062, 179.09896850585938, -375.09649658203125, 266.69964599609375, 193.27993774414062, -293.9409484863281, 379.02734375, 458.5948486328125, 412.5151672363281, 695.1997680664062, -307.267822265625, 1439.5938720703125, 161.50804138183594, 78.40111541748047, 742.7283935546875, 512.2620849609375, 598.5633544921875, 1773.924560546875, 631.1441040039062, 1420.116455078125, 95.92086791992188, 768.1854248046875, 1088.4107666015625, -99.81478881835938, 1472.8792724609375, 520.20947265625, 574.8848876953125, 716.3212890625, 1008.31640625, 388.9713134765625, -263.5076904296875, -147.6883544921875, 257.5817565917969, 582.3162231445312, 763.4423828125, -313.2933349609375, 1279.3411865234375, 799.75341796875, -170.29969787597656, 1020.9884643554688, 133.24356079101562, 321.5269470214844, 485.8819885253906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000470.npy"}
|
||||
{"epoch": 0.6901615271659325, "step": 471, "batch_size": 64, "mean": 323.36785888671875, "std": 532.7197875976562, "min": -840.7374267578125, "p10": -272.0084014892578, "median": 234.91592407226562, "p90": 876.0324584960939, "max": 1959.2545166015625, "pos_frac": 0.71875, "sample": [1233.6639404296875, -22.588134765625, -19.8868408203125, 713.76220703125, 379.489990234375, 736.6533203125, 609.7817993164062, 1959.2545166015625, 973.8985595703125, 298.4970703125, -2.8274078369140625, 297.4722900390625, -146.30982971191406, 41.196685791015625, 365.5574645996094, -52.606082916259766, -226.2681884765625, -236.16319274902344, 207.025146484375, -44.47885513305664, 1728.096923828125, -338.587646484375, 262.80670166015625, 386.9826965332031, 701.113037109375, -46.24610137939453, -352.4700622558594, -39.39337158203125, -441.7909240722656, 515.3870239257812, -263.6196594238281, 946.4036865234375, 749.488525390625, 894.7662353515625, 486.2480773925781, 724.3209228515625, 825.3074951171875, 142.57081604003906, 200.4625701904297, 574.3447265625, 171.01168823242188, 177.63662719726562, 64.85411834716797, 5.932933807373047, 778.2425537109375, 1847.3350830078125, 106.36859130859375, -840.7374267578125, 754.1112060546875, 32.760711669921875, -383.98504638671875, 89.44818878173828, 61.519630432128906, 351.99432373046875, 74.0860595703125, 620.1255493164062, 9.097574234008789, -275.60357666015625, 392.642822265625, 301.2445373535156, 712.3094482421875, 832.3203125, -377.3843078613281, 468.89495849609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000471.npy"}
|
||||
{"epoch": 0.6916299559471366, "step": 472, "batch_size": 64, "mean": 403.055419921875, "std": 653.3126831054688, "min": -1261.796875, "p10": -183.43177947998043, "median": 246.7999725341797, "p90": 1346.7478393554693, "max": 2382.176025390625, "pos_frac": 0.78125, "sample": [-1261.796875, -404.435302734375, 828.725341796875, -450.4009704589844, 206.4113006591797, 146.12759399414062, 509.16546630859375, -164.30682373046875, -24.987943649291992, -734.4321899414062, 254.64804077148438, 51.351749420166016, 138.2455291748047, 438.9685974121094, 51.472747802734375, 906.5357666015625, -32.03303527832031, 691.4992065429688, 341.8834228515625, 5.777099609375, 1222.052734375, 83.246826171875, -31.6129150390625, 16.796646118164062, 238.951904296875, 500.71820068359375, 330.8976135253906, -192.92538452148438, 272.1572265625, 915.9398803710938, 35.89495849609375, 995.1829833984375, 196.65933227539062, 966.5, 2256.53125, 1711.845947265625, 115.04520416259766, 1455.99853515625, 307.10638427734375, 619.019775390625, 359.71710205078125, 966.4915771484375, 100.67350769042969, 283.3662109375, -191.62818908691406, 1407.625244140625, 1759.101318359375, 137.5759735107422, 1400.1885986328125, 111.93312072753906, 2382.176025390625, -113.66368103027344, -154.18727111816406, -85.9997787475586, 1162.2955322265625, 730.5380859375, 728.8156127929688, 188.92135620117188, 361.57452392578125, -341.89215087890625, 273.2239074707031, 427.0951232910156, 231.04080200195312, 156.13780212402344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000472.npy"}
|
||||
{"epoch": 0.6930983847283406, "step": 473, "batch_size": 64, "mean": 184.0141143798828, "std": 789.5870971679688, "min": -1951.761474609375, "p10": -498.3581237792969, "median": 182.93364715576172, "p90": 1049.4492309570317, "max": 3533.013916015625, "pos_frac": 0.6875, "sample": [-178.87106323242188, 805.6636352539062, 268.8082275390625, -238.30885314941406, -499.2644958496094, 1253.35986328125, -90.49737548828125, 163.14508056640625, 155.78395080566406, 300.8338317871094, 1184.287109375, 652.1663208007812, -74.48652648925781, 160.92388916015625, -586.2328491210938, -425.5433349609375, 42.516204833984375, 56.75325012207031, 199.0685577392578, 1272.419189453125, -290.4566650390625, -87.83778381347656, 659.697265625, 292.0049743652344, 156.62539672851562, 137.088623046875, 332.60260009765625, 327.84722900390625, 204.13589477539062, 97.30014038085938, -1951.761474609375, 106.93729400634766, -496.2432556152344, 183.75636291503906, -1685.6671142578125, -1746.89013671875, 3533.013916015625, 309.11322021484375, 407.1527099609375, 1226.87744140625, 179.4939422607422, 258.2385559082031, 768.6495971679688, 806.4384765625, 222.1959991455078, 671.81298828125, -571.15380859375, -368.5039367675781, 224.51307678222656, 975.7584228515625, 570.88427734375, 1081.031005859375, -406.19134521484375, -197.50164794921875, 101.42739868164062, -137.67578125, 813.7662963867188, -1517.77734375, 515.8064575195312, 182.11093139648438, -297.66082763671875, 302.18603515625, 217.46505737304688, 1243.768310546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000473.npy"}
|
||||
{"epoch": 0.6945668135095447, "step": 474, "batch_size": 64, "mean": 319.40814208984375, "std": 591.739990234375, "min": -1639.4959716796875, "p10": -250.70170288085936, "median": 321.06219482421875, "p90": 947.1848388671875, "max": 1746.7421875, "pos_frac": 0.765625, "sample": [-1139.5213623046875, 191.02536010742188, 138.14300537109375, 949.21533203125, 780.071533203125, -504.5249938964844, 942.447021484375, 154.81163024902344, 287.6983947753906, 588.9411010742188, 1129.2061767578125, 1746.7421875, 29.34718894958496, 298.3548889160156, 200.55465698242188, 788.0692138671875, 350.4602966308594, -8.591217041015625, -423.7791748046875, 476.1197814941406, 79.73841857910156, 331.4627990722656, -203.78460693359375, 121.51119995117188, 310.6615905761719, 656.7246704101562, 872.943115234375, 193.73431396484375, 518.8241577148438, 435.49090576171875, 693.3751831054688, -63.046382904052734, 62.58312225341797, 297.03717041015625, 506.864501953125, 281.9447021484375, 642.032470703125, 153.97772216796875, -189.95469665527344, -258.90374755859375, 760.012451171875, -96.10922241210938, 611.1065673828125, 487.6204833984375, 188.6683349609375, 343.1095275878906, 1347.95947265625, 1089.45361328125, -125.49159240722656, -771.3845825195312, -231.5635986328125, 883.9223022460938, 415.37054443359375, 614.4127197265625, 714.1090087890625, 230.65248107910156, 370.478759765625, -951.0686645507812, 691.7662963867188, -181.76242065429688, 436.87078857421875, 1199.2088623046875, -1639.4959716796875, 1636.26806640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000474.npy"}
|
||||
{"epoch": 0.6960352422907489, "step": 475, "batch_size": 64, "mean": 419.8092041015625, "std": 439.2447204589844, "min": -371.6594543457031, "p10": -144.52230072021484, "median": 408.4990234375, "p90": 1025.8115112304693, "max": 1465.427978515625, "pos_frac": 0.796875, "sample": [96.761962890625, -49.7474479675293, 442.1427307128906, 133.63198852539062, -194.01629638671875, 592.1257934570312, 1440.8551025390625, 450.13360595703125, 1184.1707763671875, 1077.194091796875, 285.6988830566406, 295.1533203125, 411.48944091796875, 11.9154052734375, 565.6566162109375, -371.6594543457031, 626.246337890625, -12.53645133972168, 789.05224609375, -140.39013671875, 760.5375366210938, 794.827880859375, 806.2930908203125, 213.06475830078125, 134.48336791992188, 319.08929443359375, 420.68316650390625, 409.374755859375, 29.633983612060547, 473.2099609375, 260.8011779785156, 511.3157043457031, -123.97792053222656, 248.0469512939453, 617.9575805664062, 827.587890625, 407.623291015625, 739.8606567382812, -221.82020568847656, 1307.24365234375, 440.44073486328125, -19.885498046875, 301.93914794921875, -177.8240509033203, 786.2039794921875, 107.62376403808594, 554.8305053710938, -146.29322814941406, 350.38543701171875, 594.9012451171875, -106.21279907226562, 905.9188232421875, 589.801025390625, 88.35897827148438, 1465.427978515625, -193.2017364501953, 222.83949279785156, 1402.42041015625, 701.4124755859375, 1094.3406982421875, 396.0540466308594, 798.3938598632812, -218.41336059570312, 358.6116027832031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000475.npy"}
|
||||
{"epoch": 0.697503671071953, "step": 476, "batch_size": 64, "mean": 334.79010009765625, "std": 462.4664306640625, "min": -508.9059753417969, "p10": -184.9171020507812, "median": 237.60406494140625, "p90": 915.3699462890626, "max": 2108.216552734375, "pos_frac": 0.796875, "sample": [800.7403564453125, 112.6016616821289, -121.92156219482422, 684.3453369140625, 300.8218994140625, 521.303955078125, 1304.3797607421875, 130.88192749023438, 120.65127563476562, -133.85247802734375, 1080.739013671875, 85.60822296142578, 153.7173309326172, 417.52105712890625, 887.6051025390625, 490.1744384765625, 579.1082763671875, 70.38436126708984, 225.01968383789062, 107.3549575805664, 565.4594116210938, 244.83197021484375, -10.918045043945312, 1231.937255859375, 501.7032165527344, 560.4843139648438, -466.938720703125, -268.9207763671875, 750.543212890625, 83.84508514404297, 9.047271728515625, 121.8536376953125, -305.85772705078125, -273.1372985839844, 516.1256103515625, -41.50352096557617, -206.80194091796875, -365.4068298339844, 687.9083251953125, 163.67193603515625, 230.08831787109375, 608.283447265625, 278.6363830566406, -127.34394836425781, 156.1794891357422, 229.24424743652344, 767.7390747070312, 1032.1580810546875, 2108.216552734375, 927.2691650390625, 313.3587646484375, -16.758529663085938, 275.9784851074219, 230.37615966796875, 150.5128631591797, 104.28298950195312, 574.9928588867188, 532.06494140625, 121.20808410644531, 351.3067626953125, 301.1824951171875, 1158.5093994140625, 312.8743896484375, -508.9059753417969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000476.npy"}
|
||||
{"epoch": 0.6989720998531571, "step": 477, "batch_size": 64, "mean": 328.0302429199219, "std": 572.0289306640625, "min": -1650.9566650390625, "p10": -279.51185607910156, "median": 279.2987518310547, "p90": 1011.5849243164063, "max": 2019.2642822265625, "pos_frac": 0.734375, "sample": [-496.07855224609375, 452.55487060546875, 928.6084594726562, -656.5773315429688, 1522.486328125, 231.69566345214844, 268.4151306152344, 852.3379516601562, 788.219970703125, 1049.696044921875, -25.694541931152344, 82.67337799072266, 221.89915466308594, 819.6810302734375, 966.7215576171875, -108.6242904663086, 691.3840942382812, 516.9186401367188, 1121.5430908203125, -288.21380615234375, 530.3999633789062, 173.15020751953125, -10.812164306640625, 201.7908172607422, -253.03323364257812, 701.4774169921875, 348.1373596191406, 131.92288208007812, 4.869539260864258, -96.59185791015625, -502.4795837402344, 447.4412536621094, -1650.9566650390625, -666.264892578125, 1273.7901611328125, 165.71139526367188, 230.54257202148438, 153.16754150390625, 490.025390625, 78.71817016601562, 662.6065673828125, -70.68359375, 572.302978515625, 1002.59130859375, 329.1665954589844, 317.0179748535156, 323.2388610839844, 830.1640014648438, 429.8741760253906, 190.60855102539062, 1032.29296875, -259.2073059082031, -55.540245056152344, 290.182373046875, -4.96612548828125, 1015.4393310546875, 622.7274169921875, -148.83175659179688, 31.441436767578125, -305.75, 114.78365325927734, 701.612548828125, 2019.2642822265625, 662.9468994140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000477.npy"}
|
||||
{"epoch": 0.7004405286343612, "step": 478, "batch_size": 64, "mean": 394.17974853515625, "std": 525.708740234375, "min": -906.61279296875, "p10": -214.84334564208982, "median": 399.0555114746094, "p90": 1196.601000976563, "max": 1659.6715087890625, "pos_frac": 0.8125, "sample": [-222.4006805419922, 157.60926818847656, 728.1958618164062, 99.68775177001953, 78.2162094116211, 455.28985595703125, 353.7084045410156, -497.0189208984375, 480.3590087890625, 8.297515869140625, 429.190673828125, 525.541748046875, -508.3531188964844, -528.091552734375, 281.9685974121094, 861.6159057617188, 770.058349609375, 1659.6715087890625, 418.2998046875, 163.11895751953125, -331.18170166015625, -197.20956420898438, -403.0333251953125, 201.45590209960938, 389.4344482421875, 1065.0355224609375, 481.35028076171875, 448.4509582519531, -46.826568603515625, 232.23976135253906, 1523.5721435546875, 1099.0955810546875, 312.85894775390625, 257.1913757324219, 449.94183349609375, 272.5896911621094, 1392.8455810546875, 1410.7110595703125, 639.92919921875, -124.65470886230469, 580.5841064453125, -66.89949798583984, 123.47824096679688, -108.2145004272461, 725.3129272460938, 462.6390686035156, 120.62317657470703, 1238.3890380859375, 238.8123016357422, 408.67657470703125, 532.032470703125, 8.300865173339844, 356.1798095703125, 829.4402465820312, 499.3912353515625, 303.1493835449219, 1465.1175537109375, 680.89990234375, 1314.5703125, -906.61279296875, 573.6398315429688, 436.1649169921875, 617.1790161132812, 5.8892364501953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000478.npy"}
|
||||
{"epoch": 0.7019089574155654, "step": 479, "batch_size": 64, "mean": 376.33001708984375, "std": 641.447509765625, "min": -1411.599853515625, "p10": -288.70386657714835, "median": 288.7840881347656, "p90": 1138.682958984375, "max": 2963.864501953125, "pos_frac": 0.734375, "sample": [50.047035217285156, 69.89730072021484, 1131.8406982421875, 598.005859375, 1033.3790283203125, 1168.0333251953125, -393.1756591796875, -214.84713745117188, 259.851806640625, 35.608367919921875, 780.0394287109375, -127.636962890625, -203.80563354492188, 317.71636962890625, 801.26123046875, 214.58367919921875, 223.9286346435547, -142.49114990234375, 937.1049194335938, 476.9103698730469, 201.56607055664062, 520.5316162109375, -120.87306213378906, 523.3290405273438, 2963.864501953125, 1402.851806640625, 682.0252075195312, 735.5342407226562, 746.2998046875, 482.8698425292969, 1254.9398193359375, 211.50143432617188, 191.75299072265625, 797.52734375, -615.7371215820312, 1752.634521484375, -24.436492919921875, 673.8067016601562, 454.2443542480469, 129.96824645996094, 426.94775390625, 1141.6153564453125, 696.6668701171875, 444.9764404296875, -89.37229919433594, -1411.599853515625, 401.22161865234375, -389.4241943359375, -320.35675048828125, -81.28070068359375, 155.26451110839844, 9.025413513183594, -475.1439208984375, -544.9871826171875, 95.77626037597656, -64.0097885131836, 553.266845703125, -34.44366455078125, 127.4395751953125, 1346.7763671875, 377.66802978515625, 1001.2900390625, 102.27095031738281, 635.0830078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000479.npy"}
|
||||
{"epoch": 0.7033773861967695, "step": 480, "batch_size": 64, "mean": 288.78948974609375, "std": 587.0894165039062, "min": -893.6109008789062, "p10": -511.00391235351555, "median": 314.5585479736328, "p90": 977.5253356933596, "max": 2195.574462890625, "pos_frac": 0.671875, "sample": [-554.7443237304688, 350.26617431640625, -158.27566528320312, 132.8062744140625, 583.253173828125, 59.982688903808594, 301.0711669921875, 997.842041015625, 613.806640625, 843.1168823242188, 493.51995849609375, 1363.3826904296875, 431.2899169921875, -422.98553466796875, -776.2463989257812, -617.5494384765625, 722.6122436523438, 853.4234619140625, -158.29489135742188, 298.7760009765625, 4.803245544433594, -110.40409851074219, 23.873367309570312, 413.31494140625, 431.4234313964844, 159.06040954589844, 2195.574462890625, 328.34271240234375, 607.0656127929688, -235.73207092285156, 834.2847900390625, 930.1196899414062, 896.348876953125, -192.36534118652344, -548.72607421875, 328.0459289550781, 513.3272094726562, -157.12290954589844, 452.37225341796875, -893.6109008789062, -774.115966796875, -687.3851318359375, 654.8740844726562, 1180.8870849609375, 1409.760986328125, -126.5538558959961, 345.5218200683594, 446.73699951171875, -40.012935638427734, 480.4943542480469, -8.311187744140625, -357.47125244140625, 133.7225341796875, 197.01950073242188, 160.41571044921875, -26.798412322998047, 93.21709442138672, -19.29922866821289, 632.2160034179688, 1110.5606689453125, 514.5875854492188, -216.5646209716797, 1164.16064453125, 877.8157348632812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000480.npy"}
|
||||
{"epoch": 0.7048458149779736, "step": 481, "batch_size": 64, "mean": 452.87890625, "std": 568.56494140625, "min": -893.3746337890625, "p10": -192.19943695068358, "median": 374.5646057128906, "p90": 1019.4062988281252, "max": 2047.3262939453125, "pos_frac": 0.765625, "sample": [2047.3262939453125, -893.3746337890625, 587.8292846679688, 352.29681396484375, -221.9533233642578, 28.184158325195312, -108.63491821289062, 384.19140625, 460.2706298828125, 1542.473388671875, 132.410400390625, -197.22366333007812, 143.38038635253906, -322.093994140625, 465.05120849609375, 986.915771484375, -148.80545043945312, 774.4332885742188, 1485.4310302734375, -37.78672790527344, -36.65875244140625, 842.1437377929688, 296.4341125488281, 760.5442504882812, 430.6062927246094, 199.2540283203125, 185.38894653320312, 945.4893188476562, 786.0391845703125, -358.0657043457031, 153.82427978515625, 805.238525390625, 402.45074462890625, -177.22406005859375, 820.2333374023438, 801.8796997070312, -255.90399169921875, -59.29487991333008, 931.4415283203125, 515.7666625976562, -224.5909423828125, 135.9245147705078, 876.1507568359375, 903.1710815429688, 68.9994888305664, 113.94878387451172, 1684.9649658203125, 351.5162658691406, 1689.2808837890625, 1033.330810546875, 294.50030517578125, -8.781295776367188, 348.4557800292969, 464.5290222167969, 508.23681640625, 130.63818359375, 200.49172973632812, 872.5934448242188, 775.6637573242188, 785.9600830078125, 1674.9178466796875, -180.4762420654297, 364.93780517578125, 669.976806640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000481.npy"}
|
||||
{"epoch": 0.7063142437591777, "step": 482, "batch_size": 64, "mean": 214.26510620117188, "std": 631.9312133789062, "min": -2088.77392578125, "p10": -480.8723052978515, "median": 254.37477111816406, "p90": 811.1164367675782, "max": 1972.06201171875, "pos_frac": 0.6875, "sample": [248.11416625976562, -65.46495056152344, 329.3621520996094, -824.7781982421875, 423.39031982421875, 628.78173828125, -1102.219482421875, 609.4700317382812, 699.624267578125, 469.192626953125, 382.9808044433594, 652.0361938476562, -12.356292724609375, 311.739013671875, 659.4376831054688, 1243.7030029296875, 229.04815673828125, -356.74957275390625, 338.9092712402344, 314.647705078125, -29.852161407470703, 396.6894226074219, 460.85076904296875, 792.3154907226562, -943.4692993164062, 426.976806640625, 180.17742919921875, 990.2064208984375, 260.6353759765625, 515.8038330078125, 762.4148559570312, 597.3546142578125, 812.8213500976562, 233.66726684570312, -317.8692932128906, 171.86404418945312, 934.6727905273438, -503.2504577636719, 1399.5404052734375, -428.6566162109375, 609.6892700195312, 8.433145523071289, -204.96951293945312, 38.06043243408203, 146.26991271972656, 80.09002685546875, -2088.77392578125, 332.2476806640625, 1972.06201171875, -9.489112854003906, -3.1309814453125, -67.47883605957031, 23.19915771484375, 508.52532958984375, -1028.452880859375, 329.5414123535156, 163.326416015625, 1336.9879150390625, -214.60791015625, 88.46690368652344, -204.69808959960938, -138.80380249023438, 807.1383056640625, -662.4266357421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000482.npy"}
|
||||
{"epoch": 0.7077826725403817, "step": 483, "batch_size": 64, "mean": 478.5213623046875, "std": 600.3779907226562, "min": -547.7156372070312, "p10": -222.47664947509762, "median": 385.6799774169922, "p90": 1222.3652099609376, "max": 3016.7958984375, "pos_frac": 0.796875, "sample": [-158.62184143066406, 50.767852783203125, 1361.868896484375, 841.5892944335938, -283.9205322265625, 784.2369384765625, 395.3675537109375, 375.9924011230469, 664.5888061523438, 655.2699584960938, 409.08306884765625, 807.26318359375, 501.3653564453125, 117.54994201660156, 240.75729370117188, 262.11468505859375, 1038.4287109375, 1014.4921875, -547.7156372070312, 536.89990234375, 319.2554931640625, -39.44060516357422, 790.3623046875, 917.3433837890625, 45.42552185058594, -363.8243713378906, 1291.075927734375, 712.7857055664062, 51.48637390136719, 1083.772705078125, -234.6663055419922, 1362.00830078125, 16.893943786621094, 910.4647827148438, -8.153945922851562, 1186.3599853515625, 272.6152038574219, 1064.1630859375, 197.41998291015625, 1162.492431640625, 772.8613891601562, 635.1377563476562, 496.0830078125, -362.925537109375, 193.73379516601562, 342.6788330078125, 451.2315368652344, 288.46661376953125, 1237.7960205078125, -71.75636291503906, 219.9798583984375, -543.8336181640625, 170.81472778320312, 3016.7958984375, -147.28123474121094, 174.77220153808594, 1424.8040771484375, 273.14971923828125, 462.8450927734375, -289.5693054199219, 672.2891845703125, 1303.7984619140625, 292.31182861328125, -194.03411865234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000483.npy"}
|
||||
{"epoch": 0.7092511013215859, "step": 484, "batch_size": 64, "mean": 435.53656005859375, "std": 531.379150390625, "min": -681.354736328125, "p10": -111.22025604248044, "median": 359.74501037597656, "p90": 1066.8930419921876, "max": 1806.1072998046875, "pos_frac": 0.78125, "sample": [460.69744873046875, -129.3426971435547, 1782.05517578125, 119.83326721191406, -123.85334777832031, -228.77052307128906, 564.407958984375, 92.14403533935547, 1806.1072998046875, 206.30999755859375, 94.19174194335938, 476.453125, 579.4776000976562, 540.7613525390625, 987.2880859375, 688.5042114257812, 401.2516174316406, 40.148624420166016, -222.769775390625, -681.354736328125, 381.7144470214844, 11.836780548095703, 1073.2308349609375, 948.0015869140625, 1779.328125, 60.19560241699219, 363.138916015625, 799.7561645507812, 33.53030014038086, -72.84591674804688, -37.20229721069336, -14.20489501953125, 909.6102294921875, 1308.98828125, 45.71137237548828, 162.79592895507812, 240.4613037109375, 109.3874282836914, 148.69496154785156, -186.13597106933594, -26.375144958496094, 347.19207763671875, 810.0017700195312, 356.3511047363281, 1325.3968505859375, -81.7430419921875, 682.6235961914062, 1048.9559326171875, 441.3725891113281, -211.41061401367188, -51.52140808105469, 785.2147827148438, 1052.1048583984375, 861.4716186523438, 82.60124206542969, 1096.152587890625, 1045.6346435546875, 403.9217529296875, 507.84759521484375, 98.9875259399414, 828.2344970703125, 72.92018127441406, -65.8089370727539, 944.6788940429688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000484.npy"}
|
||||
{"epoch": 0.71071953010279, "step": 485, "batch_size": 64, "mean": 411.17791748046875, "std": 596.7046508789062, "min": -947.8231811523438, "p10": -149.37587966918943, "median": 292.6755676269531, "p90": 1121.03232421875, "max": 2232.052490234375, "pos_frac": 0.75, "sample": [706.9376831054688, 701.2390747070312, -35.77763366699219, 260.05609130859375, 422.66583251953125, 171.40542602539062, 1140.425537109375, 191.43057250976562, 502.98980712890625, 470.3866882324219, 204.57318115234375, -35.58301544189453, 1568.998291015625, 1106.6927490234375, 138.46282958984375, 1016.1126098632812, -521.4024047851562, -93.8622817993164, 992.167724609375, 1094.4151611328125, 1037.98291015625, 1251.0338134765625, 140.15328979492188, 557.5857543945312, 2232.052490234375, 1180.272216796875, -34.53575897216797, 39.71051788330078, 834.7752075195312, -111.59029388427734, -65.2179946899414, -80.95523071289062, -632.203857421875, 164.52145385742188, 257.51373291015625, 911.98193359375, 481.0751953125, 120.64197540283203, 115.5548095703125, 707.0892333984375, -81.36627197265625, 138.55722045898438, 2062.002197265625, 479.8978271484375, -165.5697021484375, 377.1483459472656, 392.742919921875, 647.7224731445312, 325.2950439453125, -668.1494140625, 1127.1778564453125, 1020.6719970703125, 62.73813247680664, -300.2514343261719, 42.23042297363281, -194.95396423339844, 930.4451904296875, 159.76133728027344, 336.3362121582031, 190.06646728515625, -7.698173522949219, -947.8231811523438, 886.8806762695312, 391.7491760253906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000485.npy"}
|
||||
{"epoch": 0.7121879588839941, "step": 486, "batch_size": 64, "mean": 466.4476013183594, "std": 714.4720458984375, "min": -1220.11328125, "p10": -425.7793212890624, "median": 367.0062713623047, "p90": 1365.6197265625, "max": 2914.916259765625, "pos_frac": 0.796875, "sample": [1124.4315185546875, 596.1891479492188, 73.80195617675781, 637.2374877929688, 301.65728759765625, -954.9949951171875, 247.86880493164062, 123.61185455322266, 994.3882446289062, -184.44564819335938, 618.2901000976562, 1419.2047119140625, 661.1533813476562, -925.2283325195312, -483.82952880859375, 1539.7628173828125, 973.9014892578125, 938.4747314453125, -297.4568786621094, 1198.58056640625, 1367.150634765625, -362.19091796875, 1567.5802001953125, 1440.59228515625, 214.52491760253906, -226.32736206054688, 569.0574340820312, 368.1381530761719, 93.65182495117188, 1488.4012451171875, 1290.8203125, 365.4407958984375, -453.031494140625, -850.5272216796875, 339.65570068359375, 365.8743896484375, 96.42181396484375, 1298.5396728515625, 886.3512573242188, 801.3321533203125, 308.8543701171875, 823.1832275390625, 467.554443359375, -84.7601318359375, 601.2564086914062, 924.57861328125, 870.3741455078125, 592.4685668945312, 893.6683959960938, 1362.047607421875, 123.0630111694336, 340.42608642578125, 56.661529541015625, 364.3889465332031, 663.2078857421875, 155.94044494628906, 6.806432723999023, 545.3858032226562, -1220.11328125, 145.30250549316406, 317.6066589355469, -54.104339599609375, -530.1222534179688, 2914.916259765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000486.npy"}
|
||||
{"epoch": 0.7136563876651982, "step": 487, "batch_size": 64, "mean": 346.4443664550781, "std": 538.9556884765625, "min": -1708.74951171875, "p10": -186.4023223876953, "median": 317.78981018066406, "p90": 1089.293298339844, "max": 1737.89599609375, "pos_frac": 0.765625, "sample": [396.513427734375, 1211.396240234375, 201.6607666015625, -265.6168518066406, 1176.849853515625, 1110.134033203125, 724.0637817382812, 483.4542541503906, 1443.9385986328125, -39.67980194091797, 203.65252685546875, -436.3489990234375, 1111.1123046875, -189.75534057617188, 105.34850311279297, 1737.89599609375, 373.6568603515625, -275.8085021972656, 765.2570190429688, 803.7380981445312, 440.7548828125, 630.7576904296875, -1708.74951171875, 176.53085327148438, -110.26311492919922, 583.9318237304688, -12.860511779785156, 962.000244140625, 1318.84912109375, 43.439659118652344, 174.4509735107422, 330.26312255859375, 326.563232421875, 468.1146240234375, 161.50411987304688, -613.7740478515625, 252.90011596679688, -545.4049072265625, 587.8202514648438, 265.9549865722656, 407.30316162109375, 90.22161865234375, 635.79296875, -163.3096466064453, 473.09930419921875, 161.15512084960938, 77.53520202636719, 309.0163879394531, 406.32659912109375, 288.6125793457031, 140.78587341308594, 608.3046264648438, 287.33642578125, 471.6849365234375, -6.5958099365234375, -178.57861328125, 182.74520874023438, 1040.6649169921875, 794.4107666015625, 713.3336181640625, -16.516265869140625, 530.74267578125, -92.76028442382812, 636.88232421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000487.npy"}
|
||||
{"epoch": 0.7151248164464024, "step": 488, "batch_size": 64, "mean": 367.892578125, "std": 462.5895690917969, "min": -529.285400390625, "p10": -123.26457748413085, "median": 335.75201416015625, "p90": 1055.6404113769534, "max": 1613.155517578125, "pos_frac": 0.734375, "sample": [-4.215888977050781, -7.961391448974609, 1401.88720703125, 279.10174560546875, 1005.1482543945312, 174.58921813964844, 748.92138671875, -40.425148010253906, -125.73941040039062, 327.2068786621094, 473.2830810546875, 247.24900817871094, 561.3157348632812, 1230.945068359375, 350.25146484375, 767.7535400390625, -47.522430419921875, 34.539649963378906, -529.285400390625, -57.7454833984375, 499.0083923339844, -148.66848754882812, 483.41265869140625, 358.5811462402344, 1.5847015380859375, -284.8433837890625, 57.553035736083984, 655.6409301757812, 1613.155517578125, 264.09710693359375, 501.77740478515625, 643.6611328125, -351.6435241699219, 1077.2799072265625, 730.4081420898438, 49.83134078979492, 1256.6083984375, 712.9495849609375, 356.9349670410156, 133.39886474609375, 638.2955932617188, 966.8077392578125, 299.29852294921875, 313.57379150390625, 192.44117736816406, 344.2971496582031, 646.0597534179688, 1180.1943359375, -117.4899673461914, -81.3470230102539, -132.6007537841797, 489.6820983886719, 465.2779235839844, 51.527870178222656, 496.361083984375, 502.1286315917969, -74.24177551269531, 461.8394470214844, -0.3030281066894531, -76.21027374267578, -495.395263671875, 547.602294921875, 259.9710693359375, 1267.3275146484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000488.npy"}
|
||||
{"epoch": 0.7165932452276065, "step": 489, "batch_size": 64, "mean": 357.419677734375, "std": 526.283935546875, "min": -562.9900512695312, "p10": -162.31460647583003, "median": 201.0049819946289, "p90": 1160.4672485351564, "max": 2056.43896484375, "pos_frac": 0.734375, "sample": [804.899658203125, -13.456283569335938, 226.60618591308594, 112.00892639160156, 10.246267318725586, 145.62164306640625, 190.53065490722656, -260.1254577636719, 101.96693420410156, 205.76937866210938, -250.19493103027344, 166.67379760742188, 576.9466552734375, 780.17724609375, 463.7330627441406, 192.3219451904297, -180.35047912597656, 219.6510009765625, -77.75110626220703, 960.6896362304688, 10.308305740356445, -71.74740600585938, 243.0179901123047, 247.15606689453125, -82.80868530273438, 149.8320770263672, 191.74815368652344, -68.84857177734375, 196.24058532714844, 600.89453125, -45.55377197265625, 47.15992736816406, -29.918214797973633, -265.35137939453125, 1292.5201416015625, 714.7982788085938, 58.46580123901367, 108.11454772949219, 341.75439453125, -562.9900512695312, 650.2610473632812, -210.34808349609375, 1278.1600341796875, -390.138427734375, 1174.7850341796875, 1127.05908203125, 662.0802001953125, 2056.43896484375, 331.30816650390625, 564.3614501953125, 1831.5556640625, 528.4525146484375, 523.1058349609375, 1500.398193359375, -105.21175384521484, -120.23090362548828, 745.048828125, 496.4465637207031, 524.26123046875, 411.6082763671875, 0.601776123046875, 1294.684814453125, 558.1995849609375, -8.784912109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000489.npy"}
|
||||
{"epoch": 0.7180616740088106, "step": 490, "batch_size": 64, "mean": 367.3455810546875, "std": 655.9293212890625, "min": -1208.7618408203125, "p10": -399.1303070068359, "median": 218.6532745361328, "p90": 1210.8778198242187, "max": 1912.5665283203125, "pos_frac": 0.765625, "sample": [75.00357055664062, -1110.9432373046875, 1197.36376953125, -283.9510803222656, 150.10910034179688, -53.864158630371094, 168.93505859375, 904.4970092773438, 855.2095336914062, -545.9915161132812, 156.0389404296875, -62.47265625, 1408.3170166015625, 52.751739501953125, 621.1883544921875, -419.8836364746094, -205.95925903320312, 1081.2586669921875, 656.8406372070312, -682.91748046875, 29.980510711669922, 141.95172119140625, -607.2373657226562, 61.34730529785156, -1208.7618408203125, 1305.0107421875, 87.87686157226562, 439.03387451171875, 536.8939208984375, 780.125, 212.00311279296875, 1912.5665283203125, 86.03265380859375, 625.4707641601562, 32.751976013183594, -208.900390625, 1685.7369384765625, 884.4345092773438, 1714.28564453125, 21.569385528564453, 288.40911865234375, 225.30343627929688, 1216.6695556640625, 671.0750732421875, -137.58558654785156, 789.4238891601562, -350.70587158203125, 833.8013305664062, 42.940765380859375, 508.71697998046875, 335.82269287109375, -531.4097900390625, 1746.8916015625, -22.0079345703125, 969.723876953125, 947.771484375, 80.08331298828125, 126.93670654296875, 622.1439208984375, 960.9614868164062, 57.52326583862305, 849.534423828125, 512.0529174804688, 272.3360595703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000490.npy"}
|
||||
{"epoch": 0.7195301027900147, "step": 491, "batch_size": 64, "mean": 254.20652770996094, "std": 475.7027282714844, "min": -820.6328735351562, "p10": -283.1089874267578, "median": 172.2318344116211, "p90": 801.0976928710938, "max": 1617.4498291015625, "pos_frac": 0.734375, "sample": [339.15667724609375, -276.1139221191406, 790.1064453125, 507.1961669921875, 1450.482177734375, 228.8348846435547, 340.0973205566406, -140.6645050048828, 435.4339294433594, 94.198974609375, 323.6880798339844, 113.79951477050781, 59.118568420410156, -534.768798828125, 504.2454528808594, 116.74250793457031, 704.1650390625, 458.4407043457031, -13.555343627929688, 520.0321044921875, 160.37979125976562, 63.95502471923828, 961.0047607421875, -476.8663024902344, 231.3570098876953, 238.90704345703125, 691.2496948242188, -243.99183654785156, 805.8082275390625, 39.178977966308594, 1055.8082275390625, 416.9743957519531, -820.6328735351562, 129.984375, -206.71546936035156, 267.1622009277344, 4.206840515136719, 1096.5960693359375, 268.99432373046875, 360.75665283203125, 662.8276977539062, -578.16943359375, 1617.4498291015625, 39.831451416015625, -182.0942840576172, 153.83895874023438, -286.10687255859375, 184.08387756347656, -50.595252990722656, 549.2985229492188, 96.51478576660156, -372.03778076171875, 706.6487426757812, -90.00068664550781, -55.036048889160156, 137.8804168701172, 360.6930847167969, 769.767333984375, 117.64500427246094, -47.138275146484375, 1300.677001953125, 135.5687255859375, 480.8616943359375, -447.9432067871094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000491.npy"}
|
||||
{"epoch": 0.7209985315712188, "step": 492, "batch_size": 64, "mean": 247.95816040039062, "std": 552.0293579101562, "min": -958.4644165039062, "p10": -396.3013031005859, "median": 159.65695190429688, "p90": 1037.4760986328126, "max": 1383.373046875, "pos_frac": 0.734375, "sample": [163.12091064453125, 266.45086669921875, 485.6094055175781, 38.093955993652344, 245.84872436523438, 35.79402160644531, 17.645782470703125, 703.6533813476562, -517.67333984375, -168.7209930419922, 158.42044067382812, 611.5607299804688, 671.147705078125, -251.38095092773438, -958.4644165039062, 997.9013671875, -110.41898345947266, 1196.58544921875, -167.39083862304688, 1359.3433837890625, -371.8240966796875, 1379.0057373046875, 292.7289733886719, -308.6147766113281, 396.55645751953125, 379.65228271484375, 262.7877502441406, -492.40283203125, 66.39799499511719, 335.055908203125, 1.49017333984375, 25.47768211364746, 160.89346313476562, -837.3897094726562, -163.40576171875, 1196.9931640625, 871.897216796875, 299.90545654296875, -709.2680053710938, 92.74040985107422, 1023.18505859375, -706.467041015625, 1189.701416015625, 17.223487854003906, -147.2744598388672, 808.19873046875, 205.33544921875, 84.63856506347656, 971.2584838867188, 69.63833618164062, 502.6297302246094, -133.8728485107422, 153.48187255859375, -406.7915344238281, 1043.600830078125, 873.2901611328125, 501.49493408203125, 40.88581848144531, 76.88078308105469, 485.7531433105469, -280.29052734375, 1383.373046875, 47.73194885253906, 409.9139404296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000492.npy"}
|
||||
{"epoch": 0.7224669603524229, "step": 493, "batch_size": 64, "mean": 277.2387390136719, "std": 529.29248046875, "min": -1861.5814208984375, "p10": -196.6875961303711, "median": 195.32789611816406, "p90": 1028.033447265625, "max": 1350.5870361328125, "pos_frac": 0.765625, "sample": [997.9462890625, 68.2094955444336, 539.548583984375, 246.79965209960938, -90.70751953125, 1214.0355224609375, 909.5325927734375, 18.794097900390625, 94.5234603881836, -361.19708251953125, 462.1800537109375, 93.16169738769531, 1217.985107421875, 473.82061767578125, 363.38165283203125, 136.13943481445312, -297.1416931152344, 289.9044494628906, 133.40072631835938, 44.21153259277344, 131.78736877441406, 293.8910217285156, 432.37005615234375, 400.18896484375, -231.36187744140625, 458.6378173828125, 651.6512451171875, -188.6810302734375, -178.77313232421875, -130.5870361328125, 24.83905029296875, -1861.5814208984375, 341.8564758300781, 30.048538208007812, 789.3683471679688, 22.204309463500977, 1350.5870361328125, 1251.0914306640625, 172.19073486328125, -197.59739685058594, -124.46047973632812, 372.5645751953125, 1191.5474853515625, 865.1568603515625, 345.6659240722656, 1024.3326416015625, -480.0765075683594, 325.54461669921875, 1348.131103515625, 1029.6195068359375, 83.69341278076172, 597.179443359375, -281.0248718261719, 209.1403350830078, 206.88568115234375, -194.56472778320312, -97.81950378417969, 256.2532653808594, 689.1719970703125, 183.77011108398438, 147.87557983398438, 2.09033203125, -80.34097290039062, 6.2856597900390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000493.npy"}
|
||||
{"epoch": 0.723935389133627, "step": 494, "batch_size": 64, "mean": 436.12835693359375, "std": 483.8360900878906, "min": -595.6873779296875, "p10": -88.1373229980468, "median": 410.8778533935547, "p90": 1106.0121215820313, "max": 1756.781005859375, "pos_frac": 0.859375, "sample": [-2.630247116088867, 337.560302734375, 511.018798828125, 392.1076965332031, 1756.781005859375, 1407.0714111328125, -595.6873779296875, 732.3110961914062, 707.9981079101562, 111.93365478515625, 434.7063903808594, -12.116596221923828, 848.2973022460938, 364.24053955078125, -289.7044677734375, 1109.9305419921875, 663.6904296875, -230.66070556640625, 417.0910339355469, 793.3244018554688, 121.2468032836914, -437.8417053222656, 258.8114318847656, 904.5305786132812, 835.9349365234375, 41.89148712158203, 746.7677001953125, 1096.869140625, 250.80787658691406, 373.0455017089844, -442.916015625, 49.28790283203125, 241.3224639892578, -292.443115234375, 1681.3140869140625, 411.9189147949219, 442.43707275390625, 69.97127532958984, 409.8367919921875, 1248.7904052734375, 1139.26123046875, 502.58111572265625, 8.861618041992188, 50.65215301513672, 478.56317138671875, 624.8341064453125, 745.432373046875, 921.9614868164062, 386.63006591796875, 612.4197387695312, -120.71763610839844, 74.51594543457031, 528.7503662109375, 130.97457885742188, 154.42581176757812, 434.86688232421875, 826.7272338867188, 653.8948364257812, 230.4892578125, 428.609619140625, 1170.8968505859375, 56.36753463745117, 349.2045593261719, 53.164306640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000494.npy"}
|
||||
{"epoch": 0.7254038179148311, "step": 495, "batch_size": 64, "mean": 380.0621337890625, "std": 567.6135864257812, "min": -970.7493286132812, "p10": -162.38106689453124, "median": 310.9029083251953, "p90": 1053.513073730469, "max": 2380.029052734375, "pos_frac": 0.78125, "sample": [359.5145263671875, 109.84199523925781, -20.2198486328125, -166.43304443359375, 229.1536865234375, 310.87640380859375, -534.1321411132812, 157.8682861328125, 300.5074462890625, 32.95491027832031, 175.8250274658203, 731.31396484375, 916.8261108398438, -619.6673583984375, 370.10693359375, 313.6772766113281, -279.6150207519531, 816.511474609375, 82.44210815429688, 665.123779296875, 156.1732177734375, 1.404449462890625, 295.3491516113281, 81.36578369140625, 369.4809265136719, 1011.1436767578125, 925.0404663085938, 738.6167602539062, 208.47842407226562, 541.9190673828125, 284.12762451171875, 325.809326171875, 141.55706787109375, 428.45428466796875, 1075.6497802734375, 1071.67138671875, 836.2734985351562, -120.46138000488281, 2380.029052734375, 1371.609619140625, 365.3862609863281, -22.765670776367188, 131.68228149414062, 635.5548095703125, 351.5146789550781, -565.0882568359375, -29.684341430664062, 343.2960205078125, 62.216514587402344, -285.1101989746094, -152.92645263671875, -8.13094711303711, 414.37445068359375, 504.92236328125, 1361.480224609375, 571.8526000976562, 310.9294128417969, 852.1087036132812, -970.7493286132812, 1133.85009765625, 985.1876831054688, 300.09185791015625, 2001.78369140625, -43.967552185058594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000495.npy"}
|
||||
{"epoch": 0.7268722466960352, "step": 496, "batch_size": 64, "mean": 205.62347412109375, "std": 443.10955810546875, "min": -901.3890991210938, "p10": -342.20185852050776, "median": 166.8455047607422, "p90": 871.730126953125, "max": 1122.986328125, "pos_frac": 0.71875, "sample": [99.39254760742188, -482.7886962890625, 978.6121826171875, 856.279541015625, -765.0285034179688, 711.57421875, 132.6781005859375, 288.02520751953125, 878.351806640625, 178.60818481445312, 1122.986328125, 278.3011779785156, 391.2510681152344, 245.54229736328125, -273.9625244140625, -25.571651458740234, -154.7025146484375, 400.083984375, -730.5203857421875, 1108.2230224609375, 918.0323486328125, 402.4209289550781, 785.96630859375, 413.75634765625, 883.2147827148438, 261.8057861328125, -150.85777282714844, -54.76216125488281, 72.416259765625, -360.2340393066406, 184.70526123046875, 73.62657165527344, 92.42733764648438, 428.2395935058594, 63.49449920654297, -88.24815368652344, 479.847900390625, 155.67916870117188, -548.4933471679688, 342.4595947265625, -464.3330078125, -300.12677001953125, 643.4937133789062, 64.8406753540039, 164.4722442626953, 551.0010986328125, 169.21876525878906, 436.0646667480469, 594.0591430664062, 448.6352233886719, 78.97146606445312, -162.6283416748047, 435.86151123046875, -25.691415786743164, 204.83389282226562, -901.3890991210938, 117.86128234863281, 961.5513305664062, -0.5159072875976562, 16.808082580566406, 446.8497619628906, -3.840923309326172, 79.83135223388672, 11.24139404296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000496.npy"}
|
||||
{"epoch": 0.7283406754772394, "step": 497, "batch_size": 64, "mean": 205.537109375, "std": 462.5643310546875, "min": -876.115234375, "p10": -298.54652404785156, "median": 163.23263549804688, "p90": 851.2255249023438, "max": 1338.739013671875, "pos_frac": 0.6875, "sample": [-301.6589050292969, 907.2845458984375, 621.6254272460938, 662.1111450195312, 10.301538467407227, -308.9118957519531, 489.657470703125, -229.73304748535156, -106.01853942871094, -199.8565216064453, 239.7641143798828, 1020.02001953125, 1146.02587890625, 402.75445556640625, 47.122955322265625, 180.83172607421875, 390.7514343261719, 193.6685333251953, 611.9741821289062, 439.5954284667969, -121.73619079589844, -76.82573699951172, 590.353759765625, 856.5540771484375, 9.382400512695312, 45.836212158203125, 838.792236328125, -116.52943420410156, 20.337020874023438, -614.2681274414062, 461.0447998046875, 64.57063293457031, -16.956218719482422, 353.76837158203125, 34.40024185180664, 1274.0819091796875, 48.14569091796875, 1338.739013671875, 68.19361877441406, 72.78533935546875, 186.05984497070312, 306.9655456542969, 313.6539306640625, 657.7244262695312, -876.115234375, 127.94515991210938, 554.9647827148438, -445.5069885253906, 329.4165954589844, 145.633544921875, 395.4674377441406, 276.0924072265625, 960.8513793945312, -160.91709899902344, -174.1536102294922, 219.15646362304688, -128.8105926513672, -291.2843017578125, -87.46034240722656, -784.73046875, 644.85986328125, -505.053955078125, 331.450439453125, -189.81333923339844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000497.npy"}
|
||||
{"epoch": 0.7298091042584435, "step": 498, "batch_size": 64, "mean": 339.7348327636719, "std": 556.2342529296875, "min": -1228.3848876953125, "p10": -217.4216384887695, "median": 348.4901428222656, "p90": 989.3061462402346, "max": 1873.61181640625, "pos_frac": 0.75, "sample": [-825.4866333007812, 172.78762817382812, 651.1806640625, 252.88552856445312, -185.82321166992188, 548.6484985351562, 665.9170532226562, 1014.335693359375, 384.3860778808594, 621.4564208984375, 405.5318908691406, 713.146240234375, 258.7655944824219, 885.1658935546875, -1228.3848876953125, 673.7623901367188, 930.9038696289062, 1414.68310546875, -127.59465789794922, 626.4456176757812, 119.52837371826172, 696.3704833984375, -150.60757446289062, 846.0397338867188, -165.94381713867188, 370.8954772949219, 326.0848083496094, 111.67316436767578, -307.77752685546875, 9.097694396972656, 296.0212707519531, 372.31671142578125, -995.483642578125, 1176.41552734375, 424.382568359375, 10.8934326171875, -150.91329956054688, 474.72467041015625, 552.6380004882812, 546.8463745117188, 1264.8795166015625, -35.91948699951172, 21.2386474609375, 90.61701965332031, 678.9072265625, 621.1131591796875, -397.9060363769531, 38.28955078125, 1327.064208984375, 911.4824829101562, 252.3922882080078, -69.52337646484375, 320.1248474121094, -182.65625, -230.9638214111328, 26.970428466796875, 526.5886840820312, 178.78887939453125, 887.6748657226562, 556.3489379882812, -281.9245300292969, 1074.6195068359375, 1873.61181640625, -124.70559692382812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000498.npy"}
|
||||
{"epoch": 0.7312775330396476, "step": 499, "batch_size": 64, "mean": 396.4712829589844, "std": 523.7645874023438, "min": -951.9523315429688, "p10": -181.1992263793945, "median": 418.40965270996094, "p90": 1043.5861816406255, "max": 2079.344970703125, "pos_frac": 0.796875, "sample": [1097.7052001953125, 68.23481750488281, 201.76388549804688, -951.9523315429688, -69.2022933959961, 555.00439453125, 500.2206726074219, 616.8599853515625, -16.39466094970703, -117.38967895507812, 422.405517578125, 479.20819091796875, 76.71868133544922, 931.2161865234375, 559.3168334960938, 151.02920532226562, 651.2499389648438, -565.9146728515625, 2079.344970703125, 285.428466796875, 198.40652465820312, 713.822265625, 672.579345703125, 35.49266815185547, -482.52630615234375, 414.4137878417969, 280.5216369628906, 912.5416259765625, -280.183837890625, 1091.7447509765625, 210.37982177734375, 3.75347900390625, 590.1596069335938, -93.5569839477539, 306.6559753417969, 33.54346466064453, 556.6884765625, 926.420166015625, 828.0250244140625, -207.9963836669922, 29.909696578979492, 547.1384887695312, 604.25, 583.969970703125, 650.5153198242188, 122.18709564208984, 676.011474609375, 1591.4453125, 1193.3248291015625, 92.61994171142578, 481.3385314941406, 751.7689208984375, 401.56689453125, 780.757080078125, -40.997806549072266, 778.4755859375, 1167.4981689453125, 351.24603271484375, -556.90576171875, -160.9998321533203, -189.85610961914062, 641.842529296875, 10.678548812866211, 1200.638427734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000499.npy"}
|
||||
{"epoch": 0.7327459618208517, "step": 500, "batch_size": 64, "mean": 340.6584167480469, "std": 531.899169921875, "min": -1026.7510986328125, "p10": -378.69358520507797, "median": 269.9417419433594, "p90": 917.3555114746094, "max": 1739.6341552734375, "pos_frac": 0.796875, "sample": [274.3171081542969, -214.52101135253906, -598.0132446289062, 1739.6341552734375, -517.5682373046875, 515.5653686523438, 929.3179931640625, 475.0749206542969, 785.2230834960938, 129.2860107421875, 815.3759765625, 604.389892578125, 710.4026489257812, -448.2946472167969, 7.431943893432617, 475.8482360839844, 32.43965148925781, 96.59124755859375, 258.9822082519531, 1389.0286865234375, 616.9945678710938, -26.917587280273438, 689.6818237304688, 1243.515380859375, -16.219337463378906, 740.5479736328125, -474.40728759765625, 889.4430541992188, 806.6338500976562, -657.9625854492188, 767.2747192382812, 803.748779296875, 6.6800384521484375, 136.3608856201172, 89.45783233642578, 826.8297119140625, -682.2689208984375, 142.276611328125, 977.0280151367188, -1026.7510986328125, -173.2637481689453, 101.48661804199219, 144.7633056640625, 674.5728759765625, 32.594337463378906, -2.7333431243896484, 556.060546875, 79.58397674560547, 132.0030517578125, 265.5663757324219, 1182.2440185546875, 615.1176147460938, 685.5345458984375, 204.9163818359375, 628.8699340820312, 414.2856140136719, 662.2283935546875, 380.474365234375, 34.408897399902344, 606.8109741210938, 142.5956268310547, 1226.152587890625, -216.29110717773438, 111.69715881347656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000500.npy"}
|
||||
{"epoch": 0.7342143906020558, "step": 501, "batch_size": 64, "mean": 368.9128112792969, "std": 557.3740234375, "min": -817.5323486328125, "p10": -315.9778732299804, "median": 362.382568359375, "p90": 1127.8998779296876, "max": 1844.3668212890625, "pos_frac": 0.765625, "sample": [-115.38907623291016, 420.5760498046875, 264.7581787109375, -363.9725036621094, -38.680912017822266, 1256.8319091796875, 242.1348114013672, 912.1203002929688, 251.99510192871094, 4.642547607421875, 185.3515625, -175.02645874023438, 325.685791015625, 159.1385955810547, 1844.3668212890625, 751.2545776367188, 972.0869140625, 415.9063415527344, 448.9272766113281, 920.3206787109375, 1187.5189208984375, -67.58012390136719, 250.4485321044922, 141.64730834960938, 877.4838256835938, -444.2042541503906, -596.9141845703125, 499.80682373046875, 1079.1917724609375, 518.5880126953125, -687.1594848632812, 214.36465454101562, 466.2607421875, 378.8013610839844, 522.8592529296875, 1276.7374267578125, -230.33226013183594, 525.0696411132812, 367.64398193359375, 932.5265502929688, 1148.7747802734375, 16.316856384277344, -93.62614440917969, -352.6831359863281, 637.0260009765625, 37.60650634765625, 21.332416534423828, 548.5721435546875, 76.28665161132812, -817.5323486328125, 357.12115478515625, 573.8143920898438, -195.9941864013672, -90.26956176757812, 14.652774810791016, -465.665771484375, 1073.5909423828125, 758.4441528320312, 1225.99560546875, 436.60382080078125, 1805.07275390625, 529.1204833984375, 445.70751953125, 24.365619659423828], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000501.npy"}
|
||||
{"epoch": 0.73568281938326, "step": 502, "batch_size": 64, "mean": 361.4590759277344, "std": 419.40155029296875, "min": -594.37158203125, "p10": -65.39839172363276, "median": 315.8251953125, "p90": 956.8492980957034, "max": 1847.9150390625, "pos_frac": 0.875, "sample": [636.5403442382812, -262.497314453125, 814.4312133789062, 485.5806579589844, 66.38018798828125, 602.93603515625, -86.67151641845703, 39.84423828125, 1217.7369384765625, 358.17431640625, 387.1773681640625, 289.7814636230469, 356.8509521484375, 1081.0489501953125, 908.0254516601562, 35.6041259765625, 4.7296142578125, 262.9692687988281, -594.37158203125, 104.0035400390625, 635.2517700195312, 992.51123046875, 110.98052215576172, 416.9625549316406, 35.2158203125, 172.9313201904297, 57.52754211425781, 317.35595703125, 444.40869140625, 371.3002014160156, -234.66038513183594, 1061.2122802734375, 85.42183685302734, 487.5244445800781, 463.1815185546875, 797.0645141601562, -119.79948425292969, 1847.9150390625, 27.181198120117188, 12.926897048950195, 729.8465576171875, 50.42991638183594, 249.61297607421875, 306.32373046875, 394.4516906738281, 584.9949340820312, 1171.0584716796875, 43.68415069580078, 661.1407470703125, 428.06072998046875, 758.8038330078125, 14.771224975585938, -15.761102676391602, 113.99099731445312, 977.7738037109375, -147.9405059814453, 289.66448974609375, 43.575347900390625, 726.66650390625, 322.59844970703125, -89.92252349853516, 329.07415771484375, 185.50086975097656, 314.29443359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000502.npy"}
|
||||
{"epoch": 0.737151248164464, "step": 503, "batch_size": 64, "mean": 332.10968017578125, "std": 437.67864990234375, "min": -333.416015625, "p10": -100.05692138671876, "median": 238.76656341552734, "p90": 774.6872253417969, "max": 1973.5906982421875, "pos_frac": 0.78125, "sample": [2.599895477294922, -63.693626403808594, 1010.5792846679688, 10.250656127929688, -190.49330139160156, -196.423828125, 459.4969482421875, 241.73890686035156, 115.37667083740234, -46.219993591308594, -333.416015625, 340.18780517578125, -100.6479263305664, 175.1581268310547, 987.4873657226562, 768.584228515625, 346.36993408203125, 718.48681640625, 81.5074462890625, 682.178955078125, -67.88182067871094, 102.47041320800781, 10.419967651367188, 85.07062530517578, 406.20660400390625, -85.15324401855469, -98.67790985107422, 561.3662109375, 235.79421997070312, 170.1732177734375, 469.48699951171875, 2.33685302734375, 110.86741638183594, 605.8089599609375, 20.735366821289062, -300.8048095703125, 1973.5906982421875, 1654.2520751953125, 571.2003173828125, 398.10394287109375, 117.94406127929688, 678.6526489257812, 725.1329956054688, 94.65624237060547, -74.771484375, 593.1986694335938, 167.3697967529297, 766.2085571289062, 204.13168334960938, 377.197998046875, 158.5247802734375, 517.28125, 438.77557373046875, 259.70721435546875, 962.9660034179688, 508.94207763671875, -125.7776870727539, 737.7381591796875, -193.13302612304688, 1107.1788330078125, 261.5309753417969, 420.15313720703125, 777.3027954101562, -60.367279052734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000503.npy"}
|
||||
{"epoch": 0.7386196769456681, "step": 504, "batch_size": 64, "mean": 398.95245361328125, "std": 553.2864379882812, "min": -658.9725341796875, "p10": -324.59171447753897, "median": 326.43994140625, "p90": 1039.4508422851563, "max": 1695.787841796875, "pos_frac": 0.765625, "sample": [-133.38967895507812, 156.8521728515625, 917.0464477539062, 390.04351806640625, 116.84339904785156, 282.3158264160156, 666.7881469726562, -230.66168212890625, 1664.768310546875, 1330.5355224609375, 1695.787841796875, -618.0576782226562, 571.5191650390625, 685.0697021484375, -50.74192810058594, -85.50343322753906, 622.7373046875, 208.86135864257812, -14.629585266113281, 18.302215576171875, 56.35533905029297, 15.580955505371094, 295.66168212890625, -214.50108337402344, 238.7620086669922, 211.96559143066406, -515.023193359375, 493.685546875, 170.60867309570312, 436.94769287109375, -364.8474426269531, 1042.6826171875, -470.3276062011719, 1349.9207763671875, 872.9720458984375, 705.760986328125, 807.720458984375, 581.8873291015625, 250.86404418945312, 319.1790771484375, 179.29388427734375, 1273.617431640625, 729.8699951171875, 484.0852355957031, 954.2670288085938, -173.76278686523438, -473.3396301269531, 841.2090454101562, 38.86567687988281, 960.4532470703125, 193.97103881835938, 682.817138671875, 1031.9100341796875, 712.0785522460938, 347.309326171875, 328.871337890625, -455.60894775390625, 908.9237670898438, 1615.8896484375, 324.008544921875, -658.9725341796875, 508.44476318359375, 708.5075073242188, -10.093994140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000504.npy"}
|
||||
{"epoch": 0.7400881057268722, "step": 505, "batch_size": 64, "mean": 219.14529418945312, "std": 440.14556884765625, "min": -745.8865356445312, "p10": -233.44024353027342, "median": 173.38536071777344, "p90": 865.4058654785156, "max": 1418.120361328125, "pos_frac": 0.6875, "sample": [6.995811462402344, 537.7227172851562, -237.85223388671875, 565.3704833984375, -49.71705627441406, 902.3966674804688, 74.70643615722656, -40.98381042480469, 99.51141357421875, 31.342300415039062, 790.71923828125, 825.31396484375, 1418.120361328125, -15.0035400390625, 567.5537719726562, 549.6226806640625, -186.46839904785156, 351.24371337890625, 162.5849609375, -202.1100616455078, 285.8525085449219, 579.4262084960938, 202.111083984375, 85.09579467773438, -485.20745849609375, -638.2508544921875, 130.46263122558594, 255.72802734375, 909.6454467773438, 267.09954833984375, -48.19789505004883, 380.35760498046875, 429.1827392578125, -223.14559936523438, 214.68649291992188, 723.6898193359375, 30.522499084472656, 672.3191528320312, -81.83016204833984, 517.1406860351562, -670.6408081054688, -745.8865356445312, 185.9272918701172, 207.1968536376953, 12.38873291015625, 102.01226043701172, 938.2753295898438, 439.7900695800781, -198.79234313964844, 862.86328125, 171.2581024169922, -187.53558349609375, -368.99456787109375, 882.9779052734375, 175.5126190185547, -88.79045104980469, 866.4955444335938, -427.529296875, -140.0888671875, 49.14411926269531, 879.5833129882812, 231.5489501953125, 530.6763916015625, -39.849971771240234], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000505.npy"}
|
||||
{"epoch": 0.7415565345080763, "step": 506, "batch_size": 64, "mean": 361.67413330078125, "std": 496.19549560546875, "min": -773.3080444335938, "p10": -139.54901580810548, "median": 342.88665771484375, "p90": 1039.2372314453125, "max": 1939.2320556640625, "pos_frac": 0.734375, "sample": [-423.40679931640625, 36.58514404296875, 708.7373046875, -7.5093536376953125, 251.03504943847656, 336.2564697265625, -203.1980438232422, 598.9994506835938, 740.8015747070312, 36.798519134521484, 615.8346557617188, 266.235595703125, 300.70404052734375, 154.73072814941406, -703.4614868164062, 1040.594482421875, 464.5948486328125, -31.186264038085938, 302.6951904296875, 633.5006713867188, 450.3265380859375, 1351.60302734375, 375.1163635253906, 63.68981170654297, 1939.2320556640625, -138.6791534423828, -139.92181396484375, 463.7685546875, 654.588134765625, 287.154296875, 378.31695556640625, 1157.0477294921875, 423.13525390625, 105.80563354492188, 532.6569213867188, -624.6109008789062, 359.293701171875, 184.05938720703125, 1061.1973876953125, 860.54296875, 550.9302368164062, 647.060546875, -123.44313049316406, 438.00347900390625, 151.42930603027344, -773.3080444335938, 305.5876770019531, -25.978248596191406, 1036.0703125, 901.244140625, 1034.2508544921875, -4.385341644287109, 349.516845703125, 743.3534545898438, 722.1194458007812, 1081.8236083984375, -4.3278656005859375, 1048.6160888671875, 189.58335876464844, -25.82941436767578, -302.8936767578125, -13.343963623046875, 437.7492980957031, -80.34906768798828], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000506.npy"}
|
||||
{"epoch": 0.7430249632892805, "step": 507, "batch_size": 64, "mean": 314.0374755859375, "std": 605.41552734375, "min": -1088.2271728515625, "p10": -359.6368194580078, "median": 291.5054931640625, "p90": 844.6380249023439, "max": 2411.1865234375, "pos_frac": 0.71875, "sample": [3.734029769897461, 481.96649169921875, -49.94404602050781, -1088.2271728515625, 1194.0111083984375, -137.98199462890625, 60.76231384277344, 384.3133239746094, 39.84876251220703, 133.46136474609375, -76.48249816894531, 425.64483642578125, 12.155410766601562, 414.880859375, 656.222412109375, 721.031982421875, -458.7060546875, 323.7967529296875, 176.3345489501953, 2043.0081787109375, 492.90948486328125, 621.0928955078125, 35.07944869995117, -236.57174682617188, 450.66729736328125, 313.90008544921875, 652.04296875, 858.7783813476562, -378.3077087402344, 336.74896240234375, 348.517578125, -65.67257690429688, -316.0714111328125, -39.28392028808594, 32.054908752441406, 661.68701171875, 596.2840576171875, 1407.7882080078125, 655.0011596679688, 62.65984344482422, 271.2626953125, 19.925918579101562, -40.857635498046875, 377.7416076660156, 2411.1865234375, 939.4231567382812, 184.22454833984375, 298.26568603515625, -437.3600769042969, 729.2079467773438, 788.0701904296875, -550.5297241210938, -26.25646209716797, 674.8837890625, 1921.628662109375, 628.7052001953125, 811.6438598632812, 450.5632629394531, 284.74530029296875, -25.8160343170166, 30.934974670410156, -140.92697143554688, -418.9162902832031, -832.4874267578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000507.npy"}
|
||||
{"epoch": 0.7444933920704846, "step": 508, "batch_size": 64, "mean": 266.6474609375, "std": 621.9771728515625, "min": -1508.0501708984375, "p10": -399.3205535888672, "median": 238.8148193359375, "p90": 943.4833129882816, "max": 2259.099609375, "pos_frac": 0.6875, "sample": [202.15109252929688, 169.4642333984375, -681.134765625, 736.7457885742188, 559.4208374023438, -54.82757568359375, 117.93014526367188, 1941.730712890625, 434.6089172363281, 651.8135375976562, 424.6153259277344, 1027.2276611328125, 275.4785461425781, 2259.099609375, 574.0980834960938, 301.36767578125, 197.05300903320312, -675.2545166015625, 1256.4197998046875, 873.283447265625, -523.9200439453125, -254.51011657714844, 330.25213623046875, -1080.0107421875, 973.5689697265625, -239.85726928710938, 88.8046875, -202.84133911132812, -110.34074401855469, 325.30450439453125, -48.47163391113281, 362.9059753417969, 48.33515930175781, -30.143047332763672, 1630.6214599609375, -1508.0501708984375, 92.5527572631836, 135.7863311767578, 176.20556640625, 763.6673583984375, 404.6020202636719, 126.07965850830078, 110.36312866210938, 457.290283203125, 566.0270385742188, 277.29425048828125, 516.6465454101562, 609.98046875, 743.8178100585938, 513.6071166992188, 658.2494506835938, -255.61447143554688, -257.94244384765625, 1000.2053833007812, 577.76708984375, -234.240478515625, -114.3516845703125, -46.18701171875, -434.32806396484375, 650.1898193359375, -404.4462890625, 446.87054443359375, 19.767614364624023, -387.3605041503906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000508.npy"}
|
||||
{"epoch": 0.7459618208516887, "step": 509, "batch_size": 64, "mean": 292.65716552734375, "std": 459.6500244140625, "min": -647.9981079101562, "p10": -262.52355499267577, "median": 245.3778533935547, "p90": 938.4312622070313, "max": 1716.238037109375, "pos_frac": 0.78125, "sample": [-48.71448516845703, -77.01138305664062, -566.5128173828125, 897.300537109375, -131.53643798828125, 524.8711547851562, 69.78009796142578, 111.11048126220703, 504.5955810546875, 343.49090576171875, 660.6602172851562, 122.26605987548828, 290.45611572265625, 94.73776245117188, 194.58680725097656, 152.7113800048828, 934.6029663085938, -481.36602783203125, -428.1878662109375, 394.3762512207031, 1152.078125, 475.7182922363281, 151.16676330566406, 249.46966552734375, 420.47906494140625, 727.1764526367188, 156.53240966796875, 121.02645111083984, 301.3982849121094, 421.7455139160156, 560.0807495117188, -312.4195251464844, 91.21818542480469, -199.5973663330078, 293.2058410644531, 940.0719604492188, 761.6361694335938, 60.92927551269531, 209.06362915039062, -229.5585174560547, -44.340576171875, 444.98992919921875, 306.3380126953125, 124.28659057617188, 47.55464172363281, -647.9981079101562, 1081.7998046875, 1276.9649658203125, -92.66957092285156, 1716.238037109375, -276.65142822265625, 178.11981201171875, -363.4795837402344, 34.326507568359375, 267.3929138183594, 354.91455078125, 96.08708953857422, 268.6726379394531, 241.28604125976562, 495.14837646484375, 694.0463256835938, 1172.415771484375, 428.8956298828125, 1012.0813598632812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000509.npy"}
|
||||
{"epoch": 0.7474302496328928, "step": 510, "batch_size": 64, "mean": 386.8134765625, "std": 424.6536560058594, "min": -544.45947265625, "p10": -156.32365722656246, "median": 391.5742950439453, "p90": 861.7651977539062, "max": 1379.748291015625, "pos_frac": 0.78125, "sample": [258.3844909667969, 875.6951904296875, 1208.8424072265625, 441.2361145019531, -172.29629516601562, 867.431396484375, 751.7836303710938, 346.46075439453125, 85.53605651855469, 758.8546142578125, 401.6313171386719, 563.7468872070312, -262.5110778808594, 538.29150390625, 1111.7940673828125, 828.6639404296875, 678.181396484375, -544.45947265625, -56.76781463623047, 840.028076171875, 136.09347534179688, 381.51727294921875, 425.02099609375, 177.99948120117188, 280.0002746582031, 682.79931640625, 589.5237426757812, 848.5440673828125, 1098.255859375, -264.14361572265625, 819.2093505859375, 1379.748291015625, -119.05416870117188, 138.671142578125, 674.0662841796875, 145.95352172851562, 310.7001647949219, 1247.68603515625, -302.06976318359375, 327.72637939453125, 263.32891845703125, 112.2160415649414, 685.9512939453125, 276.839599609375, 487.03857421875, -52.808753967285156, 516.6451416015625, 412.57415771484375, 37.48118591308594, 405.56683349609375, 723.5466918945312, -49.98949432373047, 126.525390625, 721.9248657226562, -69.14649963378906, 519.3426513671875, -338.8794250488281, 829.52490234375, -58.41216278076172, -187.35723876953125, -22.88075828552246, 680.2276611328125, 182.56478881835938, 55.461971282958984], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000510.npy"}
|
||||
{"epoch": 0.748898678414097, "step": 511, "batch_size": 64, "mean": 305.0107421875, "std": 559.1774291992188, "min": -1448.028564453125, "p10": -186.04444274902332, "median": 203.71514892578125, "p90": 967.8306579589847, "max": 1716.729248046875, "pos_frac": 0.765625, "sample": [-60.30443572998047, 1261.4547119140625, 198.45962524414062, 178.2852325439453, 376.21002197265625, 94.09629821777344, 1002.9166870117188, 1469.3961181640625, 514.4116821289062, 164.50924682617188, 36.62012481689453, 640.2913208007812, 654.536376953125, 664.774658203125, 93.26271057128906, 1127.029052734375, -1448.028564453125, 439.5709228515625, -368.4600830078125, 663.3948364257812, 1406.587158203125, 772.8029174804688, 74.16651916503906, 436.50634765625, 236.90277099609375, 19.81928253173828, -709.9124755859375, 373.566650390625, -45.605682373046875, 627.9255981445312, 435.54388427734375, -5.2286834716796875, -17.482364654541016, 296.61065673828125, 322.20037841796875, 148.4530792236328, 885.9632568359375, -245.37930297851562, 5.296661376953125, 183.59561157226562, 801.63818359375, 177.32351684570312, 26.157733917236328, -358.6958923339844, -56.02129364013672, 725.6742553710938, 758.783203125, 71.6383056640625, -1.5869731903076172, 97.73756408691406, -239.93301391601562, 455.2762756347656, 497.810546875, -60.157630920410156, 208.97067260742188, 449.56353759765625, 232.74716186523438, -12.852165222167969, 1716.729248046875, 92.57534790039062, 447.8565368652344, -1205.197998046875, 131.920654296875, 1657.9722900390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000511.npy"}
|
||||
{"epoch": 0.750367107195301, "step": 512, "batch_size": 64, "mean": 414.32196044921875, "std": 422.2722473144531, "min": -374.234130859375, "p10": -77.06082000732417, "median": 372.9373474121094, "p90": 991.2586303710939, "max": 1760.5247802734375, "pos_frac": 0.84375, "sample": [1112.9320068359375, 961.63671875, 705.0360717773438, -314.98907470703125, -150.46311950683594, 50.367156982421875, 1760.5247802734375, 246.23211669921875, 67.37599182128906, -20.66922950744629, 119.19911193847656, 777.831787109375, 4.001031875610352, 427.68670654296875, -200.93783569335938, 44.464229583740234, 490.41943359375, 492.2033386230469, 307.12139892578125, -2.220792770385742, -193.499267578125, 682.1996459960938, 137.08489990234375, 526.1234130859375, 1175.5701904296875, 783.6392822265625, 249.1050262451172, 413.8323974609375, 100.43199920654297, 756.9940185546875, 345.10101318359375, 315.2676086425781, 514.1777954101562, 480.42523193359375, 220.7421112060547, 237.88035583496094, 1097.2640380859375, 292.88580322265625, -101.22864532470703, 868.65576171875, 150.15658569335938, 1054.85498046875, 413.0345458984375, 1003.9537353515625, 35.622955322265625, -374.234130859375, 407.5185546875, 17.348773956298828, 281.43157958984375, 435.0244140625, 1069.3311767578125, -17.61841583251953, 287.9001159667969, 663.6416625976562, 260.3713073730469, 526.861083984375, 703.6008911132812, 224.89163208007812, 400.773681640625, 831.253662109375, -201.0638885498047, 927.117919921875, 877.3228149414062, 757.105712890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000512.npy"}
|
||||
{"epoch": 0.7518355359765051, "step": 513, "batch_size": 64, "mean": 301.99615478515625, "std": 487.93682861328125, "min": -690.7075805664062, "p10": -213.18499298095702, "median": 227.2471160888672, "p90": 1077.456787109375, "max": 1343.076171875, "pos_frac": 0.765625, "sample": [714.205322265625, 350.6400146484375, 281.08538818359375, 145.88919067382812, 255.11648559570312, 192.2020721435547, 295.1914978027344, 200.32028198242188, -193.87242126464844, 1037.6324462890625, 629.6205444335938, -125.73870849609375, 693.1864013671875, 37.89690399169922, 64.0167465209961, 737.7167358398438, 29.821775436401367, 55.83551025390625, 191.3198699951172, 365.475830078125, 947.2548828125, 1178.21044921875, 1253.970947265625, 254.1739501953125, 615.2218627929688, 30.27341079711914, 343.29815673828125, 187.63409423828125, 325.203857421875, -199.52069091796875, 49.518218994140625, 1217.86376953125, 539.4293823242188, 399.1265869140625, 262.59722900390625, -56.450775146484375, -31.485397338867188, -589.7417602539062, 264.37396240234375, 1343.076171875, -28.831008911132812, 152.22091674804688, 58.14739990234375, -690.7075805664062, 608.6564331054688, 7.634521484375, -453.6287536621094, -22.505685806274414, 1156.0411376953125, 940.875, -649.8278198242188, 9.344741821289062, 1079.216796875, 1248.920654296875, 302.4990234375, 197.89193725585938, -345.5802001953125, 595.0254516601562, -86.0922622680664, -219.04112243652344, -384.583251953125, 1073.35009765625, 140.0267333984375, 347.1097717285156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000513.npy"}
|
||||
{"epoch": 0.7533039647577092, "step": 514, "batch_size": 64, "mean": 303.3856506347656, "std": 505.4642639160156, "min": -785.5114135742188, "p10": -201.4287399291992, "median": 183.4027099609375, "p90": 1007.5641296386722, "max": 1815.375732421875, "pos_frac": 0.6875, "sample": [-521.7512817382812, -184.31991577148438, 98.42671203613281, -68.80017852783203, 1092.1318359375, 186.70932006835938, 604.8616943359375, 568.4607543945312, 1815.375732421875, 928.1642456054688, 153.9138946533203, 671.0927734375, -157.99932861328125, 162.27911376953125, 386.10772705078125, -73.11759948730469, -309.4903869628906, 159.551513671875, 1159.5908203125, 1107.450927734375, -208.76109313964844, -62.66596221923828, 385.2278747558594, -111.52687072753906, 37.38050079345703, 353.5763854980469, 360.926513671875, 161.937744140625, 537.5440063476562, -97.33521270751953, 883.6828002929688, 556.7454223632812, -155.57037353515625, -14.14373779296875, 180.09609985351562, 325.98406982421875, 1234.1033935546875, -149.09414672851562, -23.06726837158203, -222.72988891601562, 317.21710205078125, -508.8048095703125, -124.00983428955078, 496.00067138671875, 209.60348510742188, 128.85569763183594, -785.5114135742188, -174.5963897705078, -275.8328857421875, 809.769775390625, 228.49765014648438, 489.1942138671875, 467.2296142578125, 653.0538330078125, 1041.5926513671875, 150.14059448242188, 300.41912841796875, 665.030029296875, 726.9679565429688, 139.9995574951172, 802.6925048828125, 45.73857498168945, 134.07528686523438, 1728.409423828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000514.npy"}
|
||||
{"epoch": 0.7547723935389133, "step": 515, "batch_size": 64, "mean": 224.7902374267578, "std": 448.4010925292969, "min": -1588.1658935546875, "p10": -216.68487701416015, "median": 209.55753326416016, "p90": 741.2733642578128, "max": 1351.3326416015625, "pos_frac": 0.75, "sample": [-103.08500671386719, -98.69026184082031, 573.3716430664062, 838.9771728515625, 604.408935546875, -1588.1658935546875, 495.2304992675781, 408.42559814453125, 128.30938720703125, -554.9630737304688, -352.73577880859375, 34.99385070800781, -15.248931884765625, 170.02972412109375, 310.9964599609375, 289.656005859375, 185.45799255371094, 515.211181640625, -407.1138916015625, 37.28045654296875, 368.0500793457031, -202.07997131347656, 165.94798278808594, 349.502685546875, 186.2164764404297, 145.27403259277344, 681.9487915039062, -186.42657470703125, 517.7606811523438, 43.508934020996094, 766.6981811523438, 31.856882095336914, -222.94412231445312, 591.6307373046875, 271.57550048828125, 347.80712890625, 583.8185424804688, 12.95635986328125, 1223.8037109375, -167.39199829101562, 157.71768188476562, 1080.4732666015625, -368.37799072265625, -6.248374938964844, -369.1723937988281, 234.78875732421875, 106.96903228759766, 773.2115478515625, -18.656841278076172, 71.53326416015625, 22.989280700683594, 1064.4893798828125, 232.89859008789062, 123.60436248779297, 340.3938903808594, 580.04541015625, 466.6069641113281, -116.70761108398438, 310.73321533203125, 377.35662841796875, 337.80767822265625, 258.7668151855469, 1351.3326416015625, 392.15936279296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000515.npy"}
|
||||
{"epoch": 0.7562408223201175, "step": 516, "batch_size": 64, "mean": 342.5379943847656, "std": 421.19232177734375, "min": -673.2535400390625, "p10": -191.70615539550775, "median": 327.9738464355469, "p90": 868.5500305175783, "max": 1198.033935546875, "pos_frac": 0.765625, "sample": [236.41366577148438, 239.80618286132812, 791.572265625, -113.97467803955078, 553.5574340820312, 751.9182739257812, 454.0545959472656, -126.99928283691406, 262.16552734375, 440.8201904296875, 1198.033935546875, 504.0527648925781, 522.2657470703125, 477.182861328125, 153.8649139404297, -135.36502075195312, 571.5787963867188, -32.51203918457031, 111.736572265625, 1188.62646484375, -673.2535400390625, 99.17779541015625, -215.85235595703125, -411.6431579589844, 959.7613525390625, 496.923095703125, -502.6408386230469, 885.8800659179688, 1139.260986328125, 170.77284240722656, 266.4472961425781, 788.6898193359375, 1073.16455078125, 226.98394775390625, 578.2844848632812, 618.7950439453125, 309.47625732421875, -4.069950103759766, 444.97503662109375, 269.2442321777344, 152.77037048339844, -59.5338134765625, 828.11328125, 285.0483703613281, 141.48220825195312, 1095.650390625, 156.42994689941406, 157.11590576171875, -17.431434631347656, 463.57177734375, 433.04345703125, 485.87017822265625, 90.43441772460938, 570.7283935546875, 696.4788208007812, -238.78964233398438, -255.95668029785156, -426.8656005859375, 495.33709716796875, 671.0997314453125, 523.5833740234375, 811.005859375, 346.471435546875, -52.40077590942383], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000516.npy"}
|
||||
{"epoch": 0.7577092511013216, "step": 517, "batch_size": 64, "mean": 394.68890380859375, "std": 537.0808715820312, "min": -668.8670043945312, "p10": -176.79386901855466, "median": 356.9472198486328, "p90": 866.9944885253907, "max": 2343.83349609375, "pos_frac": 0.78125, "sample": [573.8851318359375, 745.8698120117188, 108.69192504882812, 378.31854248046875, 843.3380737304688, 206.54580688476562, -464.18780517578125, 847.3283081054688, 502.68572998046875, -217.5865936279297, 551.3358154296875, 1946.8824462890625, 1302.26025390625, 377.83599853515625, 556.7052001953125, -103.36913299560547, 278.7374572753906, 508.86907958984375, 66.44353485107422, 747.031005859375, 1374.466796875, 632.413330078125, 26.10001564025879, 227.29849243164062, -668.8670043945312, -285.82318115234375, 119.27950286865234, 383.4380798339844, -409.1744079589844, 2343.83349609375, 328.20819091796875, -30.602981567382812, -161.50636291503906, 875.4228515625, 109.692626953125, 611.1376953125, -469.6524658203125, 426.0242614746094, -67.298828125, 340.8360290527344, 753.832275390625, -114.2419662475586, 314.5352478027344, 297.3775329589844, 615.7964477539062, 535.5121459960938, 743.8638305664062, 375.0122375488281, 25.45355987548828, 1372.0423583984375, 128.61166381835938, 1128.531982421875, 373.05841064453125, 810.9664916992188, 94.492919921875, 749.517333984375, -183.3456573486328, 225.40078735351562, 217.3644561767578, 434.490478515625, -149.64422607421875, 820.1921997070312, 250.52865600585938, -22.104286193847656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000517.npy"}
|
||||
{"epoch": 0.7591776798825257, "step": 518, "batch_size": 64, "mean": 324.5028076171875, "std": 416.84844970703125, "min": -832.964111328125, "p10": -164.0851364135742, "median": 277.9253845214844, "p90": 881.2076354980469, "max": 1213.65234375, "pos_frac": 0.796875, "sample": [103.21786499023438, 1059.395263671875, 507.3958740234375, 375.23114013671875, 225.1790008544922, 277.86334228515625, 492.1708984375, 587.7155151367188, 53.1950569152832, 238.94564819335938, 865.6121215820312, 654.7269287109375, 146.3148193359375, -247.31480407714844, 496.1772155761719, 249.8029022216797, 357.31207275390625, 95.13593292236328, 57.435882568359375, 493.6397705078125, 877.3634033203125, -35.729774475097656, -262.2424621582031, 138.86416625976562, -4.406009674072266, 318.0357360839844, 777.102783203125, 104.88677978515625, 685.077392578125, 911.98193359375, 843.5164794921875, 25.288253784179688, 1196.5074462890625, 1213.65234375, 766.1153564453125, 708.5, 147.8140869140625, -107.84115600585938, 481.3829650878906, 628.8104248046875, 277.9874267578125, -104.62249755859375, 480.1250305175781, -174.00137329101562, 437.83819580078125, -407.0811767578125, -368.14825439453125, -832.964111328125, 333.5197448730469, 422.3194274902344, 36.19743347167969, 209.236572265625, -140.94725036621094, 184.67715454101562, -342.0260009765625, 1038.95849609375, 962.2861938476562, 311.8324279785156, -104.86276245117188, 882.8551635742188, 272.8556823730469, 129.92225646972656, 528.7442626953125, 229.6444854736328], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000518.npy"}
|
||||
{"epoch": 0.7606461086637298, "step": 519, "batch_size": 64, "mean": 238.39837646484375, "std": 431.2276306152344, "min": -906.4351806640625, "p10": -291.03977966308594, "median": 249.06765747070312, "p90": 813.7243408203127, "max": 1212.6063232421875, "pos_frac": 0.671875, "sample": [-40.972747802734375, 559.9146728515625, 365.4709777832031, -365.7731018066406, 1021.1005249023438, -279.8383483886719, 98.36698913574219, 476.2901611328125, 1025.84326171875, 584.0471801757812, 101.58116149902344, 581.3392333984375, -189.7376708984375, 833.3198852539062, 373.3324279785156, -800.4478149414062, 1175.49072265625, 583.6439208984375, 82.34691619873047, -12.31947135925293, 290.941162109375, -108.56239318847656, 49.671478271484375, 336.3828125, 522.486083984375, 131.66909790039062, 274.70599365234375, 529.1383056640625, 499.6285400390625, 43.258567810058594, -29.96588134765625, 669.017333984375, -131.07791137695312, -100.255126953125, 336.7912902832031, 83.66138458251953, 524.6340942382812, -2.7158279418945312, -316.8563232421875, -16.989700317382812, 334.6925048828125, -253.97848510742188, 945.8272094726562, 419.025390625, -906.4351806640625, 551.215087890625, 1212.6063232421875, 40.92723846435547, -59.69122314453125, -16.558792114257812, 176.418212890625, 871.7313232421875, -313.5516662597656, 592.2330322265625, -295.84039306640625, 223.4293212890625, 768.0014038085938, 17.56417465209961, 382.9076232910156, 543.3019409179688, 276.5872802734375, -28.353404998779297, 325.1293640136719, -308.2544860839844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000519.npy"}
|
||||
{"epoch": 0.762114537444934, "step": 520, "batch_size": 64, "mean": 300.2342529296875, "std": 470.6655578613281, "min": -1022.561279296875, "p10": -164.7825942993164, "median": 207.77989959716797, "p90": 919.8718017578125, "max": 1997.6778564453125, "pos_frac": 0.75, "sample": [-90.4458999633789, -226.4208984375, 1002.378173828125, 148.16465759277344, 432.9857482910156, 154.17129516601562, 358.2278747558594, -76.58587646484375, -165.50042724609375, 189.43426513671875, 17.087051391601562, -55.200775146484375, 644.87548828125, -66.83979797363281, -42.610389709472656, 514.6027221679688, 1165.253173828125, 1997.6778564453125, -210.44442749023438, 353.3175354003906, 1095.111572265625, 1264.100341796875, -401.6649169921875, 169.798828125, -43.806182861328125, 709.6419067382812, 126.2660140991211, 603.2789306640625, 473.84906005859375, 317.4398193359375, 255.46951293945312, 249.04664611816406, 1263.30224609375, 109.33351135253906, 57.742279052734375, 466.1555480957031, 528.013916015625, 434.7480773925781, 718.5999755859375, -1022.561279296875, 477.7033386230469, 152.69703674316406, 199.4171905517578, 305.7647705078125, 50.523162841796875, 907.1442260742188, 48.61748504638672, 914.14892578125, 449.61163330078125, 90.94818115234375, 198.83348083496094, -250.38693237304688, -229.68038940429688, 292.1588134765625, 216.14260864257812, 533.240478515625, -119.76983642578125, 922.324462890625, -163.10765075683594, 488.8022766113281, 268.764404296875, -96.81639099121094, 6.441497802734375, 133.4771728515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000520.npy"}
|
||||
{"epoch": 0.7635829662261381, "step": 521, "batch_size": 64, "mean": 327.1440734863281, "std": 436.8957214355469, "min": -754.3360595703125, "p10": -105.43234176635741, "median": 248.70413970947266, "p90": 959.8477844238283, "max": 1605.49169921875, "pos_frac": 0.78125, "sample": [457.7850341796875, 750.917724609375, 239.07395935058594, 26.53948974609375, -37.143226623535156, -754.3360595703125, -155.46322631835938, -19.2353515625, -84.2591552734375, 520.3048095703125, 47.64897155761719, -176.44198608398438, 5.865848541259766, 481.80865478515625, 442.3562927246094, 1037.2593994140625, 90.35247802734375, 12.56955337524414, -252.02662658691406, 915.3209228515625, 195.7595977783203, -71.08177185058594, -90.87645721435547, 235.89674377441406, 517.016845703125, 611.4857788085938, 561.127685546875, 258.3343200683594, 110.1486587524414, 17.48175048828125, 377.954833984375, 557.3749389648438, -41.09400177001953, 997.59765625, 183.80514526367188, 598.037109375, 521.7479248046875, 176.79336547851562, 531.6536865234375, -117.95439910888672, 309.9302062988281, 350.4217529296875, 295.682373046875, 68.61112976074219, 227.67864990234375, 1091.94970703125, -111.67057800292969, 1079.482177734375, -16.761322021484375, -553.39599609375, 563.95703125, 383.33392333984375, 657.9982299804688, 28.456771850585938, 679.6282348632812, 978.9307250976562, 170.0455780029297, 1443.336181640625, 26.110862731933594, 1605.49169921875, 446.2208557128906, 64.48924255371094, 702.4774169921875, 764.708984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000521.npy"}
|
||||
{"epoch": 0.7650513950073421, "step": 522, "batch_size": 64, "mean": 291.8514709472656, "std": 459.6984558105469, "min": -775.175048828125, "p10": -146.8851776123047, "median": 215.33358001708984, "p90": 1036.589538574219, "max": 1344.6593017578125, "pos_frac": 0.8125, "sample": [219.20481872558594, 668.6227416992188, 839.7688598632812, -9.775779724121094, 1054.065673828125, 139.724853515625, 1344.6593017578125, 251.12985229492188, 60.34444808959961, 13.096019744873047, -141.09225463867188, 367.6535339355469, 750.882568359375, 42.40069580078125, 453.6529846191406, -64.42880249023438, 289.669677734375, -88.38609313964844, 385.00567626953125, 211.46234130859375, 617.8331909179688, 588.9210815429688, 133.67572021484375, 282.02569580078125, -639.749267578125, 1094.5858154296875, 336.6429443359375, 149.4221649169922, 488.47625732421875, 578.4570922851562, 328.66259765625, 152.74021911621094, 98.32893371582031, 39.163299560546875, 236.12384033203125, -775.175048828125, 533.3579711914062, 124.32476043701172, 176.84344482421875, -374.92071533203125, -149.36785888671875, 396.8583984375, 995.8118896484375, 114.99970245361328, -426.791015625, 517.2696533203125, 560.6229248046875, 202.6610107421875, -686.2192993164062, 1077.6871337890625, 1073.9827880859375, 26.600128173828125, 1243.075439453125, 94.6010513305664, 135.50584411621094, -490.56390380859375, 46.361671447753906, -77.8078842163086, 607.2328491210938, 234.43136596679688, 939.2957153320312, 81.36798095703125, 1105.2852783203125, 98.19117736816406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000522.npy"}
|
||||
{"epoch": 0.7665198237885462, "step": 523, "batch_size": 64, "mean": 316.2938537597656, "std": 531.8327026367188, "min": -946.4376831054688, "p10": -290.5767883300781, "median": 286.3076477050781, "p90": 1010.0177124023438, "max": 1580.076416015625, "pos_frac": 0.703125, "sample": [-157.5538787841797, 1145.6951904296875, 105.43049621582031, 291.48834228515625, 378.5599365234375, -293.03155517578125, 382.42010498046875, 528.5198974609375, 863.78369140625, 337.33685302734375, -744.5036010742188, 694.2874145507812, 3.7416858673095703, -11.864105224609375, -387.5995178222656, 65.71946716308594, 259.0289306640625, 808.08984375, -509.1053771972656, 213.36981201171875, 495.56170654296875, 998.8731689453125, -116.64312744140625, 4.861345291137695, 485.69769287109375, 119.45237731933594, 213.82102966308594, 100.70010375976562, -49.459999084472656, 1580.076416015625, 496.2807312011719, 117.20957946777344, -77.74276733398438, -946.4376831054688, 1014.7939453125, 805.4105834960938, -438.5771179199219, -28.80732536315918, 489.84515380859375, -95.48103332519531, 635.6173095703125, -65.42720031738281, 1575.5587158203125, 361.1246032714844, 677.6920776367188, -17.934173583984375, 372.1143493652344, 281.126953125, 1557.213623046875, 392.6451110839844, 590.914306640625, 154.08004760742188, 531.9337768554688, 130.52978515625, 714.6420288085938, 1174.8819580078125, 808.054931640625, -83.99839782714844, 625.0482177734375, 1105.877197265625, 583.8084716796875, -676.2114868164062, -284.8489990234375, -44.88558578491211], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000523.npy"}
|
||||
{"epoch": 0.7679882525697503, "step": 524, "batch_size": 64, "mean": 331.36962890625, "std": 487.57525634765625, "min": -1446.7333984375, "p10": -83.89690322875975, "median": 402.09141540527344, "p90": 860.809851074219, "max": 1724.6036376953125, "pos_frac": 0.75, "sample": [57.31108093261719, 963.0281982421875, 415.267578125, 464.79925537109375, 365.0636291503906, 296.169921875, 361.0087890625, 11.202522277832031, 532.693115234375, 287.44354248046875, 448.15087890625, 104.75497436523438, 418.3742370605469, 520.8283081054688, 676.9427490234375, 408.1316833496094, -11.138702392578125, 879.7081298828125, 290.1712646484375, 570.7625122070312, -1446.7333984375, 415.340576171875, 512.2404174804688, 1724.6036376953125, -18.247650146484375, -38.712860107421875, 739.5152587890625, 370.1566467285156, 504.1092529296875, 9.364559173583984, -23.79513931274414, 315.0504150390625, 486.25885009765625, -184.64891052246094, 687.5615234375, 245.79013061523438, 1172.4718017578125, 1403.59765625, 646.5862426757812, 442.19915771484375, -31.251426696777344, -826.4942626953125, 21.85688018798828, -95.85216522216797, 663.2813110351562, -87.60528564453125, 366.075927734375, 6.428047180175781, 469.619384765625, 437.0698547363281, 1030.325439453125, 986.4872436523438, 565.4285888671875, -789.41845703125, -75.24401092529297, -13.044723510742188, 816.7138671875, 642.0269775390625, -54.765342712402344, 399.9449768066406, 573.949951171875, 404.23785400390625, -154.18426513671875, -71.31265258789062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000524.npy"}
|
||||
{"epoch": 0.7694566813509545, "step": 525, "batch_size": 64, "mean": 422.78076171875, "std": 508.2586975097656, "min": -785.1712036132812, "p10": -112.19880065917968, "median": 316.63587951660156, "p90": 1262.7388183593757, "max": 1599.6221923828125, "pos_frac": 0.796875, "sample": [317.28924560546875, 1042.031982421875, 337.5355224609375, 87.3991928100586, -13.906744003295898, 429.90185546875, 1349.4232177734375, 756.2666015625, 381.1684265136719, -153.41677856445312, 1373.159423828125, -278.906494140625, 820.9473266601562, 243.71397399902344, 241.13369750976562, -142.6362762451172, 167.73841857910156, 325.5757751464844, 889.5460205078125, 687.63134765625, 606.8651733398438, 112.67190551757812, -114.43524169921875, 184.45071411132812, -133.11212158203125, 332.1822509765625, 497.356689453125, 524.6246337890625, 378.3186340332031, 1008.8670654296875, 1037.3690185546875, 67.3582992553711, 1458.33740234375, -14.914924621582031, 1599.6221923828125, 312.55572509765625, 126.69408416748047, 248.97323608398438, 365.771240234375, -106.98043823242188, 1519.2490234375, -785.1712036132812, -49.65093994140625, -7.81794548034668, 231.84060668945312, 253.66635131835938, 479.2501525878906, 222.81430053710938, 553.3566284179688, 173.79736328125, 748.8978271484375, 1360.858154296875, -67.66830444335938, 29.89971923828125, 706.6762084960938, 244.7948760986328, 315.9825134277344, 135.390380859375, -463.30621337890625, 1060.4752197265625, 237.65939331054688, 1527.305419921875, 394.2328796386719, 881.2646484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000525.npy"}
|
||||
{"epoch": 0.7709251101321586, "step": 526, "batch_size": 64, "mean": 413.08404541015625, "std": 511.3519287109375, "min": -800.5677490234375, "p10": -193.2379959106445, "median": 452.4098205566406, "p90": 1095.0435180664062, "max": 1415.6788330078125, "pos_frac": 0.765625, "sample": [1103.36865234375, 711.8162841796875, 103.85371398925781, 182.10107421875, 327.9171142578125, 291.68218994140625, -147.2572784423828, -23.06220054626465, 488.225341796875, -654.21533203125, 592.7713012695312, 664.73828125, 790.7596435546875, 240.03921508789062, 1249.9571533203125, 981.2232666015625, -108.81167602539062, 689.9887084960938, 299.4004211425781, 583.7362670898438, 344.24151611328125, 513.4544677734375, 243.43739318847656, 1069.53955078125, 492.44866943359375, 560.4257202148438, 1415.6788330078125, 60.37904357910156, 454.91717529296875, -200.3927459716797, -214.6545867919922, 631.061767578125, -380.8511657714844, 1303.7220458984375, -176.5435791015625, 290.9914245605469, 994.0946044921875, 819.2491455078125, 186.54989624023438, -800.5677490234375, 809.6973266601562, 583.2734375, 449.9024658203125, 1098.111328125, -431.3633728027344, 715.7691650390625, 669.3316650390625, 1090.1673583984375, -79.38402557373047, 511.54095458984375, -83.208984375, 259.78460693359375, 317.4760437011719, 1097.13330078125, 973.0599365234375, -19.84561538696289, 1121.154296875, 668.0303344726562, 417.31011962890625, 8.485366821289062, -149.7427978515625, 74.38007354736328, -726.8246459960938, 1087.7252197265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000526.npy"}
|
||||
{"epoch": 0.7723935389133627, "step": 527, "batch_size": 64, "mean": 282.3432312011719, "std": 415.23419189453125, "min": -569.6371459960938, "p10": -189.05686492919918, "median": 243.23734283447266, "p90": 760.3781921386719, "max": 1186.994873046875, "pos_frac": 0.765625, "sample": [-553.1806640625, 536.8192138671875, 387.9982604980469, 349.3760070800781, 651.2440185546875, 167.33029174804688, 1186.994873046875, -122.45549011230469, -129.09927368164062, 763.7156372070312, -137.98143005371094, -155.76455688476562, 633.1315307617188, 279.6668395996094, -569.6371459960938, -532.6928100585938, 567.9945678710938, -52.23822784423828, 587.5016479492188, 311.37591552734375, -401.02001953125, 984.2718505859375, 151.1839599609375, 27.161556243896484, 150.83114624023438, 718.6439819335938, -18.904922485351562, 55.86518096923828, 949.1351318359375, 666.8314208984375, 553.42626953125, 1028.031494140625, 419.4903564453125, 663.4173583984375, 1145.6988525390625, 999.196044921875, 752.5908203125, -45.157508850097656, 85.93791198730469, 238.01535034179688, 645.4901123046875, 195.440185546875, 296.03631591796875, 248.45933532714844, -128.95248413085938, 56.60498046875, 265.302978515625, 366.9440002441406, 134.46502685546875, -495.22467041015625, 289.5273742675781, 170.07675170898438, 669.3757934570312, 216.0518798828125, 18.065704345703125, 75.40049743652344, 685.3844604492188, 549.091552734375, 512.4608154296875, 223.7331085205078, 190.7697296142578, -249.611083984375, 43.6531982421875, -203.3249969482422], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000527.npy"}
|
||||
{"epoch": 0.7738619676945668, "step": 528, "batch_size": 64, "mean": 358.5581970214844, "std": 562.3563232421875, "min": -600.9801025390625, "p10": -347.31591796875, "median": 294.6216735839844, "p90": 1174.8984619140629, "max": 1857.431884765625, "pos_frac": 0.71875, "sample": [-3.6351699829101562, 1247.659912109375, 303.29571533203125, 359.9427795410156, 176.25482177734375, -440.364501953125, -38.27075958251953, 703.2715454101562, 625.2940673828125, 1235.9202880859375, 350.1016845703125, -125.11875915527344, 415.9482421875, -469.435302734375, 113.1301040649414, 441.36419677734375, 301.91729736328125, -124.44921875, 1068.173583984375, 1857.431884765625, -70.4173355102539, 968.350830078125, 425.18231201171875, 702.1782836914062, 504.6599426269531, 922.2241821289062, -552.356201171875, 1297.00634765625, -251.40512084960938, 1034.7890625, -530.2913208007812, 287.3260498046875, 413.2421875, 243.34298706054688, 326.779541015625, 172.66929626464844, 551.6635131835938, 192.35902404785156, 506.1371765136719, 113.5125503540039, -169.6992645263672, 892.8654174804688, -150.40188598632812, -600.9801025390625, 1430.26904296875, 268.7784729003906, 604.6187744140625, 1727.494140625, 127.60835266113281, 43.58074951171875, -544.21337890625, -358.8065185546875, 724.8447265625, 1220.6376953125, 114.42623901367188, 866.8724975585938, 574.464599609375, 172.97348022460938, -320.5045166015625, -15.277740478515625, 85.88037109375, -119.01728820800781, 1017.1864624023438, 98.7397689819336], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000528.npy"}
|
||||
{"epoch": 0.775330396475771, "step": 529, "batch_size": 64, "mean": 244.05267333984375, "std": 540.3975219726562, "min": -1316.924560546875, "p10": -415.1270416259766, "median": 255.9566192626953, "p90": 875.0877807617189, "max": 1493.1700439453125, "pos_frac": 0.65625, "sample": [260.0087890625, -197.96275329589844, 1493.1700439453125, -55.00750732421875, 150.0079803466797, 521.1974487304688, 493.9058532714844, 748.22265625, 581.548095703125, 1438.3465576171875, -288.8548278808594, 504.37823486328125, 134.59860229492188, -399.5232238769531, 494.3603210449219, -117.02198028564453, 138.01565551757812, 55.925071716308594, 693.8150024414062, -76.7000503540039, -348.5086975097656, 324.42083740234375, 1111.517822265625, 782.9811401367188, 952.5321655273438, -644.5049438476562, -35.94178009033203, 375.1580505371094, -482.888916015625, 631.6480712890625, 251.90444946289062, 492.1279602050781, -129.72592163085938, 370.4105224609375, -43.261077880859375, 49.22004699707031, 732.5025634765625, 147.32115173339844, 569.5712280273438, 698.3722534179688, -1316.924560546875, 126.06340789794922, 982.7916259765625, -498.19189453125, 576.5751342773438, -894.2567749023438, 136.89437866210938, 542.9136352539062, 895.3790893554688, -10.813072204589844, -679.1033325195312, 442.5999755859375, -62.864845275878906, 572.8621826171875, -421.81439208984375, 454.0222473144531, 226.12356567382812, 827.7413940429688, 497.56787109375, 1221.2393798828125, -328.1725158691406, -86.38652038574219, -290.7207946777344, 328.5588073730469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000529.npy"}
|
||||
{"epoch": 0.7767988252569751, "step": 530, "batch_size": 64, "mean": 382.5972900390625, "std": 645.4879760742188, "min": -2000.821044921875, "p10": -184.49969024658202, "median": 269.96055603027344, "p90": 1287.190075683594, "max": 1986.138671875, "pos_frac": 0.734375, "sample": [285.5620422363281, -348.8468933105469, -12.7275390625, 154.88980102539062, 268.3919982910156, -161.66903686523438, 1696.1951904296875, 614.2518310546875, 305.5815124511719, -570.1636962890625, 190.85365295410156, -54.39049530029297, -4.65362548828125, -93.12625122070312, 703.744873046875, 707.7337646484375, -17.102766036987305, 60.62945556640625, -342.10906982421875, 1181.8531494140625, 1621.0816650390625, 500.9061584472656, 950.6145629882812, 691.5626831054688, -73.81605529785156, 811.3409423828125, -107.105224609375, 219.19992065429688, -194.2842559814453, 651.7120361328125, -2.3673324584960938, 1589.5802001953125, 1171.9334716796875, 495.59246826171875, 364.0762023925781, 45.0327262878418, 858.3846435546875, 145.74630737304688, -297.7543640136719, -400.4933776855469, 200.4508514404297, 1707.1470947265625, 636.6036376953125, 1986.138671875, 76.596923828125, 315.8100280761719, 310.8710021972656, 271.52911376953125, 1510.552001953125, 170.22097778320312, 383.5251159667969, 18.320348739624023, 818.5599365234375, -2000.821044921875, 1265.971923828125, 55.66749572753906, -12.3387451171875, 1296.2835693359375, 502.3303527832031, 285.588623046875, 80.35841369628906, 218.85028076171875, 545.0986938476562, 237.07308959960938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000530.npy"}
|
||||
{"epoch": 0.7782672540381792, "step": 531, "batch_size": 64, "mean": 329.83197021484375, "std": 541.7294311523438, "min": -1009.7593994140625, "p10": -258.4589767456054, "median": 275.5526885986328, "p90": 1009.1005065917972, "max": 1808.4969482421875, "pos_frac": 0.78125, "sample": [592.776611328125, 127.29119110107422, 726.3770751953125, 1042.9810791015625, 746.7888793945312, -475.22540283203125, 131.52328491210938, 213.19186401367188, 1680.33251953125, -196.79953002929688, -226.4503936767578, 429.7344055175781, 9.429044723510742, 385.8694152832031, 279.29144287109375, 180.28334045410156, 418.0731201171875, 214.8727569580078, -1009.7593994140625, 338.11199951171875, 930.0458374023438, 693.5508422851562, 351.09893798828125, 80.81489562988281, 831.1497802734375, 188.189697265625, 417.7082824707031, 245.1844024658203, 473.4039306640625, -54.774925231933594, 1253.447998046875, -272.17694091796875, 652.8519897460938, 632.4296875, 60.24278259277344, 271.8139343261719, -32.29297637939453, -567.3812255859375, -16.180784225463867, 180.736083984375, 1808.4969482421875, 327.1463623046875, 72.2861557006836, 7.5357208251953125, 1651.458740234375, -407.66339111328125, 512.30615234375, 1457.9310302734375, 281.8287353515625, 866.2880249023438, 1158.9656982421875, -16.781959533691406, 587.220947265625, -196.97555541992188, 292.9745178222656, 139.08596801757812, 434.6729736328125, 117.37800598144531, 405.5441589355469, 9.344192504882812, 215.154296875, 494.974853515625, -753.5184936523438, -284.96209716796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000531.npy"}
|
||||
{"epoch": 0.7797356828193832, "step": 532, "batch_size": 64, "mean": 361.05938720703125, "std": 657.469970703125, "min": -1334.456298828125, "p10": -421.58934326171874, "median": 287.59149169921875, "p90": 1284.9066528320318, "max": 1909.6993408203125, "pos_frac": 0.75, "sample": [1169.4229736328125, 467.2064208984375, 1602.9317626953125, -191.89767456054688, 1381.8272705078125, 236.05905151367188, 828.6094970703125, 812.227783203125, -1334.456298828125, 288.7621765136719, -334.38287353515625, 1357.1064453125, 512.1929931640625, 676.2833251953125, -425.7490234375, 666.3355712890625, 555.62939453125, -1181.0472412109375, 229.38682556152344, 286.4208068847656, 421.7725830078125, 1707.26708984375, 1909.6993408203125, 878.2803344726562, 641.6634521484375, 682.1851806640625, -151.52320861816406, 1119.6754150390625, 1334.399658203125, 157.3104248046875, -74.18897247314453, 289.7071838378906, -151.0853271484375, 1031.2734375, 771.3453369140625, 174.96560668945312, 197.30862426757812, 1076.15185546875, -64.31632232666016, 296.1657409667969, 56.29729461669922, 839.3046875, 145.31658935546875, 582.2845458984375, 218.06715393066406, -75.85596466064453, 3.7725906372070312, 680.5311889648438, 182.7559814453125, 40.71471405029297, -486.9562072753906, -540.5106811523438, 271.998779296875, 1599.8184814453125, 265.63140869140625, -54.434814453125, 308.6672668457031, -952.2546997070312, 45.9755859375, 232.86195373535156, -411.8834228515625, 625.80126953125, -617.0643920898438, 296.03369140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000532.npy"}
|
||||
{"epoch": 0.7812041116005873, "step": 533, "batch_size": 64, "mean": 258.5040283203125, "std": 552.2969360351562, "min": -1503.364013671875, "p10": -317.6853454589844, "median": 243.83811950683594, "p90": 915.8158752441407, "max": 2089.2587890625, "pos_frac": 0.703125, "sample": [378.8831481933594, -359.19476318359375, 713.517333984375, -520.2552490234375, 426.5504150390625, 172.02755737304688, 1151.19775390625, 586.6150512695312, 276.3494873046875, 331.05645751953125, 762.031005859375, 639.9660034179688, 630.0161743164062, 737.5643920898438, -189.51943969726562, 412.1318359375, 903.4552612304688, 151.40597534179688, 313.3570556640625, -799.1295166015625, 1273.621337890625, 260.81463623046875, 43.22467041015625, 533.0916748046875, 62.54423904418945, 594.0459594726562, -51.95447540283203, -7.9790191650390625, 226.86160278320312, 2089.2587890625, 416.78985595703125, 413.9728698730469, -104.11285400390625, 47.452980041503906, 34.25539779663086, -1503.364013671875, -330.7845764160156, 387.4222106933594, -224.52850341796875, 380.8053283691406, 149.9241180419922, 215.66868591308594, 1204.7926025390625, 1202.8780517578125, -196.23068237304688, 28.492294311523438, -308.8045654296875, -4.1074981689453125, 296.6363830566406, 290.1319274902344, 311.2635803222656, -70.64750671386719, 921.11328125, 102.49901580810547, -321.49139404296875, -282.2308044433594, -753.6974487304688, 177.8607177734375, 145.10760498046875, 1105.9342041015625, -193.36520385742188, -4.405853271484375, 770.531982421875, 496.9402770996094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000533.npy"}
|
||||
{"epoch": 0.7826725403817915, "step": 534, "batch_size": 64, "mean": 348.64691162109375, "std": 525.1646118164062, "min": -893.8709716796875, "p10": -192.01328735351564, "median": 266.32556915283203, "p90": 922.2540893554689, "max": 1901.4661865234375, "pos_frac": 0.78125, "sample": [395.817626953125, 928.636962890625, -40.91419982910156, 1901.4661865234375, 56.29805374145508, 774.9592895507812, -479.0343017578125, 311.6721496582031, 360.59490966796875, -468.9155578613281, 76.82229614257812, 1221.937255859375, -192.64804077148438, 185.11294555664062, 1096.72119140625, 881.2493286132812, 833.8202514648438, -190.53219604492188, 335.963623046875, 906.10791015625, 250.77284240722656, -141.80404663085938, 369.1322937011719, 468.34051513671875, 281.8782958984375, 356.61676025390625, 982.658935546875, -118.12191772460938, 399.3546142578125, 182.11880493164062, 138.38375854492188, 818.3192138671875, 234.8904571533203, 178.03549194335938, 663.33447265625, -563.7745361328125, -117.52249145507812, 1594.1268310546875, 764.1416625976562, 907.3607177734375, 155.67816162109375, 146.0735321044922, -45.6864013671875, 152.34884643554688, 765.470947265625, 244.6505584716797, 58.06330490112305, 896.26171875, 1425.724609375, 903.7682495117188, 718.3455200195312, 313.7392883300781, 619.532470703125, 43.12456130981445, 348.7341003417969, 160.1737060546875, 303.7822570800781, 217.05938720703125, 242.93075561523438, 40.29931640625, -156.57553100585938, -893.8709716796875, -293.08477783203125, -596.5193481445312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000534.npy"}
|
||||
{"epoch": 0.7841409691629956, "step": 535, "batch_size": 64, "mean": 396.780029296875, "std": 537.9991455078125, "min": -854.108642578125, "p10": -225.5595733642578, "median": 378.2223205566406, "p90": 1018.092236328125, "max": 2183.694580078125, "pos_frac": 0.796875, "sample": [-341.4013671875, 2183.694580078125, -299.56121826171875, 497.48828125, -234.97474670410156, 216.2791748046875, 97.49200439453125, 970.6635131835938, 147.98040771484375, 619.3370971679688, 229.8002471923828, 21.905860900878906, 256.4176330566406, 945.585205078125, 454.7432861328125, 418.335205078125, 94.59037017822266, 1341.535888671875, 245.29489135742188, 410.73602294921875, 905.0324096679688, 507.97235107421875, 538.104736328125, 457.51702880859375, -20.738876342773438, 1483.0367431640625, 1185.695556640625, 517.1328125, -52.535308837890625, 333.1309509277344, 1513.4368896484375, 673.8948364257812, 1015.603271484375, -363.8713684082031, -321.1799011230469, -854.108642578125, 485.7068176269531, 717.6597290039062, 139.16299438476562, 451.8658447265625, 345.7086181640625, 1019.158935546875, 793.2733154296875, 210.49896240234375, 460.0212707519531, -88.21452331542969, 27.48711395263672, 692.2218017578125, -203.59083557128906, -515.18603515625, 576.8367919921875, 52.85796356201172, 418.37103271484375, 36.65093231201172, -139.5122528076172, -78.75885009765625, 956.16943359375, 139.40719604492188, 1490.992919921875, 252.41278076171875, 29.755685806274414, 522.2162475585938, 323.66156005859375, 483.02093505859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000535.npy"}
|
||||
{"epoch": 0.7856093979441997, "step": 536, "batch_size": 64, "mean": 485.10693359375, "std": 675.7999267578125, "min": -1175.79833984375, "p10": -56.10823287963867, "median": 297.19915771484375, "p90": 1172.5166137695312, "max": 3862.385009765625, "pos_frac": 0.828125, "sample": [90.8951644897461, 809.1957397460938, 296.63702392578125, 985.5801391601562, 1225.872802734375, 23.444351196289062, 180.03463745117188, 135.724609375, 73.09307861328125, -27.712615966796875, 891.68115234375, 347.5636901855469, 1169.4154052734375, 842.5475463867188, -181.19544982910156, 532.4952392578125, 1311.439208984375, 104.67416381835938, 918.166015625, 766.0474243164062, 2076.76513671875, 590.971923828125, -94.6231918334961, 799.2711181640625, -48.372596740722656, 1173.845703125, 24.568260192871094, 592.1119384765625, 627.6751098632812, -52.0828857421875, 3862.385009765625, 768.4339599609375, 298.04718017578125, 233.19161987304688, 533.6359252929688, 724.8716430664062, 128.4680633544922, 1064.3343505859375, -321.7752380371094, 100.06060791015625, 127.93150329589844, -1175.79833984375, 1589.31494140625, 948.8792724609375, 721.1411743164062, -142.46092224121094, 121.25125122070312, 236.50714111328125, 261.4117431640625, 131.855712890625, 297.76129150390625, -57.83338165283203, 228.2969970703125, 219.7138671875, 246.03823852539062, 762.633056640625, 120.55726623535156, -39.61919021606445, -342.17034912109375, 35.94927978515625, 899.383544921875, 718.728759765625, 367.02630615234375, 1192.963134765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000536.npy"}
|
||||
{"epoch": 0.7870778267254038, "step": 537, "batch_size": 64, "mean": 315.5430603027344, "std": 551.7247314453125, "min": -1292.76025390625, "p10": -305.2695556640625, "median": 261.96263122558594, "p90": 1063.087048339844, "max": 1343.551025390625, "pos_frac": 0.796875, "sample": [-1292.76025390625, -777.4710693359375, 28.464431762695312, 122.02017974853516, -417.8280029296875, 272.27813720703125, 24.76887321472168, 903.3817749023438, 11.129104614257812, -49.979373931884766, 453.3975524902344, 24.497756958007812, 506.9180603027344, 336.29864501953125, -306.78277587890625, 850.086669921875, 1119.074951171875, -50.25396728515625, 151.0845947265625, -18.99309539794922, 6.866731643676758, 765.0379028320312, 46.861305236816406, -774.1895141601562, 921.912353515625, -301.73870849609375, 1297.5865478515625, 1129.0528564453125, 337.8877868652344, 608.8134765625, 1004.38427734375, 306.809814453125, 1040.932373046875, 311.65155029296875, 772.493896484375, 23.028053283691406, 95.33470153808594, 555.9805908203125, 251.64712524414062, -458.04718017578125, 207.25643920898438, 387.9020690917969, -53.51176452636719, -868.5283203125, 40.0283317565918, 1072.5819091796875, -80.41586303710938, 172.32643127441406, 328.6527099609375, 835.67236328125, 156.49124145507812, 1343.551025390625, 124.60790252685547, 1217.5955810546875, 566.908935546875, 100.30220794677734, 1291.0972900390625, 123.70613098144531, 913.5929565429688, 370.67352294921875, 494.66644287109375, 674.2569580078125, 37.764373779296875, 905.9381713867188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000537.npy"}
|
||||
{"epoch": 0.788546255506608, "step": 538, "batch_size": 64, "mean": 372.283447265625, "std": 506.0456848144531, "min": -544.698486328125, "p10": -218.8054000854492, "median": 384.0868835449219, "p90": 944.5154785156252, "max": 1988.9552001953125, "pos_frac": 0.734375, "sample": [147.2898712158203, 465.18768310546875, 743.0931396484375, 1122.71728515625, 549.0198974609375, -173.1973876953125, 894.2175903320312, 559.7501220703125, 536.6629028320312, 403.5022888183594, -58.40563201904297, 1647.2901611328125, 508.58380126953125, 485.78271484375, 262.6871032714844, 872.0368041992188, -544.698486328125, -27.65912628173828, 362.23663330078125, 1127.21337890625, 277.15582275390625, 483.1634826660156, 108.62356567382812, 661.648193359375, 514.0892333984375, 105.49226379394531, -42.336181640625, 1505.109619140625, -29.785110473632812, -377.8954162597656, -518.3202514648438, 781.7225952148438, 159.2376251220703, -99.11222839355469, 1988.9552001953125, 674.48388671875, 687.5807495117188, 515.2808837890625, 364.6714782714844, 704.2816162109375, 193.8453369140625, -134.15509033203125, 13.303913116455078, 621.0968627929688, 291.7861022949219, 713.5670776367188, -500.5455627441406, 538.6590576171875, -221.84902954101562, 108.41130065917969, 553.6314697265625, 417.2862243652344, 266.0958251953125, 633.610107421875, -211.70359802246094, -50.786888122558594, 300.53125, 27.432445526123047, 1120.6668701171875, 966.0717163085938, -362.05963134765625, -269.4805908203125, 551.6268310546875, -88.25836944580078], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000538.npy"}
|
||||
{"epoch": 0.7900146842878121, "step": 539, "batch_size": 64, "mean": 347.94537353515625, "std": 462.8359680175781, "min": -822.2125244140625, "p10": -144.34913787841793, "median": 317.1759033203125, "p90": 892.8924133300785, "max": 1568.848388671875, "pos_frac": 0.78125, "sample": [288.43170166015625, 345.92010498046875, 99.49556732177734, -10.048295974731445, 361.5869140625, 442.86346435546875, -237.6935272216797, 30.12842559814453, -16.889495849609375, 1324.4818115234375, -322.413330078125, -25.460275650024414, 1568.848388671875, 164.06654357910156, 938.07275390625, 589.4007568359375, 741.1832885742188, 668.4091186523438, 52.9036865234375, 787.4716186523438, 1503.1552734375, 142.59930419921875, 277.176025390625, 700.3861083984375, 135.98333740234375, 200.14437866210938, 45.82585144042969, 472.69952392578125, -66.42939758300781, 687.0103759765625, -368.1435241699219, 587.2735595703125, -822.2125244140625, 522.5499877929688, 602.5008544921875, 76.6336669921875, 48.089820861816406, 519.987548828125, 187.21578979492188, -106.94882202148438, -111.29457092285156, 1325.740478515625, -199.34616088867188, 672.266845703125, -59.0002326965332, -158.515380859375, 979.9348754882812, -280.8507080078125, 43.610313415527344, 30.958839416503906, 595.743408203125, 184.95492553710938, 771.6548461914062, 445.925537109375, 446.63623046875, 347.80596923828125, 765.8799438476562, 542.640380859375, 404.3408203125, 539.2449951171875, 26.469038009643555, 1214.2105712890625, 539.9266357421875, 63.30851745605469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000539.npy"}
|
||||
{"epoch": 0.7914831130690162, "step": 540, "batch_size": 64, "mean": 461.1107177734375, "std": 636.3046875, "min": -741.3790283203125, "p10": -279.51945037841784, "median": 366.1020812988281, "p90": 1428.9562133789063, "max": 2530.441650390625, "pos_frac": 0.75, "sample": [465.4664611816406, 585.464111328125, 738.0836181640625, -403.3931884765625, 1886.9940185546875, -123.95703125, 294.6344299316406, 106.92181396484375, -741.3790283203125, -478.6680908203125, -106.22864532470703, 536.127685546875, -145.32154846191406, 285.8150634765625, 2530.441650390625, -123.59583282470703, 249.82473754882812, 103.19906616210938, 625.459228515625, 517.0883178710938, -17.888702392578125, 177.50213623046875, 426.7146301269531, 763.8726196289062, 783.7288818359375, 1330.5946044921875, 595.9721069335938, 1445.0423583984375, 1464.9600830078125, 424.461181640625, 623.0120849609375, 411.94757080078125, 636.8419799804688, 101.39942169189453, 320.256591796875, 447.1512756347656, -51.781219482421875, -15.954803466796875, 1572.266357421875, 307.1485290527344, 1071.445068359375, 1810.3707275390625, 1474.963623046875, 1248.604248046875, -412.7009582519531, 676.8839111328125, 228.73977661132812, 258.49462890625, 636.2333374023438, 313.2637939453125, -337.0328369140625, 292.7913513183594, 674.5819702148438, 774.1190185546875, 163.82345581054688, 282.62158203125, 775.8087158203125, 179.52023315429688, 1391.421875, -49.49958038330078, -37.229637145996094, 659.7310791015625, -713.9684448242188, -402.1252136230469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000540.npy"}
|
||||
{"epoch": 0.7929515418502202, "step": 541, "batch_size": 64, "mean": 243.34228515625, "std": 560.9125366210938, "min": -1181.81640625, "p10": -408.38843383789055, "median": 208.36014556884766, "p90": 928.2657592773443, "max": 1862.882568359375, "pos_frac": 0.71875, "sample": [-843.317138671875, 185.24398803710938, 638.297119140625, 88.71377563476562, 169.02053833007812, 67.64816284179688, 376.8235168457031, 92.85102844238281, 227.80108642578125, -112.7398681640625, -104.53231811523438, 1258.30810546875, -832.6580200195312, -6.8634490966796875, 1004.327392578125, 48.214210510253906, 267.0911560058594, 1228.7384033203125, 85.84505462646484, 771.4954833984375, 1551.886474609375, 800.6141967773438, -0.230499267578125, -528.0142822265625, 220.4332275390625, -269.2791748046875, 402.7799072265625, 307.0099182128906, -434.77490234375, 672.1240234375, 669.8370361328125, 62.80333709716797, 750.2535400390625, 181.62388610839844, 174.20883178710938, 144.37179565429688, 148.79644775390625, 151.77281188964844, 603.2274169921875, 422.96759033203125, -134.98606872558594, 196.7809295654297, 704.2371215820312, 982.9735717773438, 591.2987060546875, 288.36572265625, -1181.81640625, -548.1343994140625, 440.1781311035156, 630.165283203125, -319.9785461425781, 405.3799133300781, 297.6109924316406, 1862.882568359375, 1127.37646484375, -346.82000732421875, -892.2590942382812, 506.6162414550781, -241.07809448242188, -260.9356994628906, -141.74090576171875, 219.93936157226562, 467.7738037109375, 277.356689453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000541.npy"}
|
||||
{"epoch": 0.7944199706314243, "step": 542, "batch_size": 64, "mean": 453.1556091308594, "std": 631.8525390625, "min": -1174.165771484375, "p10": -284.3477310180663, "median": 372.9001922607422, "p90": 1151.4187255859376, "max": 2169.35205078125, "pos_frac": 0.796875, "sample": [1101.0723876953125, 325.12799072265625, 249.80560302734375, 241.73556518554688, 183.61749267578125, 994.0892944335938, -345.56646728515625, 2169.35205078125, 1172.9957275390625, 428.9527587890625, 16.249130249023438, 487.3814697265625, 865.27294921875, -46.425140380859375, -67.04155731201172, 457.0392150878906, 1945.6025390625, 305.06488037109375, -71.29412078857422, 1782.8876953125, 328.6889343261719, 131.5789337158203, 460.3580322265625, 489.71844482421875, -345.6002502441406, 263.82830810546875, -179.59027099609375, -405.2418212890625, -196.2577362060547, 1014.984619140625, -322.1005859375, 900.4818115234375, 932.2742309570312, 170.57229614257812, 1659.010986328125, -62.19620132446289, 670.908935546875, 493.3016052246094, -567.0504760742188, 623.0171508789062, 306.99609375, 399.2326354980469, 694.9031982421875, 166.0479736328125, 1548.330322265625, 177.20693969726562, 252.87826538085938, -1174.165771484375, 726.9771118164062, 163.0102996826172, 646.9982299804688, 348.35089111328125, 104.80165100097656, 397.4494934082031, 1613.8056640625, 1095.03515625, 243.57350158691406, -906.3959350585938, 1058.4163818359375, 240.93209838867188, 480.5727844238281, 619.9910888671875, 681.4251098632812, 858.9783935546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000542.npy"}
|
||||
{"epoch": 0.7958883994126285, "step": 543, "batch_size": 64, "mean": 327.8123474121094, "std": 755.3506469726562, "min": -1023.5202026367188, "p10": -347.608984375, "median": 144.61444854736328, "p90": 1296.4673339843755, "max": 3413.46728515625, "pos_frac": 0.65625, "sample": [22.416046142578125, -53.78887939453125, 735.0440063476562, -50.8369140625, 2.151205062866211, 2301.818359375, 4.399759292602539, -42.78443908691406, -1023.5202026367188, 831.3065185546875, -7.297035217285156, -780.6921997070312, -3.3405494689941406, 181.7252197265625, 2287.32958984375, 51.93855285644531, 37.16426086425781, 3413.46728515625, -525.5147705078125, 15.215888977050781, 529.9960327148438, -350.91180419921875, -294.578125, 196.4829559326172, -391.9460754394531, -125.2253189086914, -96.86669921875, 529.222900390625, -339.90240478515625, 250.4123077392578, 552.2970581054688, 220.48109436035156, -356.06109619140625, 1338.707275390625, -153.62435913085938, -245.3427276611328, 1938.96875, 577.095458984375, 1197.907470703125, 69.7729263305664, 1562.8118896484375, 142.90589904785156, -176.93223571777344, 361.708740234375, 814.9577026367188, 15.259696960449219, 330.9091796875, 1767.2510986328125, -365.90362548828125, 583.4500122070312, 220.80233764648438, -101.88860321044922, 356.4002685546875, -269.4975891113281, 467.43023681640625, 601.6290283203125, 301.8758850097656, -195.44754028320312, 37.43260192871094, 360.6446838378906, 146.322998046875, 539.0279541015625, 542.4673461914062, 493.28533935546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000543.npy"}
|
||||
{"epoch": 0.7973568281938326, "step": 544, "batch_size": 64, "mean": 376.4363708496094, "std": 569.7345581054688, "min": -1007.627685546875, "p10": -209.25299835205078, "median": 363.4873504638672, "p90": 1142.582836914063, "max": 1782.491455078125, "pos_frac": 0.75, "sample": [1245.91015625, 641.4193115234375, 352.95220947265625, 548.1907958984375, 1645.9910888671875, -909.8585205078125, 378.8853454589844, -105.57039642333984, 746.1978759765625, -15.147645950317383, 1191.4210205078125, -49.83164978027344, 692.6737670898438, 258.63800048828125, -997.6639404296875, 338.8780822753906, 634.6284790039062, 230.05316162109375, 106.24420166015625, 425.0372009277344, 374.0224914550781, 458.23419189453125, -219.62130737304688, 244.55581665039062, 98.01510620117188, 112.22280883789062, 751.5697631835938, 124.4228515625, -352.66302490234375, -212.04025268554688, 1600.1483154296875, 184.50576782226562, -71.0050277709961, 614.3165283203125, -1007.627685546875, 1028.6270751953125, 473.70245361328125, 385.5986328125, 639.9945068359375, -202.74940490722656, 295.06982421875, -11.531375885009766, 274.22308349609375, 1601.512451171875, 711.3428344726562, -77.12284851074219, 172.13157653808594, 1782.491455078125, 851.6235961914062, 570.4150390625, 29.912960052490234, 156.47958374023438, 751.2333374023438, 1414.6710205078125, 577.6229248046875, 405.2154846191406, 481.2323913574219, -165.7983856201172, 510.5694885253906, 98.84666442871094, -22.301132202148438, 755.2884521484375, -247.05233764648438, 792.57421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000544.npy"}
|
||||
{"epoch": 0.7988252569750367, "step": 545, "batch_size": 64, "mean": 449.6326904296875, "std": 572.400390625, "min": -669.4094848632812, "p10": -115.39449081420895, "median": 313.7082061767578, "p90": 1409.4041137695312, "max": 1716.2310791015625, "pos_frac": 0.84375, "sample": [-152.49105834960938, -3.240753173828125, -615.8695068359375, 33.205841064453125, 285.1319580078125, 888.432373046875, 224.02926635742188, 69.70018005371094, 560.7819213867188, -443.1504821777344, 440.8944396972656, 699.297119140625, 42.18962860107422, 616.7157592773438, 1225.8321533203125, 102.59375762939453, 1589.2919921875, 1442.8182373046875, 1414.5968017578125, 374.4332275390625, 828.9728393554688, 43.83572006225586, 295.7601318359375, 284.87286376953125, 5.363059997558594, 331.6562805175781, 74.204833984375, 407.92333984375, 847.98974609375, -75.49776458740234, 1397.287841796875, 492.848388671875, 134.9328155517578, -132.4930877685547, 1716.2310791015625, 941.1566772460938, 22.072463989257812, 1466.386474609375, -573.7566528320312, 498.26287841796875, -10.954490661621094, 416.0259704589844, 131.64373779296875, 573.3065185546875, 392.0847473144531, 692.7310180664062, 906.489501953125, 1364.7515869140625, 275.3653259277344, 285.63385009765625, 156.60231018066406, 84.70159912109375, 278.7278747558594, 426.8327331542969, 73.01541137695312, 1636.9176025390625, 1288.997314453125, -271.3075256347656, 359.5113830566406, 195.11846923828125, 713.8516845703125, -669.4094848632812, 182.3842010498047, 1490.268798828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000545.npy"}
|
||||
{"epoch": 0.8002936857562408, "step": 546, "batch_size": 64, "mean": 301.70965576171875, "std": 496.46099853515625, "min": -1541.517333984375, "p10": -243.1973098754883, "median": 261.12965393066406, "p90": 876.794500732422, "max": 1363.9864501953125, "pos_frac": 0.75, "sample": [-236.39926147460938, 104.52918243408203, 335.9444580078125, 11.95928955078125, 232.42498779296875, 545.8179321289062, 247.84048461914062, 558.2568969726562, 176.17369079589844, 216.993408203125, -178.12850952148438, -246.23623657226562, -215.4935760498047, -188.70916748046875, 764.9134521484375, 112.04087829589844, 41.372840881347656, 380.1271667480469, 965.9915161132812, 252.49713134765625, -58.87999725341797, 847.46875, -295.6146545410156, 1206.2213134765625, 668.7665405273438, 887.1526489257812, 793.4058837890625, 468.61328125, 852.62548828125, 374.2065124511719, 1363.9864501953125, -265.219482421875, -11.533683776855469, 439.4698791503906, 559.3084716796875, -788.4990844726562, 663.2811279296875, 562.0174560546875, 734.1748657226562, 892.1190795898438, -1541.517333984375, 733.873779296875, 260.48846435546875, 277.7813415527344, 465.0060729980469, 802.0952758789062, 68.26632690429688, 774.3382568359375, -62.116127014160156, 144.11720275878906, 60.29218292236328, 24.958059310913086, 761.0640258789062, 261.7708435058594, -241.45375061035156, 576.3572998046875, 159.26263427734375, 1120.424560546875, -243.94454956054688, 92.82527923583984, -116.98332214355469, 427.5423583984375, -319.01312255859375, 1048.995849609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000546.npy"}
|
||||
{"epoch": 0.801762114537445, "step": 547, "batch_size": 64, "mean": 394.68609619140625, "std": 549.3321533203125, "min": -1178.05126953125, "p10": -165.9199440002441, "median": 369.6005554199219, "p90": 1023.425927734375, "max": 1963.1400146484375, "pos_frac": 0.765625, "sample": [595.69189453125, 50.91321563720703, -1178.05126953125, 957.572509765625, -49.3029899597168, 51.272674560546875, 313.4190673828125, 794.7158813476562, 234.73834228515625, 552.0128784179688, 148.21795654296875, 1616.947265625, 10.849552154541016, 1754.83935546875, -210.8589324951172, -68.05622100830078, 501.962158203125, 612.4823608398438, -187.42576599121094, 1013.1279296875, -28.21910858154297, 88.79193878173828, 434.5853271484375, 284.7328186035156, 671.1011962890625, -250.16314697265625, 535.8350830078125, 182.6880340576172, 219.5498809814453, 371.4236145019531, 1027.83935546875, -112.23136901855469, 508.9006042480469, 685.234619140625, 565.937255859375, 486.6966857910156, -97.19731140136719, 945.2992553710938, 367.7774963378906, 69.40618896484375, 741.3477783203125, 1963.1400146484375, 604.3968505859375, -572.0083618164062, -201.43731689453125, 16.252647399902344, -357.6383056640625, 1147.3963623046875, 116.82356262207031, 634.6490478515625, 1606.2412109375, 217.6809539794922, 616.720458984375, 424.78045654296875, 202.85557556152344, 315.70562744140625, 756.8414306640625, -57.00714111328125, 395.78399658203125, -73.42282104492188, 710.9041748046875, 1270.546630859375, 422.0409851074219, -115.73969268798828], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000547.npy"}
|
||||
{"epoch": 0.8032305433186491, "step": 548, "batch_size": 64, "mean": 348.1575927734375, "std": 579.3407592773438, "min": -970.20703125, "p10": -211.69056396484373, "median": 289.9557189941406, "p90": 834.5145080566408, "max": 2739.76025390625, "pos_frac": 0.734375, "sample": [-14.002792358398438, 802.2655029296875, 353.09808349609375, 320.13916015625, -130.9598388671875, 357.2655944824219, -970.20703125, 1335.2012939453125, 407.676513671875, 400.5210266113281, 422.17962646484375, 539.6346435546875, 1089.291259765625, 241.99005126953125, 704.7322998046875, -253.43206787109375, -122.22328186035156, 352.0167541503906, 797.27880859375, 182.8175048828125, 901.8207397460938, 286.1044616699219, 203.1324462890625, -425.50396728515625, 467.6463928222656, 122.0661849975586, 180.96168518066406, 293.8069763183594, 304.1378173828125, 173.15650939941406, -255.05307006835938, 2739.76025390625, 786.6417236328125, 209.20065307617188, 137.41366577148438, -457.4928894042969, -71.88493347167969, 679.5634765625, -191.72232055664062, 2122.65087890625, -15.848331451416016, -38.09365463256836, -259.27423095703125, 445.1290283203125, -90.9063720703125, 61.182403564453125, 373.33465576171875, 135.42242431640625, 423.9108581542969, -220.24838256835938, 517.1697387695312, 376.4149475097656, -62.34486389160156, 1938.89501953125, 389.8022155761719, 614.871337890625, 112.25004577636719, -35.323150634765625, 198.8983612060547, 251.5040283203125, 23.806114196777344, 558.826171875, 848.3355102539062, 712.681396484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000548.npy"}
|
||||
{"epoch": 0.8046989720998532, "step": 549, "batch_size": 64, "mean": 269.5531005859375, "std": 534.2130126953125, "min": -1020.0985107421875, "p10": -268.22358703613276, "median": 194.38426208496094, "p90": 918.0298645019533, "max": 1659.8084716796875, "pos_frac": 0.65625, "sample": [302.7396545410156, 821.9788818359375, 48.17396545410156, -31.075538635253906, 140.89634704589844, 128.01263427734375, -163.3743438720703, 545.03662109375, 708.2279052734375, 857.50244140625, -145.87831115722656, 1291.6356201171875, 109.91886138916016, 171.79837036132812, -101.49725341796875, -287.45208740234375, 938.5438842773438, -325.1523132324219, -121.78194427490234, -747.8109130859375, 634.2962036132812, 1054.7171630859375, -349.8836364746094, 221.15740966796875, 304.1446228027344, 220.873779296875, 444.5323486328125, -75.14324188232422, 743.7387084960938, 870.163818359375, -108.45159912109375, 217.14048767089844, 270.6425476074219, -223.35708618164062, 400.66729736328125, -1020.0985107421875, 216.97015380859375, 613.3525390625, 114.8405990600586, 62.25337600708008, -19.351837158203125, -65.3313217163086, 316.16326904296875, 658.58203125, 1402.532470703125, -66.99800109863281, -198.7511444091797, 332.99725341796875, -371.7166748046875, 1643.3262939453125, 1330.449951171875, -746.1065673828125, -44.66276550292969, 713.49755859375, 448.98095703125, -157.79544067382812, 139.90411376953125, 158.19598388671875, 620.24169921875, 1659.8084716796875, 386.01043701171875, 281.65875244140625, 136.1039581298828, -59.340736389160156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000549.npy"}
|
||||
{"epoch": 0.8061674008810573, "step": 550, "batch_size": 64, "mean": 322.6148376464844, "std": 509.4976806640625, "min": -1061.054443359375, "p10": -278.4778381347655, "median": 308.55914306640625, "p90": 848.1503234863281, "max": 1967.3153076171875, "pos_frac": 0.8125, "sample": [331.3052673339844, 336.7983093261719, 1138.4881591796875, 336.4660949707031, 244.90371704101562, 739.3897094726562, 1049.4996337890625, 305.15240478515625, -140.9923553466797, 258.0875244140625, 584.498779296875, 385.25213623046875, 534.6263427734375, 125.66918182373047, 311.96588134765625, 1766.5406494140625, 84.56005096435547, -332.1145935058594, 79.81895446777344, 204.03363037109375, -1061.054443359375, -13.278533935546875, 534.450439453125, 831.2341918945312, -89.50648498535156, 853.1089477539062, 836.5802001953125, 142.9644775390625, 258.48394775390625, 122.19065856933594, 40.72560119628906, -756.4954833984375, 264.7227783203125, -120.78865814208984, 443.3674621582031, 832.654296875, 58.71766662597656, 530.636962890625, 1967.3153076171875, 442.9591369628906, 295.7491455078125, 642.6286010742188, 51.09608840942383, 41.9385986328125, 1195.3563232421875, 428.0106201171875, 1044.6192626953125, 117.43251037597656, 83.59420013427734, 573.6541137695312, 445.5326843261719, 600.4304809570312, -615.79296875, 563.4008178710938, -153.32540893554688, 315.07476806640625, 243.37840270996094, 487.8172912597656, 355.4890441894531, -377.9486083984375, 488.3541259765625, -388.57916259765625, -365.937255859375, 112.43791198730469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000550.npy"}
|
||||
{"epoch": 0.8076358296622613, "step": 551, "batch_size": 64, "mean": 537.0025634765625, "std": 531.70263671875, "min": -413.5666809082031, "p10": -56.82378005981443, "median": 498.00140380859375, "p90": 1391.6067260742188, "max": 2011.931640625, "pos_frac": 0.859375, "sample": [15.30807876586914, -29.67303466796875, 495.4585266113281, 790.9732055664062, 621.27490234375, 536.2249145507812, -413.5666809082031, 20.760791778564453, 1358.9652099609375, -261.4112548828125, -37.06885528564453, 292.28955078125, -65.29017639160156, 550.67919921875, 577.4763793945312, 1597.16943359375, 500.5442810058594, 389.82891845703125, 506.54437255859375, 1634.5733642578125, 979.997314453125, 379.81292724609375, 535.0941162109375, 614.8216552734375, 294.07379150390625, 278.62054443359375, 954.4076538085938, 413.032958984375, 586.9393310546875, 335.4765625, 378.3161926269531, 1.0750999450683594, 1528.625, 151.07550048828125, 536.8433837890625, 622.6932373046875, 751.0720825195312, 721.4270629882812, 542.5828857421875, 129.07212829589844, -177.38510131835938, 1143.9298095703125, 1405.595947265625, 459.7474060058594, 27.469921112060547, 1554.6954345703125, -379.9389343261719, -268.9532165527344, 1446.833251953125, 810.4071044921875, -248.09567260742188, 141.15029907226562, 1317.9892578125, 887.014404296875, 428.3531494140625, 590.4212646484375, 782.832763671875, 337.3484802246094, 401.2078857421875, 988.3936157226562, 154.6304473876953, 2011.931640625, 416.058837890625, 320.40570068359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000551.npy"}
|
||||
{"epoch": 0.8091042584434655, "step": 552, "batch_size": 64, "mean": 586.982177734375, "std": 698.5947875976562, "min": -690.9524536132812, "p10": -80.78665542602535, "median": 506.0690155029297, "p90": 1428.2556396484379, "max": 2675.06591796875, "pos_frac": 0.859375, "sample": [741.8590698242188, 105.66326141357422, 1054.0665283203125, 2609.431396484375, 771.9454345703125, 1164.77392578125, 568.8108520507812, 1006.6298828125, 23.23944091796875, 512.1934814453125, 641.16748046875, 504.3123474121094, 143.88131713867188, 455.45458984375, 167.1819305419922, 600.785888671875, 249.06521606445312, 663.1082153320312, 1307.5428466796875, -405.9273986816406, 74.02096557617188, 684.338623046875, 491.1546630859375, 378.4052429199219, -690.9524536132812, 16.71973419189453, 37.346168518066406, 1479.4473876953125, 188.5369110107422, -236.63531494140625, 1344.9736328125, -596.2922973632812, 816.24365234375, -36.621559143066406, 492.7081298828125, 122.53221130371094, 484.256103515625, 2432.099853515625, 1362.3438720703125, 1814.0482177734375, 545.3001708984375, -157.09994506835938, 507.82568359375, 885.6370849609375, 618.6163330078125, 2675.06591796875, 254.39443969726562, 461.43524169921875, 1456.5035400390625, 2227.33349609375, 530.7739868164062, 125.36598205566406, 454.27471923828125, -415.4388122558594, -32.814056396484375, 553.03662109375, -99.71455383300781, 115.78498840332031, 407.15081787109375, 554.5494384765625, 660.86669921875, 934.6976318359375, 731.1434326171875, 28.31218719482422], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000552.npy"}
|
||||
{"epoch": 0.8105726872246696, "step": 553, "batch_size": 64, "mean": 308.27679443359375, "std": 559.7786254882812, "min": -723.8419799804688, "p10": -160.89324798583982, "median": 217.4594497680664, "p90": 1045.5955383300784, "max": 2547.1787109375, "pos_frac": 0.734375, "sample": [2547.1787109375, 922.7122802734375, -152.4412841796875, 436.1979064941406, 460.2156982421875, -107.91620635986328, 53.397216796875, 40.11431121826172, 148.17210388183594, 248.54437255859375, 99.5982666015625, 213.9416046142578, 106.71871185302734, 78.57656860351562, -63.12421417236328, 261.191162109375, -489.9905700683594, 16.44134521484375, 366.13336181640625, -114.05519104003906, 230.18679809570312, 190.2409210205078, 348.9710998535156, -505.7462158203125, 498.73699951171875, -15.965225219726562, 1130.6541748046875, 89.41424560546875, -14.719474792480469, 305.436279296875, -164.51551818847656, 388.796142578125, -149.7352294921875, -131.35411071777344, 852.6343994140625, 292.96044921875, 361.514892578125, 103.4901123046875, 1070.4573974609375, -49.014122009277344, 419.0456848144531, -723.8419799804688, 615.2815551757812, 506.2861633300781, 226.87954711914062, 111.84326171875, -652.135498046875, 32.9014892578125, 519.2777099609375, 22.00713348388672, 1128.9178466796875, -52.431861877441406, 1311.3607177734375, 530.58935546875, 831.338623046875, 1127.91455078125, 220.977294921875, 987.5845336914062, -489.64752197265625, 1865.327880859375, -277.243896484375, 890.70703125, 65.52765655517578, 607.1983642578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000553.npy"}
|
||||
{"epoch": 0.8120411160058737, "step": 554, "batch_size": 64, "mean": 411.3079833984375, "std": 720.21435546875, "min": -1548.0126953125, "p10": -454.34450073242186, "median": 390.6107482910156, "p90": 1385.6586059570318, "max": 2303.512451171875, "pos_frac": 0.765625, "sample": [168.1457061767578, -726.2325439453125, 826.1987915039062, 547.309326171875, 233.97702026367188, 938.951416015625, 359.2031555175781, 425.5627746582031, -422.1098937988281, 411.60595703125, 2303.512451171875, 424.2862548828125, 431.0587158203125, 328.5660705566406, -195.28001403808594, 451.0206298828125, 575.0430908203125, 566.2373046875, 571.810791015625, -18.031883239746094, -631.1844482421875, -682.212646484375, -468.1593322753906, 7.536491394042969, 2118.126953125, -173.76043701171875, 225.98837280273438, 426.3752136230469, 461.18719482421875, 864.5079956054688, 571.4049072265625, 1447.0908203125, 1618.7833251953125, 179.0963134765625, 942.8638916015625, 341.2980651855469, 910.3201904296875, 4.575130462646484, 1669.1011962890625, -150.9154052734375, -67.08511352539062, 33.30486297607422, 1123.0634765625, 619.7333984375, 985.381103515625, 88.80448913574219, 322.757568359375, 5.066864013671875, 895.4109497070312, 792.1461791992188, -861.0226440429688, 617.2542724609375, 290.5660705566406, -688.0187377929688, 1121.89990234375, -1548.0126953125, -312.4252624511719, 0.9637508392333984, 369.61553955078125, 1242.3167724609375, 1730.2977294921875, 1750.550537109375, 117.34691619873047, -189.06219482421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000554.npy"}
|
||||
{"epoch": 0.8135095447870778, "step": 555, "batch_size": 64, "mean": 482.5061950683594, "std": 735.4490356445312, "min": -1135.4232177734375, "p10": -226.7660247802734, "median": 412.6693115234375, "p90": 1563.3124389648438, "max": 2262.47509765625, "pos_frac": 0.78125, "sample": [468.6966552734375, 404.12811279296875, 1399.4525146484375, 282.1488342285156, 699.4784545898438, 1225.8260498046875, 421.21051025390625, 240.23751831054688, -197.96807861328125, 178.58847045898438, -141.58103942871094, 856.4907836914062, 779.417236328125, 533.7931518554688, 21.4228515625, 58.8062744140625, 167.11813354492188, -239.10800170898438, 2262.47509765625, 1190.8121337890625, -1.21759033203125, 2000.8551025390625, 87.18775177001953, 459.6181640625, 488.27227783203125, 94.36477661132812, 780.8148193359375, 38.53468322753906, 450.2962951660156, 112.26493835449219, 731.1075439453125, 1082.10400390625, 630.9755859375, 507.7672424316406, 19.266845703125, 186.7102813720703, 680.3658447265625, -673.4085693359375, 201.23880004882812, -139.25714111328125, -969.0103759765625, 1113.1119384765625, 1765.487060546875, 2255.41015625, -537.8377685546875, 474.7958679199219, 2074.2060546875, 177.02171325683594, 493.1833801269531, 53.204097747802734, 1551.3482666015625, -498.84857177734375, 1568.43994140625, -1135.4232177734375, -187.8594970703125, -137.42410278320312, 1789.2373046875, 547.4234619140625, -160.83267211914062, 198.3145751953125, 1253.3917236328125, 333.7257385253906, -265.8956298828125, 775.92041015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000555.npy"}
|
||||
{"epoch": 0.8149779735682819, "step": 556, "batch_size": 64, "mean": 294.10113525390625, "std": 440.38470458984375, "min": -495.514892578125, "p10": -187.3568572998047, "median": 239.7291259765625, "p90": 911.5039428710938, "max": 1526.4415283203125, "pos_frac": 0.71875, "sample": [1275.9451904296875, 234.088134765625, 71.90994262695312, -118.7130126953125, 285.3130187988281, 142.3787841796875, 569.4574584960938, 435.2638244628906, -180.93331909179688, 348.1390380859375, 19.347312927246094, 679.1626586914062, 280.6680603027344, -190.10980224609375, 198.28466796875, 547.5515747070312, 1132.056884765625, -10.30218505859375, 692.7508544921875, -155.72308349609375, -495.514892578125, -92.1314926147461, -316.04632568359375, 156.1074981689453, -399.7078857421875, -105.0028076171875, 362.5730895996094, 27.610153198242188, 915.4767456054688, -310.754638671875, 380.3109130859375, 289.3624267578125, 198.14390563964844, 101.28565216064453, 787.9747924804688, -46.261253356933594, 432.9361572265625, 137.5610809326172, 205.67947387695312, -43.75116729736328, -79.9405517578125, 794.6356201171875, 189.6398468017578, 141.77813720703125, -230.44363403320312, 535.8192138671875, 902.2340698242188, 499.81756591796875, 958.1264038085938, 69.4074478149414, 245.3701171875, -139.95968627929688, 830.9840698242188, 323.34735107421875, 1526.4415283203125, -102.22411346435547, 926.3687744140625, 1441.050537109375, 475.7162780761719, 446.01434326171875, 404.67333984375, -318.4945983886719, 283.63916015625, 256.081787109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000556.npy"}
|
||||
{"epoch": 0.8164464023494861, "step": 557, "batch_size": 64, "mean": 442.61669921875, "std": 711.7261352539062, "min": -1500.56591796875, "p10": -298.71486511230466, "median": 359.02040100097656, "p90": 1300.555981445313, "max": 2706.271484375, "pos_frac": 0.765625, "sample": [-1500.56591796875, 196.56187438964844, -1058.8525390625, 831.2073974609375, 373.2291564941406, 254.21697998046875, 131.34690856933594, 524.8873291015625, 697.4561767578125, 81.26513671875, 952.4544067382812, 1352.47509765625, 1057.05908203125, 66.4365234375, 159.29415893554688, 308.1005859375, 467.91986083984375, 949.8596801757812, -41.416316986083984, 87.61719512939453, 83.62831115722656, 1166.62255859375, 1648.8441162109375, 690.2171630859375, -4.879638671875, 538.1494140625, 43.28790283203125, -310.67669677734375, -293.5022277832031, 845.519775390625, 167.9549560546875, 675.769775390625, 1132.376708984375, 543.2548217773438, 220.27317810058594, 194.63722229003906, 439.7907409667969, -522.3713989257812, 895.0634155273438, 554.2045288085938, 807.4834594726562, -300.9488525390625, 542.5813598632812, 138.81495666503906, 90.13459777832031, -471.47796630859375, 692.3670654296875, 1706.6673583984375, -466.6527404785156, 2706.271484375, -48.83964920043945, 2262.79248046875, 1523.420654296875, 182.73880004882812, 1069.822021484375, -141.4720001220703, -291.9942626953125, 1485.5128173828125, -267.03240966796875, 1179.411376953125, -53.267295837402344, 469.0917663574219, 344.8116455078125, 568.516357421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000557.npy"}
|
||||
{"epoch": 0.8179148311306902, "step": 558, "batch_size": 64, "mean": 584.4525146484375, "std": 631.5555419921875, "min": -1704.5823974609375, "p10": -116.14519042968747, "median": 504.88079833984375, "p90": 1356.765234375, "max": 1928.3779296875, "pos_frac": 0.8125, "sample": [394.177490234375, 1113.0860595703125, 207.9611053466797, 1371.136962890625, 180.45059204101562, 349.8731994628906, 575.9198608398438, 62.53718566894531, 845.1064453125, -371.6000671386719, -183.31295776367188, 1261.4736328125, 1288.3048095703125, 1063.43994140625, 461.18853759765625, 1106.2811279296875, 637.1328735351562, 392.6020812988281, 1683.9761962890625, 1720.0672607421875, -223.93301391601562, 361.62286376953125, 327.7200927734375, 1224.7132568359375, -61.49427032470703, 700.7255859375, 1323.231201171875, 416.76702880859375, 1431.42724609375, 539.1257934570312, 412.7117004394531, 562.6368408203125, 1014.8228149414062, 1279.95166015625, 1392.3548583984375, 1006.9008178710938, -1704.5823974609375, -92.67276000976562, 452.3456115722656, 173.1165313720703, 400.80816650390625, -0.5188884735107422, 126.28471374511719, 400.33709716796875, 492.7025146484375, -84.18362426757812, -222.1577911376953, 1928.3779296875, 1017.2117309570312, -0.9239883422851562, 770.427001953125, 1056.246337890625, 562.6340942382812, 517.05908203125, 1102.9063720703125, -642.8848266601562, 772.3023681640625, 371.9615173339844, 457.1256103515625, -126.20480346679688, 217.54876708984375, 1455.55615234375, 1087.600830078125, 1047.450439453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000558.npy"}
|
||||
{"epoch": 0.8193832599118943, "step": 559, "batch_size": 64, "mean": 494.1765441894531, "std": 636.841796875, "min": -851.0772094726562, "p10": -87.81757354736324, "median": 364.3088836669922, "p90": 1261.2559814453125, "max": 2370.71435546875, "pos_frac": 0.8125, "sample": [973.0742797851562, 1244.920654296875, 623.634765625, 654.8351440429688, 71.64575958251953, 50.45611572265625, 140.78175354003906, 158.76808166503906, 1324.647705078125, 2370.71435546875, 1158.049072265625, 421.821044921875, 1129.567626953125, 13.686735153198242, 164.77291870117188, 766.02587890625, 1115.845703125, 1317.666259765625, -851.0772094726562, -442.8284912109375, 1268.2568359375, 306.463623046875, 190.14794921875, 402.70904541015625, 165.1835479736328, 1072.62841796875, -162.18289184570312, -103.34173583984375, 1123.0845947265625, 200.77410888671875, -11.050590515136719, 301.702880859375, 113.02152252197266, 51.147499084472656, 122.97892761230469, -51.59452819824219, 28.24466323852539, -43.873802185058594, 247.18528747558594, 211.8626708984375, 1923.066650390625, 551.5856323242188, 1163.341796875, 105.88028717041016, -727.2195434570312, 658.1229248046875, 633.45947265625, 602.9415283203125, 1135.4385986328125, -49.514793395996094, 496.2749328613281, 328.9513854980469, 399.6663818359375, 1148.26708984375, 1456.709716796875, 1915.252685546875, -462.6384582519531, 735.9534301757812, 1041.9569091796875, 464.1016845703125, -22.013967514038086, 95.85887145996094, -532.5374145507812, 724.0377197265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000559.npy"}
|
||||
{"epoch": 0.8208516886930984, "step": 560, "batch_size": 64, "mean": 358.7589111328125, "std": 579.1507568359375, "min": -980.9002075195312, "p10": -247.44680633544922, "median": 296.016357421875, "p90": 1233.1477294921876, "max": 2054.414306640625, "pos_frac": 0.734375, "sample": [2054.414306640625, -371.6884765625, -132.89505004882812, 1135.986083984375, -443.3343811035156, 577.0216064453125, 67.03716278076172, -128.03070068359375, 68.336669921875, 416.4681701660156, 239.38726806640625, 140.19265747070312, 225.28863525390625, 554.9109497070312, 1520.011962890625, -249.53973388671875, 767.0536499023438, 1266.89111328125, 574.1310424804688, 1024.10546875, 742.003662109375, -31.33843994140625, 187.90945434570312, 1317.310546875, 890.0802612304688, -606.16650390625, 26.121158599853516, 1296.544921875, 427.876220703125, -165.39804077148438, 814.8439331054688, -242.5633087158203, 26.787822723388672, 547.3587036132812, 436.44134521484375, -524.44482421875, 81.77259063720703, 1408.7623291015625, 257.5299987792969, 535.4579467773438, 552.3282470703125, -145.97308349609375, 429.60205078125, 160.81393432617188, 685.9417114257812, 1256.766357421875, -89.98484802246094, -17.255149841308594, -103.72877502441406, 1178.03759765625, 567.6435546875, 189.16116333007812, 530.1441650390625, 233.66050720214844, 368.75634765625, 334.5027160644531, -235.08404541015625, 510.942626953125, 772.140380859375, 610.7410278320312, -725.61279296875, 99.02074432373047, -980.9002075195312, 46.26788330078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000560.npy"}
|
||||
{"epoch": 0.8223201174743024, "step": 561, "batch_size": 64, "mean": 298.46636962890625, "std": 651.0885009765625, "min": -2304.068603515625, "p10": -287.59631347656244, "median": 230.6715850830078, "p90": 1063.9386779785161, "max": 1762.72705078125, "pos_frac": 0.703125, "sample": [557.5147094726562, 481.82000732421875, 100.80794525146484, -7.256082534790039, -1091.104736328125, 1289.56396484375, 804.8489379882812, 81.53660583496094, 665.592041015625, 1108.841796875, 107.32904052734375, 513.6554565429688, 1762.72705078125, -135.6193389892578, 179.81741333007812, 178.44497680664062, 136.62445068359375, -251.02639770507812, 373.87860107421875, 454.6025390625, 1314.50390625, 168.35562133789062, 590.6078491210938, 320.94573974609375, -244.20745849609375, 119.80052185058594, 873.8148193359375, 437.2054138183594, -64.84656524658203, 1718.292236328125, 782.4708251953125, 1264.76220703125, -0.05112457275390625, 248.57818603515625, 923.8243408203125, -575.4869995117188, -303.2691345214844, 556.2127685546875, 20.516756057739258, 675.4916381835938, 131.4361114501953, -393.5229187011719, -87.65039825439453, 822.5487670898438, 520.97216796875, -179.09576416015625, 138.06463623046875, 797.1316528320312, 959.1647338867188, 212.76498413085938, -2304.068603515625, 1752.82958984375, 2.314960479736328, -122.36309051513672, 401.64599609375, -7.27264404296875, 414.59173583984375, -29.218122482299805, 581.5244750976562, 298.98492431640625, -717.3515625, 295.3641662597656, -355.5617980957031, -171.50717163085938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000561.npy"}
|
||||
{"epoch": 0.8237885462555066, "step": 562, "batch_size": 64, "mean": 423.41015625, "std": 642.0966186523438, "min": -1266.3299560546875, "p10": -220.91692962646485, "median": 351.2316589355469, "p90": 1013.2212829589844, "max": 2740.4228515625, "pos_frac": 0.765625, "sample": [605.3276977539062, 2740.4228515625, 1392.877685546875, 528.4603271484375, 132.18341064453125, 446.8740539550781, 743.0608520507812, 244.1633758544922, 1003.026123046875, 792.6527709960938, 818.3355102539062, 977.1119384765625, 264.56671142578125, 473.5585021972656, 1064.44287109375, -157.47642517089844, 243.7186279296875, 884.6397705078125, 213.09457397460938, 424.0619201660156, 300.6121826171875, 558.329833984375, 445.45892333984375, -338.4030456542969, 26.441062927246094, 796.5833129882812, 968.6193237304688, -50.45013427734375, 972.316650390625, 270.75244140625, -275.11663818359375, -180.1318359375, -58.47511291503906, 721.87255859375, -60.41187286376953, 2597.265380859375, -223.65667724609375, 467.769287109375, 592.0076904296875, 991.7333374023438, 801.877685546875, 198.0883331298828, 638.818115234375, 240.13958740234375, -1266.3299560546875, 240.0096435546875, -214.52418518066406, 852.0423583984375, 401.85113525390625, 1017.5906372070312, -96.83648681640625, -95.22892761230469, 172.63229370117188, 57.99894332885742, 663.966064453125, 277.55706787109375, -709.2241821289062, 55.385154724121094, 171.58641052246094, -540.072998046875, 113.51570129394531, -460.217529296875, 1098.5577392578125, 1120.84716796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000562.npy"}
|
||||
{"epoch": 0.8252569750367107, "step": 563, "batch_size": 64, "mean": 314.54473876953125, "std": 746.2301025390625, "min": -2902.46728515625, "p10": -497.60447387695314, "median": 388.7348327636719, "p90": 1023.6092407226564, "max": 1780.342041015625, "pos_frac": 0.765625, "sample": [205.97024536132812, -2902.46728515625, -912.021484375, 445.792724609375, 774.7800903320312, 148.568359375, 1413.0140380859375, 11.549346923828125, 319.17645263671875, 678.3857421875, 1149.1429443359375, 166.21954345703125, -487.2375183105469, 966.8369140625, 824.2557983398438, 784.987548828125, 109.24064636230469, 1743.292724609375, 704.1101684570312, -158.33714294433594, 570.623046875, 101.82538604736328, -187.74032592773438, 321.461181640625, 907.7929077148438, -5.090282440185547, 719.342041015625, 336.607177734375, 1036.6158447265625, 921.965087890625, 121.02114868164062, 747.215576171875, -1659.9544677734375, 535.1591186523438, -96.72010803222656, 146.03961181640625, -13.38717269897461, 610.7096557617188, 558.064453125, 377.59588623046875, 787.1697387695312, 594.6705932617188, 1291.4853515625, 291.3057861328125, 993.260498046875, -177.9097900390625, -1240.4713134765625, 1780.342041015625, -516.8857421875, 1343.9990234375, 117.02835083007812, 681.0975952148438, 76.15735626220703, -664.30712890625, 694.5965576171875, -502.0474548339844, 452.2529296875, -300.37811279296875, 105.24745178222656, 558.95751953125, 399.873779296875, 468.8301696777344, 323.5713806152344, 538.611572265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000563.npy"}
|
||||
{"epoch": 0.8267254038179148, "step": 564, "batch_size": 64, "mean": 402.17816162109375, "std": 532.6932373046875, "min": -541.505859375, "p10": -215.00467834472653, "median": 321.53375244140625, "p90": 1136.3365966796878, "max": 2038.6383056640625, "pos_frac": 0.796875, "sample": [320.646240234375, 475.5238952636719, 760.3238525390625, 142.5481414794922, 672.867431640625, 597.818115234375, 661.8057861328125, -367.1327209472656, 307.6490173339844, -541.505859375, 578.861572265625, 703.9596557617188, -311.26568603515625, 366.3273010253906, 1059.92431640625, 32.03851318359375, -166.93885803222656, 1264.8052978515625, 1169.084716796875, 484.93096923828125, 332.4314270019531, 233.7798614501953, 210.60472106933594, -186.92266845703125, -378.08062744140625, 290.22698974609375, -227.03982543945312, 218.15408325195312, 17.216644287109375, -367.71600341796875, 331.3355407714844, 1944.883544921875, 686.5574340820312, -104.90087127685547, 28.786352157592773, 356.6761474609375, 146.51356506347656, 129.87002563476562, 451.2376708984375, -18.349590301513672, 535.6651000976562, 2038.6383056640625, 63.0213623046875, 570.436279296875, 716.1751708984375, 753.97119140625, 1470.764892578125, 803.9212036132812, 1316.8668212890625, 322.4212646484375, -6.995246887207031, 702.54150390625, 62.275978088378906, 69.48062896728516, 169.35023498535156, 275.19769287109375, 204.43739318847656, 931.109619140625, 326.8551940917969, -88.61293029785156, 1350.695556640625, -312.397216796875, 914.0177612304688, 242.02874755859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000564.npy"}
|
||||
{"epoch": 0.8281938325991189, "step": 565, "batch_size": 64, "mean": 422.9785461425781, "std": 738.9156494140625, "min": -1230.4656982421875, "p10": -436.9650177001952, "median": 282.35890197753906, "p90": 1232.7968139648438, "max": 2629.958740234375, "pos_frac": 0.65625, "sample": [13.850322723388672, 1148.6793212890625, 1342.493408203125, -1230.4656982421875, 215.73760986328125, 953.849609375, 2054.228515625, -127.09126281738281, -136.47291564941406, -302.76092529296875, 557.678466796875, 1168.587646484375, -272.7705383300781, -970.93212890625, 249.17242431640625, -494.1358642578125, -542.5084228515625, -109.64105224609375, -99.1165771484375, 477.9657897949219, 1101.7366943359375, -5.259422302246094, 267.0816650390625, 1566.9678955078125, -510.13128662109375, -29.88732147216797, 216.54180908203125, 285.9297790527344, -583.167724609375, 997.5029907226562, 622.1600341796875, 291.12225341796875, 644.0078125, 864.184814453125, 817.8106079101562, -1.8854522705078125, 921.8063354492188, 620.5136108398438, -303.5663757324219, -72.35662078857422, -73.84312438964844, 927.1455688476562, 762.1226806640625, 1702.9814453125, 854.8056030273438, 2629.958740234375, 521.19970703125, 2242.622802734375, 1226.4261474609375, -501.7767028808594, -118.0704345703125, 15.958938598632812, 993.1136474609375, 480.2134094238281, 798.12548828125, 284.1854553222656, 1026.425537109375, 280.5323486328125, 79.3284912109375, -6.804039001464844, -264.689697265625, 251.2665557861328, 116.41178894042969, 1235.527099609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000565.npy"}
|
||||
{"epoch": 0.8296622613803231, "step": 566, "batch_size": 64, "mean": 470.28839111328125, "std": 739.6570434570312, "min": -1315.94921875, "p10": -363.9959411621094, "median": 541.4591674804688, "p90": 1466.3245605468758, "max": 2128.865966796875, "pos_frac": 0.703125, "sample": [-216.49371337890625, -749.411865234375, 607.168212890625, 2128.865966796875, 518.814453125, -185.71755981445312, 610.759765625, 154.21609497070312, -946.0810546875, 498.9757385253906, 606.9005126953125, -204.51646423339844, -24.71356201171875, 635.6631469726562, 875.4823608398438, 403.15478515625, 699.910400390625, 134.02513122558594, 20.477436065673828, -1315.94921875, 1021.4270629882812, 638.2470703125, 508.4839172363281, 22.05572509765625, 1246.46728515625, -474.3319396972656, 564.1038818359375, 1119.5523681640625, -100.45573425292969, 1118.744873046875, 790.3947143554688, 992.9783935546875, 716.7716064453125, 105.40359497070312, 1062.906494140625, -311.8608093261719, -98.47315979003906, 1944.6888427734375, 978.5700073242188, -203.91700744628906, -329.76904296875, 791.27392578125, 573.8465576171875, 1539.8829345703125, 1294.6883544921875, 1797.9102783203125, 2108.81201171875, 692.8299560546875, -804.9168090820312, 347.6678466796875, -617.272705078125, 1574.94140625, 765.8756103515625, 1592.249755859375, -70.73601531982422, -367.80755615234375, 1055.162353515625, 370.72467041015625, 1072.4791259765625, -355.1021728515625, -0.05834197998046875, 39.1387939453125, 671.3328247070312, 462.0156555175781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000566.npy"}
|
||||
{"epoch": 0.8311306901615272, "step": 567, "batch_size": 64, "mean": 362.74383544921875, "std": 489.8588562011719, "min": -1153.1722412109375, "p10": -68.50524978637694, "median": 287.56842041015625, "p90": 947.5468322753907, "max": 1829.8648681640625, "pos_frac": 0.84375, "sample": [135.987548828125, -12.952432632446289, 1259.6029052734375, 626.323974609375, 386.466552734375, 679.298583984375, 166.72882080078125, -77.51029205322266, 1094.29150390625, 164.75204467773438, 952.3333740234375, 38.37140655517578, 1099.657958984375, 290.09619140625, 201.30862426757812, 276.44842529296875, 1232.413330078125, 608.271728515625, 613.8360595703125, 63.78923416137695, 617.1345825195312, 196.63330078125, -300.0799255371094, 590.8043823242188, 273.404052734375, 736.0543823242188, 670.2024536132812, 1544.48974609375, -47.49348449707031, 645.1934814453125, 65.51177215576172, 293.29669189453125, 331.16387939453125, -1153.1722412109375, 256.3824462890625, -25.121173858642578, 136.95703125, 704.047607421875, 1829.8648681640625, 801.3268432617188, 88.17831420898438, -893.5870971679688, 244.43011474609375, 180.91055297851562, 205.68214416503906, 936.3782348632812, 292.8968505859375, 256.674560546875, 461.33544921875, 379.2487487792969, 182.97691345214844, 790.1992797851562, 17.47472381591797, 501.23199462890625, -132.77540588378906, 316.01434326171875, 142.91622924804688, 285.0406494140625, -157.12197875976562, 198.69140625, 389.3209228515625, 528.4872436523438, -503.99005126953125, 538.8761596679688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000567.npy"}
|
||||
{"epoch": 0.8325991189427313, "step": 568, "batch_size": 64, "mean": 480.6161804199219, "std": 529.9495239257812, "min": -777.3663940429688, "p10": -86.28858451843259, "median": 380.53868103027344, "p90": 1147.0157104492187, "max": 1883.54931640625, "pos_frac": 0.875, "sample": [694.718994140625, -642.9443969726562, 1232.220703125, 850.4682006835938, 471.5050048828125, 351.7468566894531, 616.2574462890625, 772.5867919921875, 392.7652282714844, -777.3663940429688, -309.39208984375, -168.6923370361328, 570.5006103515625, 1796.8154296875, 230.65020751953125, 495.8903503417969, 228.62872314453125, 165.6788330078125, 242.9330596923828, -101.21583557128906, 572.7412109375, 338.5025939941406, 924.4664306640625, 990.0422973632812, 580.9188232421875, 349.7367248535156, 83.11263275146484, 1883.54931640625, 294.8907165527344, 359.5187683105469, 787.4771728515625, 708.656982421875, 1147.3599853515625, 522.7483520507812, 1501.5611572265625, 374.4673156738281, 292.646240234375, 607.2400512695312, 311.4490661621094, 931.9818115234375, 980.7953491210938, 25.17380142211914, 771.2890014648438, 891.5504760742188, 73.11491394042969, 311.3834228515625, 160.19271850585938, -51.45833206176758, 371.7655334472656, 1146.21240234375, 101.41783905029297, 1341.205810546875, 386.61004638671875, -607.5982666015625, 232.5076904296875, -380.56787109375, 1326.42724609375, 1086.7767333984375, 242.27230834960938, 576.035400390625, 198.67999267578125, 728.0679931640625, 67.20623016357422, 103.55155944824219], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000568.npy"}
|
||||
{"epoch": 0.8340675477239354, "step": 569, "batch_size": 64, "mean": 401.8387451171875, "std": 710.9088134765625, "min": -2199.04296875, "p10": -302.5207916259765, "median": 320.5335998535156, "p90": 1251.5045776367188, "max": 2185.700927734375, "pos_frac": 0.78125, "sample": [-610.7169189453125, 108.94924926757812, 1719.2646484375, 185.39996337890625, 348.82080078125, -232.0247039794922, 704.406982421875, 143.27584838867188, -496.65557861328125, 136.6144561767578, 800.8129272460938, 1240.25732421875, 543.5043334960938, 489.1031494140625, 367.7835693359375, -569.875732421875, 248.52586364746094, 876.3985595703125, -222.39459228515625, -46.51268768310547, -1197.010009765625, 1631.2794189453125, 99.75576782226562, -242.96099853515625, 38.195037841796875, 706.876220703125, 1210.9227294921875, 18.332382202148438, -2.948822021484375, 92.11041259765625, 1359.5042724609375, 885.17431640625, 686.7879638671875, -22.421966552734375, 1386.5443115234375, 394.1775817871094, 121.03955078125, 370.4903259277344, 810.9324951171875, -2199.04296875, 598.9578857421875, 176.96946716308594, 21.187498092651367, 1183.91796875, 993.1278076171875, 136.9035186767578, 910.1552124023438, 229.00582885742188, -328.0464172363281, 804.4627685546875, -330.0479736328125, 1857.1644287109375, 81.45995330810547, 544.8828735351562, 1073.9603271484375, -112.36707305908203, 137.32064819335938, 774.50830078125, 292.24639892578125, 96.63983154296875, 543.6361694335938, 706.9324951171875, 1256.3248291015625, 2185.700927734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000569.npy"}
|
||||
{"epoch": 0.8355359765051396, "step": 570, "batch_size": 64, "mean": 371.16790771484375, "std": 598.2675170898438, "min": -910.0357666015625, "p10": -229.68846740722654, "median": 290.1255645751953, "p90": 1206.9823486328128, "max": 2524.171875, "pos_frac": 0.78125, "sample": [312.50445556640625, 949.9613647460938, 403.81158447265625, 1602.9810791015625, 2524.171875, 109.45790100097656, 768.0586547851562, 1367.74072265625, 1604.6639404296875, 318.38543701171875, 372.0647888183594, 991.988525390625, 269.6423034667969, 461.1092529296875, 12.275777816772461, 397.96112060546875, 923.5791015625, 391.6935119628906, 647.0266723632812, -218.33395385742188, 720.6585083007812, -259.6634521484375, -536.6094360351562, -739.9736328125, 115.72870635986328, 846.00048828125, -585.1284790039062, -194.93392944335938, 319.84368896484375, 379.71002197265625, 492.1638488769531, 253.05503845214844, -234.5546875, -520.330810546875, 228.00222778320312, 211.2630157470703, 242.92645263671875, 448.93487548828125, 120.94322967529297, 1235.855224609375, 529.0845947265625, -141.61419677734375, 239.62338256835938, 145.51239013671875, 1337.862548828125, 286.65814208984375, 293.5929870605469, 707.3372192382812, 1139.6123046875, 733.0126953125, 2.391033172607422, 188.70046997070312, 564.3662109375, 399.23992919921875, -56.81696319580078, -910.0357666015625, 212.212890625, 95.68122863769531, 244.7167510986328, -122.64727783203125, -211.68365478515625, 1360.451171875, 180.26016235351562, -217.40760803222656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000570.npy"}
|
||||
{"epoch": 0.8370044052863436, "step": 571, "batch_size": 64, "mean": 444.7485656738281, "std": 692.2005004882812, "min": -1345.8837890625, "p10": -401.4549560546875, "median": 377.2711486816406, "p90": 1362.0130493164065, "max": 2189.773193359375, "pos_frac": 0.765625, "sample": [587.9260864257812, 829.3096923828125, 84.95271301269531, 897.768798828125, -159.92410278320312, 590.9507446289062, 73.4755859375, 982.3834228515625, 318.1473388671875, 1127.7403564453125, 2000.5791015625, -408.7386169433594, -230.63986206054688, 1781.8118896484375, 166.0399627685547, 616.126953125, 826.2376708984375, 763.37939453125, 372.7094421386719, 809.1605834960938, 2189.773193359375, 1764.01123046875, 751.4920043945312, 1225.05126953125, -384.4597473144531, -2.451051712036133, 698.1714477539062, 312.3800048828125, 1516.560791015625, 1173.701171875, 268.8236083984375, -75.85614013671875, 381.8328552246094, 1678.6435546875, 853.3677978515625, -64.53147888183594, -618.7012329101562, 148.71331787109375, 1285.4619140625, 534.1063232421875, 863.0289916992188, -479.0683288574219, 1394.8206787109375, 567.0480346679688, 408.9468994140625, 127.01233673095703, 593.6058959960938, 163.9807891845703, 623.9616088867188, 18.806930541992188, -567.25390625, -1345.8837890625, 333.677978515625, 53.690216064453125, -664.275390625, 364.0309753417969, 240.44029235839844, 480.3804626464844, -852.92724609375, 45.91251754760742, -123.69857788085938, 585.89892578125, -62.613494873046875, 28.897705078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000571.npy"}
|
||||
{"epoch": 0.8384728340675477, "step": 572, "batch_size": 64, "mean": 451.42071533203125, "std": 780.9384765625, "min": -1384.577880859375, "p10": -347.45179748535156, "median": 343.24412536621094, "p90": 1468.7371093750005, "max": 3164.630126953125, "pos_frac": 0.6875, "sample": [-348.4225769042969, -170.0191192626953, -363.38519287109375, 525.216796875, 1958.0037841796875, -67.59088897705078, 61.029083251953125, 24.57697296142578, -37.58938217163086, 196.76443481445312, -45.438087463378906, 1337.636474609375, 952.586181640625, 361.5933837890625, 696.8151245117188, 1117.673828125, 1050.400390625, 1161.3560791015625, 1532.73193359375, 646.3887329101562, 268.5621337890625, 748.5247802734375, -70.44490051269531, 1064.2635498046875, -1060.8004150390625, 77.1836166381836, 301.07855224609375, -319.347900390625, -258.0608825683594, 841.3223266601562, -538.7421875, 537.1812133789062, 488.6915588378906, -7.711517333984375, 184.85568237304688, 3164.630126953125, 97.5056381225586, 375.5404357910156, -250.0238800048828, -654.8810424804688, 1524.923095703125, 60.20255661010742, -345.1866455078125, 603.282470703125, -289.0336608886719, -380.9895324707031, 1092.502197265625, 866.5481567382812, 665.4802856445312, -13.213645935058594, 855.60693359375, 317.71881103515625, 324.8948669433594, 2064.577392578125, 204.4076385498047, 379.4264221191406, 665.9525756835938, 2130.970703125, -23.390380859375, -1384.577880859375, 609.2246704101562, 1111.763916015625, 1669.2279052734375, 600.9520874023438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000572.npy"}
|
||||
{"epoch": 0.8399412628487518, "step": 573, "batch_size": 64, "mean": 407.0157775878906, "std": 575.2774047851562, "min": -563.1766967773438, "p10": -145.5139625549316, "median": 311.3287811279297, "p90": 965.4431518554688, "max": 2611.062255859375, "pos_frac": 0.796875, "sample": [675.3359985351562, -79.09225463867188, 15.259908676147461, -7.45892333984375, -6.4024810791015625, 2611.062255859375, 942.3621215820312, -105.49834442138672, 90.72498321533203, 107.93315124511719, 471.1823425292969, 237.65724182128906, -563.1766967773438, 2156.11572265625, 424.65802001953125, 479.93865966796875, 1793.74609375, -491.13739013671875, 45.10150909423828, -162.66351318359375, -506.0020751953125, 1126.4608154296875, 740.7194213867188, 555.0009155273438, 30.64380645751953, 259.36334228515625, 876.9560546875, 975.3350219726562, 158.83059692382812, 1161.58642578125, -298.1168212890625, 935.763427734375, 327.3995056152344, 543.7359008789062, 666.5552978515625, -349.0950622558594, 325.84161376953125, 107.880859375, 822.6781005859375, 48.74927520751953, 934.5894775390625, 296.8159484863281, 213.4244384765625, -98.55345916748047, 418.71502685546875, 70.90986633300781, 337.331298828125, 78.60673522949219, 867.62841796875, -267.43280029296875, 467.79443359375, 207.30990600585938, 196.70684814453125, 707.49072265625, 392.98150634765625, 88.96483612060547, -97.9205551147461, 860.750732421875, 563.816162109375, 602.5526123046875, 193.7130126953125, 706.998291015625, 1037.9185791015625, 121.96446228027344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000573.npy"}
|
||||
{"epoch": 0.8414096916299559, "step": 574, "batch_size": 64, "mean": 444.5045166015625, "std": 608.9181518554688, "min": -968.09326171875, "p10": -296.0772003173828, "median": 409.3670196533203, "p90": 1324.052221679688, "max": 2041.982666015625, "pos_frac": 0.796875, "sample": [285.5440368652344, 766.2100830078125, -183.29025268554688, -245.7315673828125, 1177.69873046875, 43.57434844970703, 119.140625, 1386.775146484375, 1007.3165893554688, 800.7254028320312, -46.54621124267578, 1580.9737548828125, 968.7435302734375, 796.4494018554688, 137.72512817382812, 737.7481079101562, 80.06133270263672, 428.09552001953125, -689.7743530273438, 2041.982666015625, 631.7413940429688, 652.2659301757812, 529.3005981445312, 630.5262451171875, 205.65118408203125, 711.9807739257812, 390.6385192871094, -428.000244140625, 149.5436248779297, -200.08404541015625, 165.81234741210938, -434.6647033691406, 203.818603515625, 1576.6275634765625, -86.08380889892578, 735.4513549804688, -489.5278015136719, 841.3292236328125, 232.8878631591797, 484.8493347167969, 990.1863403320312, -333.912841796875, 724.0077514648438, 816.079833984375, 683.412109375, 476.6111145019531, 481.8417663574219, 19.045883178710938, -968.09326171875, -317.6539001464844, 389.7917785644531, 101.88170623779297, 7.017107009887695, 1536.263916015625, -46.242706298828125, 922.84521484375, 368.8753967285156, 1514.4058837890625, 231.00428771972656, 669.3533325195312, 114.8642807006836, 1821.5625, 542.1987915039062, 5.455942153930664], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000574.npy"}
|
||||
{"epoch": 0.8428781204111601, "step": 575, "batch_size": 64, "mean": 405.97833251953125, "std": 682.9534912109375, "min": -1260.9619140625, "p10": -308.9338287353515, "median": 370.7556610107422, "p90": 1301.159497070313, "max": 2256.68798828125, "pos_frac": 0.765625, "sample": [1373.368408203125, 249.47499084472656, 891.6384887695312, 359.09771728515625, 592.7094116210938, -834.4427490234375, 540.7056884765625, 547.7719116210938, 248.81161499023438, 142.1016387939453, 76.46302032470703, -14.966041564941406, 397.2253112792969, 1125.427734375, 597.2666625976562, 360.8958435058594, -134.8145751953125, 827.7413940429688, 267.1456298828125, 329.39990234375, 369.9786376953125, 21.0953369140625, 1372.4312744140625, 660.2501831054688, 578.888671875, 314.6962585449219, -1260.9619140625, 254.94613647460938, -700.6337890625, 477.9896240234375, -942.4213256835938, 1216.296630859375, 908.5072021484375, 1337.529296875, 265.45574951171875, -140.6946258544922, -254.7169189453125, -66.27254486083984, 1399.4752197265625, 990.2941284179688, -201.08502197265625, 257.2874755859375, -332.1696472167969, 923.3421630859375, 1204.7730712890625, -900.3179931640625, 529.7940063476562, 542.2698974609375, 813.5052490234375, 2256.68798828125, 371.5326843261719, 871.35595703125, 434.4274597167969, -63.5008544921875, 1504.8892822265625, 168.1148681640625, 655.2762451171875, -1138.7254638671875, 2003.611083984375, 40.51484298706055, -147.03558349609375, 720.35498046875, 172.87025451660156, 549.6846923828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000575.npy"}
|
||||
{"epoch": 0.8443465491923642, "step": 576, "batch_size": 64, "mean": 310.65863037109375, "std": 573.9830322265625, "min": -938.3682250976562, "p10": -360.83446350097654, "median": 245.25313568115234, "p90": 956.8374572753908, "max": 1762.009521484375, "pos_frac": 0.75, "sample": [259.529296875, 930.8665771484375, -139.40809631347656, 63.62924575805664, -371.54376220703125, -189.6886749267578, -207.9751434326172, 1666.7564697265625, 924.5786743164062, 190.44464111328125, 283.4331359863281, 1631.832275390625, 105.93682861328125, 136.6690216064453, 742.0042114257812, 429.675048828125, 366.8497009277344, -819.756591796875, 317.62322998046875, 537.0697631835938, 541.1124267578125, 1145.8909912109375, 648.320068359375, 889.1431884765625, 46.04069137573242, 1389.212158203125, -117.77458190917969, -402.42626953125, -311.7866516113281, 749.7864379882812, -203.0022430419922, 237.76844787597656, 86.16293334960938, 38.478363037109375, 874.1748046875, 598.26318359375, 1762.009521484375, 320.57659912109375, 281.241943359375, 732.3173828125, 63.808738708496094, 786.6158447265625, 252.73782348632812, 410.71142578125, 169.88876342773438, -794.7338256835938, -57.6627197265625, 675.260009765625, -589.7789306640625, 637.767822265625, 369.9713439941406, 1030.4432373046875, 967.9678344726562, 872.7858276367188, -938.3682250976562, -335.8460998535156, 161.072021484375, 226.91018676757812, 235.93203735351562, 174.6981201171875, -430.34820556640625, 86.42135620117188, -313.56292724609375, 55.424781799316406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000576.npy"}
|
||||
{"epoch": 0.8458149779735683, "step": 577, "batch_size": 64, "mean": 407.04620361328125, "std": 637.924072265625, "min": -1516.472412109375, "p10": -98.43039703369138, "median": 349.9341125488281, "p90": 1304.7466552734377, "max": 2463.2001953125, "pos_frac": 0.84375, "sample": [-1516.472412109375, 2463.2001953125, 1243.7010498046875, 1592.6502685546875, 167.31198120117188, 35.87834167480469, 344.6399841308594, 151.5131378173828, 159.39501953125, -63.536834716796875, 726.425048828125, 355.2282409667969, 534.9976196289062, 454.06304931640625, 480.6741943359375, 754.7747192382812, 970.772705078125, 1391.7969970703125, -520.7325439453125, 618.2680053710938, 175.3594207763672, 13.499549865722656, -886.2220458984375, 262.65966796875, 259.202392578125, 731.8455810546875, 142.30194091796875, 245.92710876464844, 482.1097717285156, 407.19140625, 154.86434936523438, 456.5251770019531, 253.32052612304688, -54.83623504638672, 1754.8360595703125, 52.498443603515625, 425.2513427734375, -566.439697265625, 229.71693420410156, -406.5238037109375, 381.2401428222656, 220.35183715820312, 333.58795166015625, 404.88800048828125, -26.56781768798828, 618.6726684570312, 75.11894226074219, 636.340087890625, 761.135498046875, 998.161865234375, 106.45127868652344, 369.8142395019531, 1330.9090576171875, -113.38478088378906, 1759.1558837890625, 503.44287109375, 441.2179870605469, -638.887451171875, 74.32106018066406, 1598.7572021484375, 32.71714782714844, 920.4730834960938, 459.4644470214844, 325.941162109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000577.npy"}
|
||||
{"epoch": 0.8472834067547724, "step": 578, "batch_size": 64, "mean": 436.58270263671875, "std": 554.662109375, "min": -836.02392578125, "p10": -162.22628631591795, "median": 395.93190002441406, "p90": 1144.9868469238288, "max": 1996.439208984375, "pos_frac": 0.796875, "sample": [1996.439208984375, 191.99822998046875, 737.508544921875, 99.8277587890625, 585.0419311523438, 250.73980712890625, 1338.4541015625, 421.4786071777344, 207.49295043945312, -177.8302459716797, 94.9090576171875, 849.0436401367188, 496.023681640625, 239.07925415039062, 135.05581665039062, -510.2876892089844, 184.16416931152344, 19.228355407714844, 1847.7459716796875, 765.8177490234375, 726.2606201171875, 400.39947509765625, 997.5986938476562, -113.14408874511719, -55.21668243408203, 883.1415405273438, 834.8723754882812, -97.74051666259766, -336.9381408691406, 126.3759536743164, -24.151779174804688, -425.04443359375, 437.0281982421875, 391.4643249511719, 117.09931945800781, 549.1708984375, -239.8521728515625, 302.43359375, -836.02392578125, -85.49486541748047, 529.532958984375, 700.7157592773438, 537.9530029296875, 470.8050842285156, 1281.9564208984375, 961.48681640625, 340.07415771484375, 97.73885345458984, 784.08544921875, 188.4135284423828, 692.779296875, 797.5482177734375, 74.86131286621094, 1208.1531982421875, 849.310791015625, 1717.0240478515625, -125.81704711914062, 205.82432556152344, 1385.47119140625, 971.4735107421875, 563.8487548828125, -257.93768310546875, 79.19856262207031, 562.625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000578.npy"}
|
||||
{"epoch": 0.8487518355359766, "step": 579, "batch_size": 64, "mean": 266.721923828125, "std": 609.9057006835938, "min": -1755.6685791015625, "p10": -448.79501342773426, "median": 285.3831024169922, "p90": 986.9016235351564, "max": 2150.197265625, "pos_frac": 0.734375, "sample": [8.060508728027344, -179.6111297607422, -1755.6685791015625, 606.5274047851562, -67.46646118164062, 107.10647583007812, 844.0319213867188, 359.2262268066406, 713.5232543945312, 416.2561950683594, 4.142509460449219, -104.92558288574219, -791.47705078125, 418.69781494140625, 186.58010864257812, 638.6441650390625, 242.3319549560547, 561.2669067382812, 1121.9180908203125, 208.58091735839844, 1007.4284057617188, 93.88457489013672, 353.1527404785156, 504.31512451171875, 16.860708236694336, 399.80926513671875, 346.9852294921875, 74.69376373291016, 651.9195556640625, 240.1967010498047, 489.23895263671875, -490.23553466796875, 616.73486328125, 657.60888671875, 2150.197265625, 1492.32568359375, 326.7176513671875, -680.0006103515625, -27.26883316040039, 567.9925537109375, 1259.2515869140625, -251.09152221679688, 756.8429565429688, -352.1004638671875, 1326.8330078125, 13.636579513549805, -22.892047882080078, -221.40675354003906, -614.54541015625, 264.0550537109375, 939.0057983398438, 503.6801452636719, -57.09407043457031, 592.4789428710938, 346.2870178222656, -890.3836669921875, 1044.491455078125, 20.724485397338867, 261.9022521972656, -857.89013671875, 167.2234344482422, -176.1597900390625, 306.7111511230469, 380.33990478515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000579.npy"}
|
||||
{"epoch": 0.8502202643171806, "step": 580, "batch_size": 64, "mean": 508.2029724121094, "std": 704.0264282226562, "min": -1174.638916015625, "p10": -72.43252487182616, "median": 345.8044738769531, "p90": 1343.6454589843752, "max": 2943.6806640625, "pos_frac": 0.8125, "sample": [-191.7686004638672, 1016.9036865234375, 276.60986328125, 541.5423583984375, -129.36346435546875, 109.30512237548828, 190.6844940185547, 2099.724365234375, 449.4093933105469, 280.6848449707031, 483.6722412109375, -37.925537109375, 2943.6806640625, 220.54067993164062, 63.62642288208008, 680.410888671875, 377.89886474609375, 140.9762420654297, 844.9114379882812, 493.32415771484375, 236.9276580810547, 1082.5751953125, 1094.1368408203125, 535.9326171875, 511.9071350097656, 2.9080886840820312, 1159.3057861328125, 1277.2327880859375, 577.3081665039062, 145.11941528320312, 45.314720153808594, 160.21876525878906, 111.99007415771484, 101.10417175292969, 792.9674072265625, 1376.8382568359375, 53.115570068359375, -67.76493072509766, 922.83251953125, 774.6222534179688, 1909.7237548828125, 37.82118225097656, -1174.638916015625, 1081.14697265625, -35.13105010986328, 822.1287231445312, -456.9097595214844, -93.26205444335938, 645.6036987304688, 2852.97314453125, -3.7021007537841797, -74.43292236328125, 1411.15087890625, 150.14266967773438, 362.6733703613281, 366.0122985839844, -56.143653869628906, 328.9355773925781, 25.4437198638916, -326.8421936035156, 375.036865234375, 957.9340209960938, 1372.1080322265625, 297.7771911621094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000580.npy"}
|
||||
{"epoch": 0.8516886930983847, "step": 581, "batch_size": 64, "mean": 345.31573486328125, "std": 627.6573486328125, "min": -846.57763671875, "p10": -476.9214172363281, "median": 354.0663604736328, "p90": 997.1770141601564, "max": 2955.347412109375, "pos_frac": 0.75, "sample": [192.82601928710938, 148.32469177246094, 169.37576293945312, -128.64671325683594, 380.57427978515625, 418.01593017578125, -432.9635009765625, 118.28838348388672, 958.0179443359375, -598.397705078125, 742.5160522460938, 716.7891845703125, 517.5882568359375, 343.23199462890625, 618.4339599609375, -482.9017639160156, 1365.674072265625, 887.983154296875, 485.49652099609375, 1276.5758056640625, -213.7474822998047, 636.4780883789062, 62.7529296875, 589.9141235351562, 546.8595581054688, 519.0469970703125, 278.7945251464844, 203.24090576171875, 93.1502685546875, 1013.95947265625, -282.43927001953125, 2955.347412109375, -35.82034683227539, 1569.440185546875, 264.15625, 486.260986328125, 848.599853515625, -151.4739227294922, 69.39258575439453, 479.50128173828125, 330.74859619140625, 96.46493530273438, -83.5111083984375, -305.212890625, 1112.7293701171875, 244.78810119628906, 650.3064575195312, 895.8655395507812, 1333.9276123046875, 625.0048828125, -583.43017578125, -846.57763671875, 28.24422836303711, -561.7113037109375, 626.7789306640625, 400.34783935546875, -477.4913330078125, -798.1514892578125, 65.23687744140625, -475.59161376953125, 364.9007263183594, 792.48388671875, 378.11480712890625, 655.7261352539062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000581.npy"}
|
||||
{"epoch": 0.8531571218795888, "step": 582, "batch_size": 64, "mean": 401.499267578125, "std": 574.64404296875, "min": -1132.9083251953125, "p10": -239.68021392822266, "median": 351.0791778564453, "p90": 1129.4213378906252, "max": 1830.8555908203125, "pos_frac": 0.734375, "sample": [-1132.9083251953125, 238.81988525390625, 1244.8531494140625, 354.2232666015625, 606.7293701171875, -233.408935546875, 292.1761169433594, 471.031005859375, 380.732177734375, -42.649417877197266, 357.96746826171875, 80.95806121826172, -42.91877746582031, 800.0831298828125, 968.9978637695312, 1510.56494140625, -30.54957389831543, -7.5203704833984375, -283.4476318359375, 1151.6220703125, 388.29595947265625, 95.35316467285156, 604.8182983398438, -626.4608154296875, -265.11566162109375, 948.5892333984375, 215.203125, 1072.267822265625, 1519.219970703125, 1830.8555908203125, 894.3765869140625, 398.6197204589844, 1183.873779296875, -45.569862365722656, -381.10540771484375, 239.98770141601562, 55.451393127441406, 185.24635314941406, 651.805908203125, 636.3229370117188, 347.9350891113281, -242.36790466308594, 914.9826049804688, 1056.2567138671875, -15.947372436523438, 1077.61962890625, -31.381568908691406, 769.0408325195312, -190.1840057373047, 243.37586975097656, 1.9030952453613281, 816.3033447265625, 1074.1494140625, 358.24212646484375, -41.63152313232422, -942.43896484375, 285.2035217285156, 251.87088012695312, 707.716796875, 672.6443481445312, 1217.998291015625, 294.3172912597656, 692.7258911132812, 90.22647857666016], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000582.npy"}
|
||||
{"epoch": 0.8546255506607929, "step": 583, "batch_size": 64, "mean": 249.65298461914062, "std": 673.0624389648438, "min": -1280.4891357421875, "p10": -611.0190307617187, "median": 234.35025024414062, "p90": 1004.9978637695312, "max": 2030.7437744140625, "pos_frac": 0.71875, "sample": [172.09255981445312, 244.53350830078125, 325.6985168457031, -643.7404174804688, 995.57666015625, -11.829452514648438, 1009.0355224609375, 378.7288513183594, 2011.8900146484375, 161.4249267578125, -198.94842529296875, 67.90113830566406, -952.1722412109375, 320.62750244140625, 162.6041717529297, -291.1265563964844, 897.0436401367188, 905.359130859375, 350.3451232910156, 1086.2977294921875, 517.2088012695312, 65.87320709228516, -534.6691284179688, -397.94171142578125, -107.15021514892578, -908.2457275390625, 314.95489501953125, 204.87548828125, -511.9703369140625, -810.6237182617188, -87.52922058105469, -269.4563903808594, 538.2451782226562, 550.1349487304688, 427.7219543457031, 2030.7437744140625, 412.2984619140625, 12.570381164550781, 981.9725341796875, 224.1669921875, 2.089569091796875, 367.9423522949219, 372.8809814453125, -96.92268371582031, -1132.7890625, -1280.4891357421875, 1663.637939453125, 467.2982177734375, -429.2371520996094, 717.25390625, 957.4393310546875, 1359.5350341796875, 191.0130615234375, 505.1282653808594, 857.99365234375, -751.1650390625, 130.86647033691406, 515.9238891601562, 56.1463623046875, 271.8784484863281, 1260.3074951171875, 36.07147216796875, 12.085639953613281, 278.3789367675781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000583.npy"}
|
||||
{"epoch": 0.856093979441997, "step": 584, "batch_size": 64, "mean": 490.79443359375, "std": 604.7607421875, "min": -640.2380981445312, "p10": -158.251513671875, "median": 350.5055694580078, "p90": 1290.2760375976563, "max": 2394.0078125, "pos_frac": 0.828125, "sample": [516.9074096679688, 239.75889587402344, -318.1685791015625, 291.4659729003906, -19.59100341796875, -447.545654296875, 614.164306640625, 463.10784912109375, 1121.6005859375, 35.7484245300293, 31.4742431640625, -140.40826416015625, 114.4305419921875, 159.66055297851562, 386.41094970703125, 1085.86572265625, -165.89862060546875, 1275.230224609375, 878.2994384765625, -405.5901794433594, 281.6703796386719, 598.5240478515625, -32.36433410644531, 261.6307373046875, 340.1198425292969, 232.39767456054688, 1362.0498046875, 123.40946197509766, 208.90875244140625, 226.02017211914062, 1124.4658203125, 603.3294677734375, 291.0267333984375, -640.2380981445312, 736.23046875, 369.22332763671875, 612.406982421875, 512.7056274414062, 272.7760925292969, 49.233184814453125, 462.01068115234375, -328.336181640625, 360.89129638671875, 1746.6844482421875, 888.1972045898438, 338.26153564453125, 1130.7628173828125, 1203.83154296875, -281.0386962890625, 2394.0078125, 1937.1981201171875, 30.854049682617188, 1645.8853759765625, 1296.7242431640625, 411.01007080078125, 1575.166748046875, 206.57667541503906, 701.3740234375, -76.8565902709961, 280.5920104980469, 555.6659545898438, 400.35504150390625, 1129.801513671875, 150.77586364746094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000584.npy"}
|
||||
{"epoch": 0.8575624082232012, "step": 585, "batch_size": 64, "mean": 442.110595703125, "std": 734.2393798828125, "min": -1668.5323486328125, "p10": -381.0555847167968, "median": 465.81861877441406, "p90": 1377.6603393554692, "max": 2762.061767578125, "pos_frac": 0.75, "sample": [194.168701171875, -221.10997009277344, 658.8006591796875, 330.706787109375, 534.8190307617188, 2762.061767578125, 1022.6080932617188, -649.3662109375, 356.5477600097656, 235.36444091796875, 1046.5377197265625, 23.005386352539062, 577.89306640625, 568.872802734375, -91.6442642211914, 1598.87744140625, 1415.2960205078125, 651.6348266601562, 1513.0628662109375, 551.2330322265625, 464.0590515136719, 1289.84375, 182.633544921875, 495.2181701660156, 1106.636474609375, -56.77986145019531, 780.5677490234375, 1066.3958740234375, 669.1818237304688, 161.78085327148438, 74.72334289550781, 765.402587890625, 1442.51171875, 68.32470703125, 1067.4609375, 224.82601928710938, 1014.7398681640625, -268.0769348144531, -71.38352966308594, 897.2908325195312, 397.4135437011719, -1668.5323486328125, 852.7033081054688, -803.5137939453125, 895.9066772460938, 520.4011840820312, 467.60125732421875, -487.8830871582031, 878.3753051757812, -256.0103454589844, -80.53350830078125, -1151.891845703125, -100.18418884277344, -429.4750061035156, 10.574478149414062, 1575.509765625, 467.57818603515625, -553.7380981445312, 851.3660278320312, -35.05403137207031, 161.35447692871094, 124.30702209472656, 185.55226135253906, 2018.522705078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000585.npy"}
|
||||
{"epoch": 0.8590308370044053, "step": 586, "batch_size": 64, "mean": 289.494140625, "std": 595.770263671875, "min": -1201.45751953125, "p10": -382.7334045410156, "median": 335.82752990722656, "p90": 971.8864868164065, "max": 1676.7799072265625, "pos_frac": 0.671875, "sample": [1202.2061767578125, -211.9365234375, -229.0386962890625, 510.3953857421875, 602.7681884765625, 318.5869445800781, -262.6316223144531, 339.563232421875, 338.5274353027344, -161.33248901367188, -604.88525390625, 366.6762390136719, 761.5673217773438, 1515.36572265625, -73.38338470458984, -144.7608184814453, 559.15869140625, -730.8465576171875, 204.80514526367188, 217.9802703857422, -138.8806915283203, 5.105432510375977, 1097.345703125, 774.1703491210938, 891.8683471679688, -57.76292419433594, 750.226806640625, -297.60565185546875, -100.56061553955078, 388.4817810058594, 723.2748413085938, -151.74737548828125, 377.13580322265625, -1201.45751953125, 602.9392700195312, 694.6619873046875, 1519.20751953125, -382.8138427734375, 509.23468017578125, -136.3304443359375, 906.768798828125, -521.216796875, 456.1365966796875, 435.3522644042969, -1061.7921142578125, -717.2213134765625, 408.7952575683594, 441.16748046875, 167.31809997558594, 543.7615356445312, -382.54571533203125, 333.12762451171875, 999.7940673828125, 1532.574951171875, 877.8237915039062, 232.5140838623047, 315.7464599609375, 1676.7799072265625, 81.65221405029297, -93.39429473876953, 192.2852783203125, 779.4610595703125, 407.74346923828125, 129.71340942382812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000586.npy"}
|
||||
{"epoch": 0.8604992657856094, "step": 587, "batch_size": 64, "mean": 415.85150146484375, "std": 572.4729614257812, "min": -1396.8052978515625, "p10": -172.3781326293945, "median": 371.62831115722656, "p90": 1294.3538452148441, "max": 1985.3740234375, "pos_frac": 0.78125, "sample": [535.3759765625, 1067.197265625, 452.85064697265625, 147.2445068359375, 336.1346130371094, 429.5535888671875, 662.5956420898438, -62.03229904174805, -0.8197174072265625, 1473.4835205078125, -421.45062255859375, -95.97970581054688, 1985.3740234375, 350.0694885253906, 248.44061279296875, 518.6170043945312, -21.194488525390625, 484.43084716796875, 404.0581970214844, 387.79156494140625, -234.2057647705078, 682.9285888671875, 1389.41943359375, 18.953275680541992, 408.44482421875, 1182.7374267578125, -239.8907928466797, 955.4302368164062, 762.2923583984375, 25.135055541992188, 95.33483123779297, 22.028892517089844, -445.42803955078125, 76.82241821289062, 685.6473388671875, 123.96446228027344, 199.92669677734375, -1396.8052978515625, 1028.8133544921875, 172.177734375, -372.65777587890625, 816.804443359375, 166.49822998046875, 716.9727783203125, -68.89768981933594, 1342.189453125, 711.384765625, -180.26622009277344, 208.62741088867188, 342.5447082519531, 492.8392333984375, 1604.82275390625, 396.435546875, 700.197509765625, 515.3756713867188, 628.69580078125, 962.6951904296875, -153.97259521484375, 310.0478515625, 19.672924041748047, 355.4650573730469, -67.55615234375, 1415.197021484375, 1355.91259765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000587.npy"}
|
||||
{"epoch": 0.8619676945668135, "step": 588, "batch_size": 64, "mean": 346.6522216796875, "std": 518.298583984375, "min": -912.830322265625, "p10": -283.96333618164056, "median": 313.8847351074219, "p90": 1005.8801330566407, "max": 1428.9361572265625, "pos_frac": 0.75, "sample": [171.4990997314453, 560.9788208007812, -227.16635131835938, 135.95037841796875, 31.560409545898438, 682.071044921875, 708.6807861328125, 55.527984619140625, 866.7720336914062, 687.6226806640625, 118.7501220703125, -384.599365234375, -48.518798828125, 538.5955810546875, -308.3049011230469, -413.0432434082031, 836.0670166015625, 523.157958984375, 187.15155029296875, 259.8909912109375, 645.293701171875, -127.07656860351562, -87.08671569824219, 1137.97265625, 170.88722229003906, 441.434814453125, 1415.7598876953125, -89.5345458984375, -471.91827392578125, 115.92913055419922, 295.6549987792969, -68.91670227050781, 289.32720947265625, 1013.6502685546875, 954.967529296875, 485.4552001953125, 181.18653869628906, -220.14401245117188, 1258.7403564453125, 987.7498168945312, 1240.714111328125, 752.1481323242188, 27.917327880859375, -653.1515502929688, 717.1288452148438, 456.6427307128906, 170.8946075439453, 939.908203125, -912.830322265625, 394.768310546875, -16.82724380493164, 1428.9361572265625, 1210.91015625, 407.92718505859375, 452.0445556640625, 616.3291015625, 597.499755859375, 345.2765808105469, -195.95469665527344, 203.4825439453125, 7.682823181152344, 332.1144714355469, 928.1594848632812, -577.9547729492188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000588.npy"}
|
||||
{"epoch": 0.8634361233480177, "step": 589, "batch_size": 64, "mean": 461.2321472167969, "std": 581.2136840820312, "min": -846.5496826171875, "p10": -219.83479919433591, "median": 397.1772918701172, "p90": 1340.1416137695314, "max": 2217.64208984375, "pos_frac": 0.828125, "sample": [1610.265625, 302.5161437988281, 349.502197265625, 34.18973922729492, 728.27490234375, 104.10728454589844, -158.68714904785156, 616.056396484375, 391.842529296875, -382.83160400390625, -75.55561828613281, 402.5120544433594, 95.99422454833984, 219.5514678955078, 645.6544189453125, 1360.369384765625, 2217.64208984375, 150.1231689453125, 494.09674072265625, 323.5444030761719, 492.26080322265625, 218.8316650390625, 155.20863342285156, 1371.1610107421875, -29.6910400390625, 522.0389404296875, 292.0408630371094, 667.598876953125, 1599.5179443359375, -210.21319580078125, 565.0260009765625, 678.5405883789062, 819.72509765625, 547.529296875, 1157.7381591796875, -223.95834350585938, -279.88885498046875, 160.41387939453125, 790.2800903320312, 108.2552490234375, 446.8105163574219, -587.4305419921875, 233.77590942382812, 839.87255859375, 588.0701904296875, 547.6283569335938, 1092.35107421875, 205.5220184326172, 629.559814453125, 1481.8404541015625, 611.9305419921875, 302.5693054199219, 374.92626953125, -846.5496826171875, 318.772216796875, -633.6989135742188, 689.1007080078125, 1485.4102783203125, 32.39656066894531, 303.0060729980469, 1292.9434814453125, 668.0992431640625, -479.7996520996094, 1090.1671142578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000589.npy"}
|
||||
{"epoch": 0.8649045521292217, "step": 590, "batch_size": 64, "mean": 452.76287841796875, "std": 577.2325439453125, "min": -416.7611083984375, "p10": -239.3719757080078, "median": 348.0946960449219, "p90": 1258.4825683593754, "max": 2069.1435546875, "pos_frac": 0.734375, "sample": [556.17041015625, 1580.1268310546875, 827.1237182617188, 300.015625, 990.9315185546875, -283.456298828125, 140.8243408203125, 2069.1435546875, 849.0896606445312, -216.92709350585938, 452.9882507324219, -261.3125, 368.9122314453125, 1450.0789794921875, -175.49224853515625, -90.26887512207031, -151.82415771484375, 50.538429260253906, 868.1683349609375, 233.06393432617188, 107.17977905273438, -353.3362121582031, -121.82504272460938, 293.619384765625, -137.63702392578125, 167.16375732421875, 775.4727172851562, -280.33172607421875, 1405.466796875, -110.4887924194336, 1062.27490234375, 451.52996826171875, -25.884071350097656, 1406.9718017578125, 225.71060180664062, 429.5011291503906, 375.6519775390625, 584.738037109375, 1188.48974609375, 540.5128784179688, 507.76226806640625, -248.9912109375, -95.60064697265625, -416.7611083984375, 327.27716064453125, 67.39353942871094, 249.44606018066406, 969.2794189453125, 268.9041748046875, -376.316650390625, 446.7027282714844, 1288.4794921875, 295.799560546875, -70.48876190185547, 640.2354736328125, 719.6399536132812, 912.47412109375, 1852.538818359375, 178.80661010742188, 1099.0517578125, 1086.3665771484375, 880.3070068359375, 173.0186309814453, 678.826171875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000590.npy"}
|
||||
{"epoch": 0.8663729809104258, "step": 591, "batch_size": 64, "mean": 407.99945068359375, "std": 576.1973876953125, "min": -1005.2146606445312, "p10": -365.83306884765614, "median": 417.767333984375, "p90": 1081.8224731445314, "max": 1788.0003662109375, "pos_frac": 0.84375, "sample": [1.5640945434570312, 1151.937255859375, 1409.6611328125, 677.6277465820312, 988.7962646484375, -140.21434020996094, 26.597822189331055, -1005.2146606445312, 563.9826049804688, 1788.0003662109375, 953.827392578125, 137.78768920898438, 682.8153686523438, 392.9398193359375, 57.352142333984375, 1638.7696533203125, 792.7969970703125, 671.4701538085938, 128.94515991210938, 254.780517578125, -32.15748596191406, 962.8179321289062, 145.38392639160156, 95.67205047607422, 608.3418579101562, 193.94515991210938, -745.19580078125, 687.720947265625, 527.3141479492188, 403.18267822265625, -283.120361328125, 118.55792236328125, 432.35198974609375, -686.08935546875, 1088.37255859375, 167.22813415527344, 179.10452270507812, 66.70112609863281, 661.4025268554688, -401.2813720703125, -441.4404296875, 814.5084838867188, 135.4359893798828, 457.1747131347656, -606.3113403320312, 989.684814453125, 852.1888427734375, 71.83013916015625, 637.7411499023438, 1039.710693359375, 38.647727966308594, 1066.5389404296875, 910.1082763671875, 121.98663330078125, 1312.0350341796875, 493.17840576171875, 472.2745056152344, 1223.10986328125, -732.6856079101562, 697.4066162109375, 128.60398864746094, 505.2265625, 309.48138427734375, 251.05426025390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000591.npy"}
|
||||
{"epoch": 0.8678414096916299, "step": 592, "batch_size": 64, "mean": 372.1299133300781, "std": 731.6919555664062, "min": -1348.8336181640625, "p10": -507.46376037597656, "median": 376.2057342529297, "p90": 1269.70166015625, "max": 2049.67529296875, "pos_frac": 0.734375, "sample": [325.2906494140625, 1155.952880859375, -90.10238647460938, 416.64910888671875, -906.3650512695312, 205.0647735595703, 610.403076171875, 988.213623046875, 1783.186767578125, 1600.6900634765625, -952.4943237304688, 2049.67529296875, 275.2704162597656, 847.38037109375, -1034.8055419921875, -493.8439025878906, -121.60558319091797, 373.43585205078125, 137.7171630859375, 1278.7576904296875, 124.87720489501953, 1133.4462890625, -38.279258728027344, 1248.5709228515625, 437.1529846191406, 86.60186767578125, 174.51742553710938, 961.7015991210938, 1578.523681640625, 574.3982543945312, 59.92916488647461, -315.0589599609375, 112.856201171875, 689.531982421875, 1056.956787109375, 257.587646484375, -513.3008422851562, 358.5240783691406, -439.83062744140625, 606.7583618164062, 504.2415466308594, -173.12973022460938, 747.4376831054688, 1619.544921875, 291.86566162109375, -339.95501708984375, 528.7958984375, -1348.8336181640625, 852.327880859375, 378.9756164550781, 664.37158203125, 92.07466125488281, 802.7699584960938, 550.1109619140625, -919.8585815429688, 880.0099487304688, 674.5894165039062, 135.00433349609375, 1349.190673828125, -260.0947570800781, -1330.951904296875, 868.98779296875, 646.0751342773438, -1.1714420318603516], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000592.npy"}
|
||||
{"epoch": 0.869309838472834, "step": 593, "batch_size": 64, "mean": 292.0050048828125, "std": 640.7404174804688, "min": -1071.3353271484375, "p10": -487.2212585449218, "median": 200.50360107421875, "p90": 1223.0030761718751, "max": 1955.8447265625, "pos_frac": 0.65625, "sample": [-21.353900909423828, 727.348876953125, -260.9355773925781, 135.59417724609375, -621.9281616210938, -526.1098022460938, 1493.8685302734375, -656.458251953125, 1165.884033203125, 587.389404296875, -123.1710205078125, 1590.3369140625, 1335.9713134765625, 1091.471435546875, -6.120063781738281, -804.3013305664062, 1247.482666015625, 1572.6646728515625, 707.8572387695312, 157.13206481933594, -396.4813232421875, 190.27520751953125, 389.41021728515625, 76.32279205322266, 100.58232116699219, 1955.8447265625, 462.7491455078125, 119.9511489868164, 375.0020751953125, -835.2883911132812, -2.7525501251220703, 507.5074768066406, -348.4525451660156, 222.32534790039062, 724.8303833007812, 852.5767822265625, -126.47714233398438, 1256.6922607421875, -198.99209594726562, 371.82623291015625, -871.731689453125, 607.2698364257812, -339.4231262207031, -1071.3353271484375, -4.389768600463867, -96.568115234375, 0.10827064514160156, 30.86528778076172, 476.8417053222656, 816.9067993164062, 470.1436767578125, 344.7961730957031, 869.8275146484375, 489.0052490234375, 210.73199462890625, 794.0223999023438, 4.330535888671875, 40.33555603027344, -145.24295043945312, 708.948486328125, -112.84259033203125, 447.1849365234375, -221.1617431640625, 749.6213989257812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000593.npy"}
|
||||
{"epoch": 0.8707782672540382, "step": 594, "batch_size": 64, "mean": 371.1469421386719, "std": 615.4995727539062, "min": -917.7213745117188, "p10": -363.20031433105464, "median": 322.06585693359375, "p90": 1174.1113647460938, "max": 1784.0067138671875, "pos_frac": 0.75, "sample": [1005.670166015625, 8.091888427734375, 148.17835998535156, 84.00155639648438, -56.4605712890625, -742.4359741210938, 618.515380859375, 458.34979248046875, 121.85599517822266, 767.1654663085938, 500.64849853515625, 368.0613098144531, 1784.0067138671875, 731.1802978515625, 141.92391967773438, 912.59716796875, 665.8353881835938, 254.24794006347656, 1151.154296875, -427.2435607910156, -579.2528686523438, 1440.1607666015625, -904.72802734375, 1770.4161376953125, -259.54132080078125, 530.9542846679688, -78.95040130615234, 399.1547546386719, 484.2202453613281, -178.63955688476562, 97.56495666503906, 434.8505859375, 1339.8790283203125, 633.7910766601562, 310.65789794921875, 564.9837646484375, 645.8575439453125, 30.577259063720703, 268.6320495605469, -728.4866333007812, -41.41916275024414, 974.6874389648438, 149.66317749023438, -184.56777954101562, 1176.5440673828125, 936.370361328125, 871.4189453125, 606.0053100585938, -377.8862609863281, 464.85821533203125, -917.7213745117188, 39.514190673828125, 259.67950439453125, -203.00933837890625, 333.47381591796875, -41.07834243774414, 1168.43505859375, 915.4976806640625, 1600.674560546875, 244.70240783691406, 1248.5771484375, -328.93310546875, 83.8548583984375, 56.61675262451172], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000594.npy"}
|
||||
{"epoch": 0.8722466960352423, "step": 595, "batch_size": 64, "mean": 413.5597229003906, "std": 585.2753295898438, "min": -563.7857666015625, "p10": -201.22907714843745, "median": 303.1824951171875, "p90": 1042.8740600585938, "max": 2684.977294921875, "pos_frac": 0.796875, "sample": [899.6279296875, 307.0675048828125, 582.3409423828125, 38.36664581298828, -13.271644592285156, 1024.3214111328125, 354.48675537109375, 952.5617065429688, -309.61358642578125, 346.35943603515625, 1554.522216796875, 111.07209777832031, 1213.571044921875, 974.6923828125, -214.24830627441406, 375.39410400390625, 316.6230163574219, 1780.279052734375, 210.45411682128906, -121.27249145507812, 68.57989501953125, -229.20736694335938, 645.499755859375, 380.1827392578125, -217.68661499023438, -236.56228637695312, 202.61325073242188, 92.5076675415039, 299.2974853515625, 287.8550109863281, 163.79220581054688, 237.71014404296875, 273.106689453125, 294.51849365234375, 876.803466796875, 24.69249153137207, 333.8897399902344, 611.13134765625, 1600.347412109375, 317.708740234375, -43.78047180175781, -563.7857666015625, 465.9863586425781, -359.3498229980469, 394.4241638183594, 386.0611572265625, 263.3092346191406, -170.8508758544922, 671.4159545898438, 679.8154296875, 12.135185241699219, 789.8303833007812, 95.01197814941406, -103.88832092285156, 421.53350830078125, 109.21517944335938, 1050.8251953125, 98.62179565429688, -130.6451416015625, 3.9603614807128906, 1922.4022216796875, 531.60009765625, 848.8814697265625, 2684.977294921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000595.npy"}
|
||||
{"epoch": 0.8737151248164464, "step": 596, "batch_size": 64, "mean": 291.58941650390625, "std": 624.2456665039062, "min": -1153.9522705078125, "p10": -460.46460266113274, "median": 164.13872528076172, "p90": 1244.934912109376, "max": 2043.0533447265625, "pos_frac": 0.75, "sample": [-312.26611328125, 17.77703857421875, 695.5166625976562, 56.16075134277344, 878.3465576171875, 589.3322143554688, 121.80159759521484, 498.61724853515625, 135.29574584960938, 593.6116333007812, 2043.0533447265625, -119.03237915039062, -585.2066650390625, 439.42041015625, 1026.13671875, 1449.81982421875, 110.04762268066406, 158.0444793701172, 485.39019775390625, 647.281494140625, -124.16493225097656, 555.1549072265625, 55.02067184448242, 165.64051818847656, -494.4996337890625, 262.77392578125, 545.9671020507812, 392.8588562011719, 228.6118621826172, 139.87281799316406, -366.416015625, 124.0675277709961, -591.0460205078125, 1552.3612060546875, 35.61235046386719, 1018.100341796875, 1525.905029296875, 531.0587158203125, 16.813413619995117, -88.06087493896484, 278.1348876953125, 548.7186889648438, 140.4508056640625, 326.7093811035156, -195.7138671875, 327.997802734375, 1338.70556640625, 220.87242126464844, -288.8415222167969, -253.02345275878906, 1826.9403076171875, 49.752845764160156, -1153.9522705078125, 162.63693237304688, 280.3246154785156, -685.1963500976562, 135.60939025878906, -606.3668823242188, 676.5333862304688, 143.44223022460938, 1504.776611328125, -381.0495300292969, 447.0028076171875, -597.523193359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000596.npy"}
|
||||
{"epoch": 0.8751835535976505, "step": 597, "batch_size": 64, "mean": 501.1011047363281, "std": 699.3677978515625, "min": -1190.7113037109375, "p10": -142.0111083984375, "median": 383.52488708496094, "p90": 1472.1178955078126, "max": 2383.426025390625, "pos_frac": 0.796875, "sample": [907.55224609375, 583.5866088867188, 1158.08447265625, 285.55810546875, 434.4228515625, 1542.3145751953125, -143.71536254882812, 1017.6296997070312, 1488.1170654296875, -407.232177734375, -63.525718688964844, -96.75042724609375, 290.8136291503906, 56.067989349365234, 695.0469970703125, 82.9286880493164, 1119.83349609375, 100.17372131347656, 367.3702697753906, 1831.8890380859375, 401.0125427246094, 501.093994140625, 28.386810302734375, 1080.4256591796875, 201.5778045654297, 292.0544128417969, 37.4832878112793, 212.62644958496094, 136.2286834716797, -994.4031982421875, -1190.7113037109375, -45.91510009765625, 1294.72119140625, 399.67950439453125, 605.2633056640625, 2279.083251953125, 1366.471923828125, -53.08350372314453, -417.666015625, 84.04711151123047, 690.3079833984375, 1254.791015625, 26.357032775878906, 272.8927001953125, 407.1424865722656, -138.03451538085938, 594.8178100585938, 900.8588256835938, 52.82914733886719, 1434.7864990234375, 154.8592529296875, -250.71002197265625, -198.87646484375, 2383.426025390625, 1935.1414794921875, 61.90086364746094, 589.8607177734375, 519.3477783203125, 752.8250732421875, 711.9656982421875, 113.88957214355469, 1543.407958984375, -5.034873962402344, 793.1764526367188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000597.npy"}
|
||||
{"epoch": 0.8766519823788547, "step": 598, "batch_size": 64, "mean": 394.01727294921875, "std": 591.6588134765625, "min": -1169.5767822265625, "p10": -158.97265472412107, "median": 412.51173400878906, "p90": 1134.2729614257814, "max": 2201.1884765625, "pos_frac": 0.75, "sample": [448.0111083984375, 633.685302734375, 405.9939270019531, 1069.164794921875, -1169.5767822265625, 322.5455627441406, 1106.8460693359375, 677.9136352539062, 199.24542236328125, 1146.02734375, -31.267425537109375, -701.6063232421875, -907.557373046875, 187.5254669189453, 1203.3642578125, -149.9004364013672, 694.5324096679688, 744.5457153320312, 1294.7998046875, 1757.054931640625, 250.36553955078125, 1262.7353515625, 714.333740234375, -275.068359375, 282.30804443359375, 447.2619323730469, 557.907470703125, 356.05718994140625, -1009.9009399414062, 147.69361877441406, 625.2706298828125, 626.9745483398438, 472.80255126953125, -126.17310333251953, -399.43560791015625, 552.4498901367188, 636.4801025390625, 1220.0419921875, 196.06666564941406, 321.71917724609375, 206.65054321289062, 208.50079345703125, -106.13021087646484, 501.7786560058594, -17.58582878112793, 680.2510986328125, 419.029541015625, 423.34368896484375, -3.8012256622314453, 111.41126251220703, 353.0382995605469, 2201.1884765625, 787.357177734375, 780.14794921875, 554.4114379882812, 293.6723327636719, -59.709716796875, 990.9041748046875, 545.7037963867188, -162.86074829101562, 18.463119506835938, -113.36048126220703, 822.074951171875, -8.611370086669922], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000598.npy"}
|
||||
{"epoch": 0.8781204111600588, "step": 599, "batch_size": 64, "mean": 334.4490051269531, "std": 512.0916137695312, "min": -2036.3336181640625, "p10": -137.8798736572265, "median": 318.95494079589844, "p90": 931.437420654297, "max": 1550.001220703125, "pos_frac": 0.859375, "sample": [432.4333801269531, 827.3602294921875, 319.1585693359375, 465.8375244140625, 797.9978637695312, 208.54766845703125, 1550.001220703125, 264.63507080078125, 215.05303955078125, 365.8104248046875, 449.9862060546875, -682.2843017578125, 6.40557861328125, 87.89935302734375, 0.46076011657714844, 383.603271484375, 906.6297607421875, 942.0692749023438, 548.2816162109375, 1247.8538818359375, 190.41900634765625, 588.2130126953125, 730.43212890625, 441.4924011230469, 350.2371826171875, 46.76323699951172, 128.91558837890625, 308.186279296875, 261.54443359375, -2036.3336181640625, 698.1785888671875, 264.6331787109375, 510.7486572265625, 267.5810546875, 1095.1356201171875, 717.7213745117188, 564.1463623046875, -96.6290283203125, 1162.9342041015625, 180.91989135742188, 447.7296142578125, 120.08859252929688, 900.751708984375, -251.3662567138672, -50.21827697753906, 327.86322021484375, 522.7062377929688, 158.31436157226562, 324.8686218261719, 135.24032592773438, 1075.9495849609375, 229.56333923339844, 107.65491485595703, 15.024238586425781, -326.3965759277344, 33.929866790771484, 280.5202941894531, 424.71466064453125, -294.10260009765625, 1192.65185546875, -155.55880737304688, -305.76300048828125, 318.7513122558594, 458.8396301269531], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000599.npy"}
|
||||
{"epoch": 0.8795888399412628, "step": 600, "batch_size": 64, "mean": 313.2653503417969, "std": 615.0382080078125, "min": -1123.8260498046875, "p10": -537.7569519042968, "median": 209.2383270263672, "p90": 1062.9716796875, "max": 2317.26123046875, "pos_frac": 0.765625, "sample": [165.61888122558594, 647.701171875, 614.3322143554688, -1123.8260498046875, 847.2069702148438, 590.743408203125, -59.433467864990234, 579.655029296875, -618.0671997070312, 505.6534118652344, 81.28487396240234, -60.95444107055664, 1009.1268310546875, -225.84561157226562, 293.7010498046875, 642.860595703125, -713.8639526367188, 1296.1336669921875, 75.9989242553711, 319.0572509765625, 807.5867919921875, 1002.15869140625, -182.1487579345703, 273.2295227050781, 14.128555297851562, -363.77001953125, 703.3697509765625, 1095.8104248046875, 1171.91357421875, 834.5504760742188, 145.78573608398438, 11.661041259765625, 981.5238037109375, 72.55951690673828, 1070.978515625, 625.5198974609375, 29.027450561523438, 250.2084197998047, -455.88519287109375, 1370.8717041015625, 2317.26123046875, 196.61936950683594, -154.77207946777344, 21.62071990966797, 1389.2181396484375, 163.9537353515625, 782.8114624023438, 30.037059783935547, -776.5764770507812, 518.957275390625, 81.69563293457031, 1044.2890625, 330.12420654296875, 9.765556335449219, 221.85728454589844, 786.3657836914062, -591.05859375, 192.77371215820312, 6.357107162475586, -572.8448486328125, -628.0413208007812, 481.094482421875, -210.73550415039062, 82.0467758178711], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000600.npy"}
|
||||
{"epoch": 0.8810572687224669, "step": 601, "batch_size": 64, "mean": 405.19476318359375, "std": 532.619140625, "min": -1110.34814453125, "p10": -234.9526885986328, "median": 366.0316467285156, "p90": 1135.3258544921878, "max": 1709.3031005859375, "pos_frac": 0.78125, "sample": [660.879150390625, 1190.882080078125, 1709.3031005859375, 715.5115966796875, -54.9166145324707, 1019.7229614257812, 370.26861572265625, 156.08642578125, -204.81036376953125, 256.84051513671875, -238.8753662109375, -275.20880126953125, 1271.1201171875, 449.80328369140625, 274.0904235839844, 653.1769409179688, 361.794677734375, 642.775390625, -225.79977416992188, 1364.40283203125, 679.9334716796875, 593.4434814453125, 779.1557006835938, -57.17869186401367, 566.856689453125, 248.58494567871094, -43.97412109375, 565.0698852539062, 816.4517822265625, 230.66531372070312, 421.4316711425781, 317.08441162109375, -889.308837890625, 613.912109375, -380.9629821777344, 591.0935668945312, 146.233642578125, -38.37455749511719, 158.92098999023438, 252.266845703125, 564.7640380859375, 702.4067993164062, 137.37969970703125, -1110.34814453125, 1169.2652587890625, -453.3001708984375, 43.164947509765625, 1501.8248291015625, 614.1910400390625, -251.41650390625, 838.1840209960938, 894.9513549804688, 196.64549255371094, 356.1606750488281, 194.5740509033203, 972.6663208007812, 249.84107971191406, 1239.83056640625, 1056.1339111328125, 397.833984375, -91.63773345947266, 236.60174560546875, 443.6636962890625, 360.73150634765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000601.npy"}
|
||||
{"epoch": 0.882525697503671, "step": 602, "batch_size": 64, "mean": 500.39605712890625, "std": 547.3839111328125, "min": -495.4358215332031, "p10": -86.11441192626948, "median": 383.1275177001953, "p90": 1125.6633056640626, "max": 2324.347412109375, "pos_frac": 0.84375, "sample": [444.8145446777344, 156.12677001953125, -139.09205627441406, 334.3108215332031, 38.349891662597656, 160.59661865234375, 975.5265502929688, 734.2977294921875, 190.2393341064453, 262.47393798828125, 431.9442138671875, 69.37208557128906, 677.3761596679688, 979.1334228515625, 1134.6407470703125, 60.08838653564453, 480.05224609375, 325.40240478515625, 434.07720947265625, 1247.925048828125, -18.351150512695312, 224.010498046875, -35.15594482421875, -495.4358215332031, 1073.443359375, 734.18310546875, 295.38397216796875, 527.7708129882812, 132.06878662109375, -104.42900085449219, 97.2922592163086, 1004.61279296875, 883.3948364257812, 145.52210998535156, 757.9891357421875, 1233.724365234375, 247.93789672851562, 482.78753662109375, -431.4715881347656, -292.2138977050781, 797.7603759765625, 1104.7159423828125, 749.0505981445312, 952.8505859375, 747.14111328125, 836.9019775390625, 2324.347412109375, -113.92634582519531, 1178.3009033203125, -402.29315185546875, 1032.356201171875, 829.7449951171875, 231.13674926757812, 171.34500122070312, 1267.452392578125, -43.38037109375, 279.91070556640625, 839.6138916015625, 154.14566040039062, 117.89230346679688, 802.7836303710938, 2185.2822265625, 310.4796447753906, 211.01597595214844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000602.npy"}
|
||||
{"epoch": 0.8839941262848752, "step": 603, "batch_size": 64, "mean": 453.99981689453125, "std": 660.41259765625, "min": -915.5481567382812, "p10": -313.27838745117185, "median": 361.95921325683594, "p90": 1319.7637207031253, "max": 2498.57177734375, "pos_frac": 0.734375, "sample": [-383.771728515625, -246.10765075683594, 364.97528076171875, 43.67902374267578, 1340.0657958984375, -21.935455322265625, 1144.154052734375, 413.06878662109375, 862.97265625, 1262.939208984375, -74.61663818359375, 29.842063903808594, -641.833251953125, 840.6045532226562, 51.769840240478516, 417.9892272949219, 46.72943115234375, 1420.2906494140625, 1888.782958984375, 488.8296203613281, 980.9922485351562, 904.462890625, 695.973876953125, 54.472599029541016, 266.54949951171875, 301.0816345214844, 435.51495361328125, 135.26242065429688, -15.19277572631836, 253.27801513671875, 1563.01220703125, -915.5481567382812, 1656.816650390625, -195.73544311523438, -5.2594146728515625, 349.974609375, 933.9203491210938, -311.885009765625, 662.0243530273438, 545.3050537109375, 925.99853515625, -313.87554931640625, -88.68681335449219, -38.64542007446289, -125.5925521850586, 550.2673950195312, 776.7457885742188, 138.4629669189453, -342.6831970214844, 437.6064453125, 486.7962646484375, 1272.3922119140625, 923.6002197265625, -366.2525634765625, 243.60711669921875, -459.96453857421875, 2498.57177734375, 358.9431457519531, 1718.966064453125, 43.3736572265625, 852.63916015625, 1104.8616943359375, 904.4666137695312, 10.941043853759766], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000603.npy"}
|
||||
{"epoch": 0.8854625550660793, "step": 604, "batch_size": 64, "mean": 493.2238464355469, "std": 678.4652709960938, "min": -895.0358276367188, "p10": -313.49714050292965, "median": 439.3319396972656, "p90": 1535.649353027344, "max": 2307.789794921875, "pos_frac": 0.71875, "sample": [-365.61749267578125, 172.90127563476562, -262.3268127441406, -117.42552185058594, 642.7030639648438, 989.7188720703125, 948.2613525390625, 1089.564453125, 932.5780639648438, 2307.789794921875, -895.0358276367188, 922.404541015625, -4.2804412841796875, -293.0589904785156, 855.203125, 1113.8271484375, 887.1531372070312, 87.37493896484375, 383.1335754394531, 990.5184326171875, 152.35882568359375, -589.8610229492188, 76.495361328125, 1497.471923828125, 605.858642578125, -577.8001098632812, 1665.19140625, -230.7788543701172, 140.74960327148438, 171.11962890625, -4.734153747558594, 440.644775390625, 682.9924926757812, 434.5022277832031, -67.78646850585938, 1636.0733642578125, 642.4938354492188, 28.865516662597656, -322.25634765625, 854.4354858398438, 586.8980102539062, 438.01910400390625, 499.4633483886719, 289.95843505859375, 427.1507873535156, 2206.14404296875, -527.6898193359375, 1593.71142578125, 453.6470642089844, -50.21153259277344, 604.885498046875, 1054.3968505859375, 789.7860107421875, -49.29774856567383, 1815.0687255859375, 561.0997924804688, -380.19915771484375, -17.849212646484375, 394.7258605957031, 865.9326782226562, -27.5833740234375, 500.33404541015625, 1552.0111083984375, 364.5019836425781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000604.npy"}
|
||||
{"epoch": 0.8869309838472834, "step": 605, "batch_size": 64, "mean": 364.563232421875, "std": 619.1599731445312, "min": -1080.8778076171875, "p10": -280.0462249755859, "median": 268.94615173339844, "p90": 1077.9835083007818, "max": 2804.8779296875, "pos_frac": 0.734375, "sample": [-55.34141540527344, -289.3349609375, -21.6883544921875, 195.22933959960938, -3.7275161743164062, 84.87379455566406, 806.4883422851562, 666.2239379882812, 889.0159912109375, 235.28546142578125, 616.6793212890625, -25.077728271484375, 142.04217529296875, -285.9608154296875, 449.58074951171875, 458.5934753417969, 167.33750915527344, 833.3178100585938, 1608.2188720703125, -263.12890625, 2804.8779296875, 114.95669555664062, 572.73486328125, 20.994234085083008, 184.9086151123047, 898.8948974609375, -159.04071044921875, -266.2455139160156, 306.29571533203125, 364.7637023925781, 47.723411560058594, -296.93896484375, 819.2278442382812, -510.6501159667969, -134.22027587890625, 437.4383239746094, 1147.77685546875, 88.26701354980469, 262.80120849609375, 741.6778564453125, 64.89759826660156, 2262.09912109375, -653.8203125, -1080.8778076171875, 491.2506103515625, 427.17449951171875, -299.1505432128906, 1166.8359375, 104.74964141845703, -4.756513595581055, 549.5943603515625, 433.5511474609375, 1195.9844970703125, 275.0910949707031, 241.1620635986328, 405.02752685546875, 421.5736083984375, 1140.0440673828125, 418.6727294921875, 188.2532501220703, 933.175537109375, 394.1478576660156, -161.23883056640625, 763.734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000605.npy"}
|
||||
{"epoch": 0.8883994126284875, "step": 606, "batch_size": 64, "mean": 406.254150390625, "std": 578.8989868164062, "min": -604.4510498046875, "p10": -307.6711807250976, "median": 307.5107116699219, "p90": 1068.1832275390625, "max": 2125.666259765625, "pos_frac": 0.75, "sample": [-492.88165283203125, 37.690032958984375, 138.55960083007812, -334.6037292480469, 334.2984313964844, 927.8582763671875, 287.6645812988281, 187.1148681640625, 1117.270751953125, 786.7747192382812, 153.0751495361328, 2125.666259765625, 901.281005859375, -34.853271484375, -515.354736328125, 233.05242919921875, 504.228271484375, 232.659423828125, 245.68008422851562, 1323.093994140625, 841.4561767578125, -415.386962890625, 1570.7025146484375, 22.112632751464844, 938.246337890625, 700.8404541015625, 561.2489013671875, 187.47901916503906, -91.4248275756836, -244.8285675048828, 507.64031982421875, 657.0533447265625, -441.51507568359375, 288.4375305175781, -561.3220825195312, 375.30322265625, 210.76992797851562, 692.5697631835938, -105.01212310791016, -105.3511962890625, -604.4510498046875, -126.30896759033203, 2109.659423828125, 1073.4830322265625, 251.85549926757812, 690.0985107421875, 79.93903350830078, 505.177001953125, 941.6738891601562, 322.89544677734375, -22.877365112304688, 863.2105712890625, -38.91133117675781, 292.1259765625, 1055.8170166015625, 1524.1204833984375, 354.000244140625, 670.8212280273438, -3.2524490356445312, 276.31158447265625, 528.5940551757812, 601.5214233398438, 562.0758666992188, 345.3937072753906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000606.npy"}
|
||||
{"epoch": 0.8898678414096917, "step": 607, "batch_size": 64, "mean": 347.606689453125, "std": 581.8700561523438, "min": -1150.8497314453125, "p10": -226.73369750976556, "median": 307.9254608154297, "p90": 1045.9131164550784, "max": 2569.103515625, "pos_frac": 0.8125, "sample": [468.2796630859375, -123.78273010253906, 529.8599243164062, 227.17625427246094, 149.92718505859375, 173.76002502441406, 1065.0953369140625, 430.13909912109375, 4.314788818359375, 1282.115478515625, -305.0166015625, 724.49853515625, 452.3182067871094, -251.39117431640625, 122.2978515625, 1065.883544921875, -663.5620727539062, 194.8248291015625, 377.02850341796875, 80.19928741455078, -169.1995849609375, 145.0402374267578, 376.7221984863281, 475.60595703125, -468.4803466796875, 616.4624633789062, -130.37261962890625, 1512.0107421875, 716.3009033203125, 126.26454162597656, 587.876708984375, 106.89154815673828, 763.1898803710938, 254.52944946289062, 542.3917846679688, 384.15869140625, 1001.1546020507812, 27.287353515625, 1522.5106201171875, 380.5259704589844, -1072.88720703125, 575.39501953125, -167.98724365234375, 533.41064453125, 681.08203125, 91.99237060546875, 3.8278579711914062, 125.93706512451172, -1150.8497314453125, 268.53179931640625, 531.58935546875, 326.1231384277344, -149.95474243164062, 628.614013671875, 249.91604614257812, 49.42222595214844, 289.727783203125, 2569.103515625, 1369.0555419921875, 707.2258911132812, 289.4372253417969, -347.7000732421875, 589.1575927734375, 481.8223876953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000607.npy"}
|
||||
{"epoch": 0.8913362701908958, "step": 608, "batch_size": 64, "mean": 394.43609619140625, "std": 487.6206359863281, "min": -508.9827575683594, "p10": -264.8615325927734, "median": 387.0048522949219, "p90": 936.8466552734377, "max": 1691.9454345703125, "pos_frac": 0.8125, "sample": [679.5803833007812, 842.6430053710938, 368.36395263671875, 508.54779052734375, -422.0726623535156, 1528.12158203125, -357.0835876464844, 132.4229278564453, -194.17909240722656, 572.2408447265625, 733.6663208007812, 48.07969665527344, 49.997718811035156, 614.7772827148438, -118.80723571777344, 146.0354766845703, -508.9827575683594, 439.86968994140625, -95.03868865966797, 336.0982360839844, 502.6417236328125, 641.241943359375, 88.46562194824219, 754.416259765625, 47.954505920410156, 408.07696533203125, 653.1060791015625, 487.4891662597656, 956.2774047851562, -265.09857177734375, 77.8162841796875, -45.45817565917969, 253.20545959472656, 323.5086975097656, 714.8521728515625, 299.56646728515625, 481.08355712890625, 405.645751953125, 1197.0113525390625, -297.56890869140625, 1690.763916015625, 219.02569580078125, 649.0408935546875, 1022.2266845703125, 1195.794189453125, 313.29534912109375, 63.476531982421875, 815.373046875, 44.253501892089844, -282.3627014160156, 613.103759765625, 3.4816360473632812, 546.12890625, 792.7034301757812, -264.3084411621094, 891.5082397460938, -344.7373352050781, 99.86697387695312, 1691.9454345703125, 730.3302001953125, 749.0147094726562, 515.2033081054688, 199.20346069335938, 301.0662841796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000608.npy"}
|
||||
{"epoch": 0.8928046989720999, "step": 609, "batch_size": 64, "mean": 509.67462158203125, "std": 781.104736328125, "min": -1132.5072021484375, "p10": -249.16799468994128, "median": 410.7147216796875, "p90": 1216.559851074219, "max": 3841.43115234375, "pos_frac": 0.78125, "sample": [75.50216674804688, 106.74121856689453, 104.2748794555664, -620.065673828125, 254.5305938720703, 408.35894775390625, 1256.4852294921875, 413.07049560546875, 592.2719116210938, -347.44512939453125, 769.5745239257812, 1785.91162109375, 99.28086853027344, 443.3001708984375, 765.9688110351562, -1132.5072021484375, -93.35134887695312, 513.1996459960938, 1036.768798828125, 397.5000915527344, 695.7838134765625, -100.40899658203125, 700.1193237304688, 634.4799194335938, -324.55841064453125, 588.9271240234375, 1317.99755859375, 219.76358032226562, -493.6918029785156, 675.6321411132812, 1643.2335205078125, 635.6482543945312, 2437.844482421875, 548.1668090820312, 2892.26416015625, 940.68701171875, -9.996284484863281, 714.460693359375, -78.18746185302734, 293.6546325683594, 390.2080383300781, 203.03614807128906, 828.570556640625, 3841.43115234375, 778.998291015625, 92.43243408203125, 437.1611633300781, -113.81535339355469, -658.6927490234375, 177.9501190185547, 336.1983642578125, 279.6111145019531, 1102.5706787109375, -39.58665466308594, -4.691215515136719, 1021.1597900390625, 304.7545166015625, 626.838134765625, 656.4592895507812, 1123.400634765625, 269.91357421875, 443.84783935546875, -307.17626953125, 67.40580749511719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000609.npy"}
|
||||
{"epoch": 0.8942731277533039, "step": 610, "batch_size": 64, "mean": 313.662353515625, "std": 590.554443359375, "min": -1913.3743896484375, "p10": -402.4262023925781, "median": 300.8416290283203, "p90": 922.6317382812505, "max": 1717.2615966796875, "pos_frac": 0.78125, "sample": [592.727294921875, 713.6580810546875, 104.52973937988281, 209.61228942871094, -388.8856201171875, 691.2636108398438, 80.58436584472656, 565.9742431640625, 574.7135620117188, -577.2609252929688, 17.16663360595703, 568.6644287109375, 20.17536163330078, 1283.623779296875, 622.1207275390625, 349.76055908203125, 166.46823120117188, 188.54714965820312, -545.0983276367188, -1913.3743896484375, 469.96917724609375, 1717.2615966796875, 164.77517700195312, 1014.2200927734375, -257.9698486328125, 370.86883544921875, 138.8544921875, -408.22930908203125, 771.8870239257812, 739.3829345703125, 1605.835205078125, -59.35697937011719, 0.6893157958984375, 713.5413818359375, 294.18426513671875, 1571.24853515625, -482.2277526855469, 204.87374877929688, 322.73126220703125, 642.1701049804688, 699.065185546875, 447.83251953125, -41.7586784362793, 735.8605346679688, 138.54672241210938, 248.61370849609375, 972.4064331054688, 142.57513427734375, -115.64279174804688, 337.2257080078125, 34.62281799316406, 609.441650390625, 265.7818298339844, -95.94841003417969, 654.51220703125, 167.88241577148438, 334.23101806640625, 308.12933349609375, -174.32362365722656, 307.4989929199219, -473.7134094238281, -654.47998046875, 806.4907836914062, 1559.859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000610.npy"}
|
||||
{"epoch": 0.895741556534508, "step": 611, "batch_size": 64, "mean": 539.230224609375, "std": 747.3662719726562, "min": -783.7963256835938, "p10": -258.3432540893554, "median": 423.71192932128906, "p90": 1484.3809936523437, "max": 2606.8134765625, "pos_frac": 0.765625, "sample": [1421.3038330078125, 431.7895812988281, 2606.8134765625, 1660.573974609375, 1155.3416748046875, 605.2615966796875, -89.59998321533203, 1340.6885986328125, 115.6621322631836, 227.79006958007812, 326.1958312988281, 720.27587890625, 14.909374237060547, -147.3780975341797, -712.5304565429688, -228.88319396972656, -360.39324951171875, -783.7963256835938, 278.6047668457031, 1452.8489990234375, -705.1573486328125, 1234.375732421875, 2120.376708984375, -397.92999267578125, 1474.2908935546875, 567.7706298828125, 1.3872451782226562, 998.8410034179688, 574.1875610351562, 49.69572448730469, -93.58013916015625, 1011.6817626953125, 367.8602294921875, -72.81226348876953, 435.95574951171875, 1116.923095703125, 43.51081085205078, 235.4373779296875, -201.54690551757812, 292.0350341796875, 998.3040771484375, 357.14593505859375, 12.928115844726562, 1842.99560546875, 437.0740966796875, 473.9591064453125, -291.8968200683594, 491.2727966308594, 1003.588134765625, -111.83536529541016, 652.3720092773438, -270.968994140625, 262.8825378417969, 415.63427734375, 794.6087646484375, 496.724609375, 349.7413635253906, 1831.208984375, 465.03704833984375, 1488.705322265625, 1182.32568359375, -7.869300842285156, 2526.341796875, 21.67403793334961], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000611.npy"}
|
||||
{"epoch": 0.8972099853157122, "step": 612, "batch_size": 64, "mean": 412.41900634765625, "std": 568.640625, "min": -819.7246704101562, "p10": -273.43865966796875, "median": 349.06353759765625, "p90": 1082.3628173828126, "max": 2391.05224609375, "pos_frac": 0.8125, "sample": [975.4953002929688, 306.3393249511719, 305.2430725097656, -359.4724426269531, -60.901004791259766, 26.880884170532227, 1017.0647583007812, 356.5807800292969, 429.62811279296875, 459.0221862792969, -624.33447265625, 870.9335327148438, 193.88308715820312, 1038.359375, 834.0338134765625, 943.5825805664062, 553.0277099609375, 230.24267578125, 509.227294921875, 1207.19580078125, 806.9053955078125, 95.96245574951172, -819.7246704101562, 1302.234619140625, 714.7825317382812, -272.1419677734375, 822.4575805664062, -436.515380859375, 609.599853515625, 638.0133056640625, 504.7740478515625, -789.93701171875, 77.11708068847656, 341.5462951660156, 69.30902099609375, -335.85638427734375, 595.5265502929688, 397.0440979003906, 122.244384765625, -158.8853759765625, 135.82237243652344, 153.5783233642578, 153.49261474609375, 209.9410400390625, 2391.05224609375, 62.002262115478516, 807.3770751953125, 1101.221435546875, 1228.467041015625, 260.6278076171875, 163.64271545410156, 785.8681030273438, 1412.4398193359375, 147.82254028320312, -16.919723510742188, -165.54835510253906, 484.75994873046875, 1.0710601806640625, 971.2689819335938, -273.994384765625, 1135.5587158203125, 683.1022338867188, 955.2660522460938, 110.406494140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000612.npy"}
|
||||
{"epoch": 0.8986784140969163, "step": 613, "batch_size": 64, "mean": 496.83154296875, "std": 602.9065551757812, "min": -862.5313720703125, "p10": -30.466036224365226, "median": 408.52552795410156, "p90": 1198.5684082031253, "max": 2370.504638671875, "pos_frac": 0.84375, "sample": [575.285400390625, 1224.2889404296875, 1342.61181640625, -181.29100036621094, 1064.728515625, 1138.5538330078125, 43.91865921020508, 1134.1727294921875, 932.355224609375, 30.03397560119629, 1443.8812255859375, 206.87265014648438, 689.2914428710938, 236.416748046875, 1555.7479248046875, -320.852783203125, -5.6096343994140625, 423.914306640625, 377.287353515625, -22.865707397460938, 368.9669189453125, 77.55101776123047, 721.6334838867188, 605.06494140625, 1124.0245361328125, 300.6431884765625, 1114.74755859375, 1015.8660278320312, 7.39068603515625, 503.925048828125, 731.5330200195312, 44.131744384765625, -33.72332000732422, 583.618896484375, 364.1392822265625, 1835.427734375, 666.994873046875, 698.2974853515625, 2370.504638671875, 3.8203353881835938, -856.7022094726562, 996.71337890625, 648.3374633789062, 824.103759765625, 620.1129150390625, -862.5313720703125, 179.6953887939453, 1521.9129638671875, -4.473545074462891, 238.28216552734375, 854.08203125, -219.80535888671875, -486.0658874511719, 160.18927001953125, 1107.36669921875, 523.8475341796875, 161.18576049804688, 393.1367492675781, 51.304527282714844, 567.7225952148438, 7.5076141357421875, 96.8602066040039, 187.53683471679688, 93.5982666015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000613.npy"}
|
||||
{"epoch": 0.9001468428781204, "step": 614, "batch_size": 64, "mean": 485.2371826171875, "std": 682.4478759765625, "min": -758.361083984375, "p10": -258.980694580078, "median": 459.6297912597656, "p90": 1356.8653808593754, "max": 2577.29345703125, "pos_frac": 0.734375, "sample": [209.75865173339844, 436.328857421875, -593.1375732421875, 953.8035888671875, -87.91716003417969, 1282.5189208984375, 14.504886627197266, 387.8717346191406, 504.57025146484375, -11.36208724975586, 1266.9569091796875, 318.17816162109375, 518.0064697265625, 762.8468627929688, 205.36505126953125, -166.0338134765625, 1429.82861328125, 756.4702758789062, 195.97828674316406, -153.9140167236328, 1115.8837890625, -19.107330322265625, 653.4371337890625, 957.71240234375, -645.6848754882812, -118.89542388916016, 482.93072509765625, -108.93716430664062, 614.5933227539062, -296.7484130859375, 582.1989135742188, 1746.148681640625, -170.85601806640625, 781.2136840820312, -542.87548828125, -146.806396484375, 686.0031127929688, -758.361083984375, -42.44862365722656, 724.20703125, 1551.2503662109375, 379.7981262207031, 1568.814208984375, 263.2566223144531, 1118.3031005859375, 1140.1041259765625, 1270.9930419921875, 263.77880859375, 654.5487060546875, 698.1323852539062, 27.539146423339844, 333.8923034667969, 1388.7281494140625, 105.14968872070312, 687.8037109375, 2577.29345703125, 1899.572998046875, 143.2049560546875, -654.8387451171875, -736.7711791992188, 1219.3919677734375, 147.72027587890625, 505.1399841308594, 778.14404296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000614.npy"}
|
||||
{"epoch": 0.9016152716593245, "step": 615, "batch_size": 64, "mean": 406.22174072265625, "std": 654.9724731445312, "min": -1847.3687744140625, "p10": -214.1412460327148, "median": 258.9588394165039, "p90": 1132.911572265625, "max": 2489.979736328125, "pos_frac": 0.8125, "sample": [147.51516723632812, 718.9938354492188, 177.83123779296875, 1455.82666015625, 228.41734313964844, 1239.3704833984375, -25.472152709960938, 63.99677276611328, 41.19873046875, 99.28195190429688, 937.6357421875, 273.4111328125, 242.6700897216797, -334.49951171875, 803.4951171875, -446.5713195800781, 965.2835083007812, -1847.3687744140625, 245.18934631347656, 378.6955261230469, 272.72833251953125, 159.73095703125, 2023.7950439453125, 163.40301513671875, 72.68733978271484, 1019.9949951171875, -165.84683227539062, 324.89373779296875, 508.8517761230469, 99.55209350585938, 393.31158447265625, 910.7526245117188, 649.533447265625, 979.2493896484375, 1332.089599609375, 3.7653579711914062, 180.7213592529297, 113.29153442382812, -412.349609375, -115.71601104736328, -831.2781372070312, 1050.5025634765625, 1139.524169921875, 52.07965087890625, 1592.116943359375, 533.5494995117188, 555.4909057617188, 1117.482177734375, 155.279296875, 481.65325927734375, 947.5741577148438, -234.83885192871094, 194.80592346191406, -23.323333740234375, 725.5309448242188, -395.5334167480469, 222.37359619140625, 380.35418701171875, 465.3711853027344, 752.6765747070312, -130.9123077392578, 2489.979736328125, 741.4156494140625, 136.97776794433594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000615.npy"}
|
||||
{"epoch": 0.9030837004405287, "step": 616, "batch_size": 64, "mean": 337.239990234375, "std": 563.5425415039062, "min": -717.1775512695312, "p10": -348.5673065185547, "median": 300.2670135498047, "p90": 1160.3758422851565, "max": 1729.2728271484375, "pos_frac": 0.765625, "sample": [902.2392578125, 13.269195556640625, 108.00666046142578, 740.7044067382812, 489.4858093261719, 60.813453674316406, 312.44921875, -233.23129272460938, 329.7800598144531, 1188.768798828125, 1333.463134765625, 603.9046020507812, 186.86624145507812, 92.10966491699219, -666.3921508789062, 1191.8516845703125, 421.3175964355469, 288.0848083496094, 122.58983612060547, -99.8091049194336, 1424.3074951171875, 540.03564453125, 483.8152160644531, 458.3050842285156, -323.7012634277344, -613.9149169921875, 448.92022705078125, 279.16009521484375, -61.683311462402344, 1271.2506103515625, 1641.8692626953125, 167.45335388183594, -490.96575927734375, 16.564476013183594, 34.08823776245117, -681.6057739257812, -161.05523681640625, 195.39382934570312, 933.5630493164062, 664.8076782226562, 423.491943359375, 395.37518310546875, 401.86151123046875, 593.0762329101562, -717.1775512695312, 998.6395263671875, 177.0910186767578, -319.83160400390625, 167.28863525390625, 709.455322265625, 384.844970703125, -306.4184875488281, 1729.2728271484375, 380.6539306640625, 757.5927734375, 145.12655639648438, 281.096923828125, 144.1573028564453, -296.6211242675781, 626.5880737304688, 973.114990234375, -359.22418212890625, 1094.1256103515625, -413.1029968261719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000616.npy"}
|
||||
{"epoch": 0.9045521292217328, "step": 617, "batch_size": 64, "mean": 239.33428955078125, "std": 549.9153442382812, "min": -1120.830810546875, "p10": -401.5051757812499, "median": 264.7053527832031, "p90": 685.8989868164062, "max": 2128.8388671875, "pos_frac": 0.765625, "sample": [334.2320556640625, 141.29241943359375, -216.438720703125, 325.4556579589844, 400.0152282714844, -227.56854248046875, 29.7029972076416, -597.765625, 658.9923095703125, 165.50831604003906, -231.17514038085938, 384.59674072265625, 400.2426452636719, 435.49432373046875, 210.4630126953125, 228.491943359375, 368.8731689453125, 120.67527770996094, 46.37770080566406, 1274.209716796875, 678.758544921875, 572.2413330078125, 173.5426025390625, 34.35314178466797, 399.0122985839844, 444.7196960449219, 103.2883529663086, -228.5889434814453, 333.1059265136719, -1120.830810546875, 55.434173583984375, 396.5604553222656, -301.92718505859375, -297.6949157714844, 687.2484741210938, 328.0941162109375, 1077.47216796875, 682.7501831054688, 556.2569580078125, 449.8536071777344, 547.5557250976562, -444.18145751953125, 847.178466796875, 631.9865112304688, 1828.3756103515625, 465.261962890625, 587.1951904296875, -628.6304931640625, 246.89468383789062, 505.32989501953125, -1060.5538330078125, 125.84526062011719, 37.89861297607422, 228.58343505859375, 282.5160217285156, -123.97467041015625, 833.4881591796875, -169.15155029296875, 16.788827896118164, -447.7467041015625, 2128.8388671875, 303.4302978515625, -738.4503173828125, 37.591339111328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000617.npy"}
|
||||
{"epoch": 0.9060205580029369, "step": 618, "batch_size": 64, "mean": 517.63623046875, "std": 842.433837890625, "min": -1799.8807373046875, "p10": -461.64458923339845, "median": 434.40679931640625, "p90": 1292.820642089844, "max": 3091.216796875, "pos_frac": 0.78125, "sample": [-1799.8807373046875, 850.16845703125, 248.19720458984375, 321.9898986816406, 94.5259780883789, 261.38519287109375, 138.4156494140625, 66.52226257324219, 2372.493408203125, 929.4354858398438, 246.6401824951172, -956.0972290039062, 838.9257202148438, 205.06068420410156, 1278.69921875, 330.3620910644531, -387.263916015625, 1179.3782958984375, 779.2931518554688, -317.2714538574219, -194.8089599609375, -465.41607666015625, -487.284912109375, 24.700851440429688, -778.8060302734375, -672.1676025390625, 187.99227905273438, 1298.8726806640625, 1047.052978515625, 115.31561279296875, 601.27001953125, 444.89666748046875, 671.1250610351562, 1045.07470703125, 970.9135131835938, -868.1447143554688, -452.8444519042969, 1356.443115234375, 335.5123596191406, 467.6662902832031, 827.3367919921875, 1274.3759765625, 990.1488037109375, 2523.355712890625, 955.9830932617188, -116.71869659423828, 423.91693115234375, 652.585693359375, 330.2161865234375, 3091.216796875, 810.994140625, 41.95256805419922, 1473.2884521484375, 1250.0067138671875, 872.0554809570312, 511.52593994140625, -163.6730194091797, 1118.345947265625, 2489.130615234375, -38.637237548828125, 423.30255126953125, 1047.7628173828125, 654.363525390625, 357.53656005859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000618.npy"}
|
||||
{"epoch": 0.9074889867841409, "step": 619, "batch_size": 64, "mean": 395.64404296875, "std": 803.299560546875, "min": -1036.502197265625, "p10": -517.3037841796875, "median": 278.3852081298828, "p90": 1407.0930297851564, "max": 3312.255615234375, "pos_frac": 0.671875, "sample": [-1001.739013671875, -615.66162109375, 4.6888427734375, 1165.9327392578125, 1491.5262451171875, -104.2719497680664, 284.3180847167969, 188.25289916992188, 342.08984375, -460.7377014160156, 635.8877563476562, 725.87890625, 406.54150390625, 1813.169189453125, -670.5184936523438, 209.11102294921875, 1204.4560546875, 397.43511962890625, 1136.886474609375, 1058.8902587890625, -762.6334228515625, -728.5419921875, 1298.4576416015625, 1376.9710693359375, -38.22023010253906, -486.4619140625, -68.51534271240234, 560.5504150390625, 638.56005859375, -81.20037841796875, 104.72824096679688, 499.9517517089844, -371.46197509765625, 72.146240234375, 1420.00244140625, -304.5422668457031, -81.1412353515625, -223.3467254638672, 736.522216796875, 582.6229248046875, -58.72341537475586, 1815.119873046875, 580.824951171875, 611.2733154296875, 962.3796997070312, 76.07331848144531, -233.05581665039062, 2386.197998046875, 1560.3009033203125, 272.45233154296875, 98.22614288330078, 229.61807250976562, 404.1402282714844, 3312.255615234375, -1036.502197265625, -530.521728515625, 613.507568359375, -86.626708984375, 1220.609375, -62.61551284790039, 325.0919189453125, 151.133544921875, 7.806585311889648, 345.6692199707031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000619.npy"}
|
||||
{"epoch": 0.908957415565345, "step": 620, "batch_size": 64, "mean": 367.34222412109375, "std": 607.8278198242188, "min": -1031.25244140625, "p10": -273.6461517333984, "median": 337.4606628417969, "p90": 1327.2459838867187, "max": 1961.2783203125, "pos_frac": 0.765625, "sample": [430.2699279785156, -326.0052490234375, 456.3177490234375, 634.8692626953125, 454.18402099609375, 714.318359375, 193.6746063232422, 246.418212890625, -78.50492095947266, 1355.5078125, -284.5209045410156, 321.17864990234375, -248.271728515625, 1334.3751220703125, 485.4410400390625, 138.67027282714844, 1087.760009765625, 462.2524108886719, -49.700408935546875, 1386.9930419921875, 1961.2783203125, 198.90444946289062, 419.1668701171875, 1041.4583740234375, -525.3696899414062, 195.42025756835938, 600.1824340820312, 222.17105102539062, 172.44175720214844, 80.97421264648438, -134.42919921875, 1612.128173828125, 418.1304931640625, 1699.554443359375, -201.6988525390625, 493.80230712890625, 42.802757263183594, 495.109130859375, 22.793819427490234, 396.33056640625, 413.6600646972656, 469.1828308105469, -217.111572265625, -104.66555786132812, 1310.611328125, 737.2289428710938, 696.7799072265625, -1031.25244140625, 147.35781860351562, 217.41427612304688, -231.3502197265625, -992.1881103515625, 1723.267578125, 328.3575744628906, 236.03330993652344, 346.5637512207031, 462.4892883300781, 128.07339477539062, -486.61993408203125, 436.95379638671875, 273.46160888671875, 961.8853759765625, -637.3980712890625, 394.7880859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000620.npy"}
|
||||
{"epoch": 0.9104258443465492, "step": 621, "batch_size": 64, "mean": 558.34814453125, "std": 721.8560791015625, "min": -1617.6788330078125, "p10": -243.4786209106445, "median": 495.5881805419922, "p90": 1420.5224609375002, "max": 3028.694580078125, "pos_frac": 0.84375, "sample": [671.4598388671875, -415.6578674316406, -809.596435546875, -221.04054260253906, 1820.9010009765625, 43.055213928222656, 108.21387481689453, 609.9071655273438, 685.4673461914062, 960.8378295898438, 835.7540893554688, 323.4281005859375, 310.05255126953125, 861.2349853515625, 265.81097412109375, -343.8250427246094, -273.2986145019531, 863.5491333007812, -1617.6788330078125, 192.82305908203125, 1561.99658203125, 209.94598388671875, 956.2275390625, 510.067138671875, 1180.1644287109375, 3028.694580078125, 170.11239624023438, 1013.1932373046875, 1100.638671875, -90.11532592773438, 1346.9293212890625, 779.0449829101562, 861.4091796875, 330.00048828125, 844.1320190429688, 1662.8973388671875, 181.12039184570312, 686.8932495117188, 422.1890563964844, 481.1092224121094, -109.82328796386719, 1265.5184326171875, -253.09494018554688, 561.9906005859375, 279.4460754394531, 153.54476928710938, 1452.0623779296875, 314.3838806152344, 1310.0755615234375, 1213.421142578125, -953.5543212890625, 340.11663818359375, 95.95015716552734, 219.28384399414062, 320.37738037109375, 664.1293334960938, 906.5386962890625, 1597.6578369140625, 219.14456176757812, 359.59320068359375, 1825.7236328125, 931.4505615234375, 706.444091796875, 205.84910583496094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000621.npy"}
|
||||
{"epoch": 0.9118942731277533, "step": 622, "batch_size": 64, "mean": 399.9915466308594, "std": 617.837158203125, "min": -1051.384033203125, "p10": -143.3406066894531, "median": 375.58009338378906, "p90": 865.0925354003906, "max": 2674.97265625, "pos_frac": 0.828125, "sample": [681.3710327148438, 367.8480529785156, -153.46505737304688, 383.97344970703125, 568.9755859375, 372.01019287109375, -701.5899658203125, 379.1499938964844, 2464.942138671875, 593.6321411132812, 74.81214141845703, -296.558349609375, 81.40437316894531, -332.4693603515625, -26.586761474609375, 519.49853515625, 169.3922882080078, 389.2703857421875, 73.90090942382812, 260.182373046875, 1204.8309326171875, -10.2364501953125, 492.635498046875, 2674.97265625, 770.2977905273438, 58.097991943359375, 619.0169677734375, 754.8333740234375, 204.23133850097656, -1036.060546875, 232.00250244140625, 23.003662109375, 866.0392456054688, 201.61985778808594, 272.13616943359375, 862.883544921875, 487.748779296875, -480.16119384765625, 1065.1339111328125, 545.34521484375, 786.4171142578125, 852.911865234375, 15.196388244628906, 727.1640625, 216.04537963867188, -50.89361572265625, 388.3539123535156, -119.71688842773438, 59.80882263183594, 297.4103698730469, 1510.9970703125, 455.98040771484375, 746.290771484375, 650.3068237304688, 613.6019287109375, 681.0652465820312, -1051.384033203125, 26.352149963378906, 126.96998596191406, 1447.14404296875, 659.925537109375, 567.5150756835938, 9.470584869384766, 304.4610595703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000622.npy"}
|
||||
{"epoch": 0.9133627019089574, "step": 623, "batch_size": 64, "mean": 310.7744445800781, "std": 610.7889404296875, "min": -1160.532958984375, "p10": -247.31089935302734, "median": 235.0264129638672, "p90": 1060.4595336914067, "max": 2687.61669921875, "pos_frac": 0.703125, "sample": [52.20013427734375, 255.85577392578125, -788.2537231445312, 1320.3529052734375, -172.18096923828125, -15.155529022216797, 700.5990600585938, 57.288604736328125, 834.7266845703125, 36.3392333984375, 1420.310546875, 298.7559509277344, 607.5829467773438, 97.34654235839844, 699.8550415039062, -248.03294372558594, 746.6971435546875, 370.00347900390625, -275.55975341796875, 124.6673583984375, -151.3341522216797, 489.8278503417969, 1212.74462890625, -172.94384765625, -1160.532958984375, 117.8956298828125, 332.0989990234375, -694.1296997070312, 422.79913330078125, 593.3983154296875, -245.62612915039062, 417.7452087402344, -80.69344329833984, -90.2745132446289, -160.12808227539062, 96.26651000976562, 941.9410400390625, 689.7399291992188, 840.71533203125, 227.45669555664062, 157.01026916503906, -638.92333984375, 341.0033264160156, 1358.0479736328125, -19.388397216796875, -61.9869270324707, 2687.61669921875, 513.9531860351562, -28.784404754638672, -431.0439147949219, 1111.253173828125, 565.08154296875, 258.09991455078125, 298.56549072265625, 242.59613037109375, 1746.29736328125, 102.55712890625, 117.11112976074219, 268.19140625, 27.413402557373047, 531.7799072265625, 213.0025634765625, 781.988037109375, -2.24041748046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000623.npy"}
|
||||
{"epoch": 0.9148311306901615, "step": 624, "batch_size": 64, "mean": 469.9930419921875, "std": 565.01025390625, "min": -1042.8865966796875, "p10": -211.3280303955078, "median": 496.0617218017578, "p90": 1157.1246337890627, "max": 1480.5223388671875, "pos_frac": 0.78125, "sample": [603.2628784179688, -355.16839599609375, 676.2255859375, 332.902099609375, 498.96466064453125, 973.212890625, 1098.94482421875, 1480.5223388671875, 1445.392578125, 194.6236572265625, 179.71029663085938, -35.64569091796875, 173.60592651367188, 1182.058837890625, 919.2392578125, -83.98236083984375, 583.5763549804688, 768.9659423828125, 1223.736083984375, -979.2394409179688, 88.95570373535156, 277.6854248046875, 49.32178497314453, 856.0283203125, 379.9863586425781, 191.42111206054688, 818.1830444335938, 815.9992065429688, 1079.0445556640625, -215.97055053710938, 154.7240753173828, 1084.3604736328125, -200.4954833984375, 314.9439697265625, -444.6400146484375, 976.4934692382812, 450.37103271484375, -90.92559814453125, 948.165771484375, 386.902099609375, -1042.8865966796875, 75.0822525024414, -27.1044921875, 681.7864990234375, 1081.357421875, 1045.4986572265625, 288.09698486328125, 765.3643798828125, 943.4393310546875, 493.1587829589844, 189.0484161376953, -432.59912109375, -31.28779411315918, 1184.561767578125, 551.1448974609375, 603.4432983398438, -331.6918640136719, 604.1046142578125, -65.28553771972656, 726.6654052734375, 1307.4752197265625, 201.89385986328125, 1420.387451171875, 1046.437744140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000624.npy"}
|
||||
{"epoch": 0.9162995594713657, "step": 625, "batch_size": 64, "mean": 511.17987060546875, "std": 543.3054809570312, "min": -349.70208740234375, "p10": -21.310681152343733, "median": 424.667236328125, "p90": 1208.5734497070314, "max": 2218.273681640625, "pos_frac": 0.875, "sample": [93.92330169677734, 88.19876098632812, -270.9192199707031, 929.669921875, 248.81365966796875, 50.5760498046875, 826.6494140625, 1223.101806640625, 1012.47705078125, 948.3060302734375, 54.220458984375, 489.3820495605469, 292.09503173828125, 95.75889587402344, 1681.142822265625, 526.1766967773438, 209.2628936767578, 55.09059524536133, 2218.273681640625, 489.82208251953125, 2005.6119384765625, 224.1295623779297, 152.95401000976562, 624.9058837890625, 132.53237915039062, 745.7637939453125, 1782.684326171875, -29.424240112304688, 217.2412872314453, 320.1279296875, 466.35943603515625, 651.46484375, 456.4030456542969, -349.70208740234375, 561.5845947265625, -97.19000244140625, 83.87812042236328, 647.6220092773438, 310.5745849609375, 746.921875, 1174.6739501953125, 418.5238037109375, 1351.6466064453125, 421.2105712890625, 161.5283966064453, 444.9150695800781, -60.37565612792969, 428.1239013671875, 636.472412109375, 783.956298828125, 89.84049224853516, -2.3790435791015625, 938.4439697265625, 371.03875732421875, 55.28578567504883, 174.85671997070312, 166.51625061035156, 468.7269592285156, 1156.3111572265625, -334.2077941894531, 677.4659423828125, -85.93017578125, 1374.892333984375, 987.5092163085938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000625.npy"}
|
||||
{"epoch": 0.9177679882525698, "step": 626, "batch_size": 64, "mean": 288.66387939453125, "std": 668.381591796875, "min": -2632.54296875, "p10": -395.3630432128905, "median": 273.9818801879883, "p90": 934.4569091796877, "max": 2064.61083984375, "pos_frac": 0.71875, "sample": [588.3182373046875, -14.62030029296875, 504.3626708984375, 348.51800537109375, 126.19522094726562, 233.9840545654297, 715.943603515625, 318.3312072753906, 886.7108764648438, -2632.54296875, 1644.6961669921875, 327.3091735839844, 1485.3892822265625, 2064.61083984375, -621.4680786132812, 192.93698120117188, 244.9019775390625, -465.8954162597656, -4.291290283203125, -154.1996307373047, 18.12298583984375, 246.9864959716797, 456.648681640625, 100.72803497314453, 26.513139724731445, 18.297714233398438, 155.64532470703125, -656.548095703125, 434.7272644042969, 629.3681640625, -125.67192077636719, 641.4161987304688, -543.0438232421875, -694.8964233398438, 587.2951049804688, 1371.68701171875, 862.9049682617188, 357.3697204589844, 365.28277587890625, 657.2705078125, 562.9526977539062, -230.78750610351562, 954.9194946289062, 477.41961669921875, 1768.8349609375, 704.3167114257812, -113.42195892333984, 100.0007553100586, 104.60275268554688, 390.82012939453125, -54.26708984375, 816.7484130859375, 300.9772644042969, 1147.6005859375, -17.141265869140625, 205.83763122558594, 687.6873168945312, 346.7723388671875, -667.9185791015625, -85.50669860839844, 138.98130798339844, -47.003684997558594, 306.9683837890625, -24.199565887451172], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000626.npy"}
|
||||
{"epoch": 0.9192364170337739, "step": 627, "batch_size": 64, "mean": 585.8370361328125, "std": 846.9072265625, "min": -720.9466552734375, "p10": -175.76781005859374, "median": 423.5992431640625, "p90": 1452.641650390625, "max": 4083.74365234375, "pos_frac": 0.75, "sample": [584.64892578125, 247.742919921875, 1164.2430419921875, 481.7475891113281, -167.52285766601562, -68.28987121582031, 118.75870513916016, 639.830322265625, 566.4345092773438, 428.4001770019531, 1254.297119140625, -484.1510925292969, 208.39404296875, 489.5845642089844, -104.73706817626953, 1034.021484375, -145.17251586914062, 839.8618774414062, 950.564697265625, 1129.8133544921875, -718.6619873046875, -179.30136108398438, 621.671142578125, -20.13581657409668, 1745.09326171875, 194.64895629882812, 418.7983093261719, 2497.11083984375, 975.560791015625, 382.40338134765625, 1951.179443359375, 74.00849914550781, 570.2305297851562, 1204.4278564453125, 538.8013305664062, 1721.9776611328125, 1426.6854248046875, 177.81655883789062, 535.572265625, -152.49774169921875, 118.50830841064453, -21.936525344848633, 3324.06494140625, 1463.7657470703125, 880.61767578125, -227.83651733398438, 4083.74365234375, -272.806396484375, 503.8492126464844, -83.88650512695312, 1004.2366333007812, 399.8402404785156, 899.7656860351562, -720.9466552734375, 267.9881591796875, 312.7360534667969, 430.968994140625, 230.3876495361328, -405.8827819824219, 1299.891845703125, -39.713356018066406, 254.6306915283203, 295.4400939941406, 362.2850341796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000627.npy"}
|
||||
{"epoch": 0.920704845814978, "step": 628, "batch_size": 64, "mean": 441.0691833496094, "std": 646.2359008789062, "min": -798.4420166015625, "p10": -342.1834686279296, "median": 374.0031280517578, "p90": 1329.3171508789062, "max": 2156.11328125, "pos_frac": 0.703125, "sample": [339.4017028808594, -22.77862548828125, 408.60455322265625, 1312.0416259765625, 1497.44482421875, 164.59414672851562, 671.3839111328125, 136.02764892578125, -205.71653747558594, -48.38592529296875, 142.75387573242188, 172.24847412109375, -798.4420166015625, 1054.90625, 1093.9254150390625, 1195.411376953125, -58.06379699707031, -262.166748046875, 1629.747802734375, 504.72052001953125, 2156.11328125, 535.4434204101562, 246.80404663085938, -202.4378662109375, -62.018272399902344, -376.4763488769531, 511.3955078125, 911.7595825195312, -433.889404296875, -438.1736755371094, 959.4857788085938, 280.9152526855469, 794.956787109375, 1313.879638671875, 1282.48583984375, 867.169189453125, 464.1830749511719, 731.4976806640625, 152.76780700683594, 179.71575927734375, -144.94668579101562, 545.58984375, 511.579833984375, 113.03229522705078, 1550.1636962890625, 1349.970947265625, 1335.9332275390625, 240.77896118164062, -392.48748779296875, -748.3157348632812, -107.02169799804688, -168.6269073486328, 1337.1883544921875, 841.01025390625, -702.1393432617188, -32.40399932861328, 1086.311767578125, 430.8770751953125, 525.2048950195312, 131.45330810546875, 1174.892333984375, 564.8997192382812, -40.95472717285156, 23.203845977783203], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000628.npy"}
|
||||
{"epoch": 0.922173274596182, "step": 629, "batch_size": 64, "mean": 401.8076477050781, "std": 540.7566528320312, "min": -1010.6290893554688, "p10": -263.29606781005856, "median": 414.2801208496094, "p90": 1145.1170532226563, "max": 1719.5050048828125, "pos_frac": 0.796875, "sample": [218.1041259765625, 1234.119140625, -338.8468017578125, -456.2355041503906, 375.7115478515625, 415.073486328125, 398.194091796875, 789.2224731445312, 1264.0523681640625, 532.0385131835938, 542.4671020507812, 784.045654296875, -85.9698257446289, 447.6711120605469, 1240.77197265625, -170.27667236328125, 187.35296630859375, 31.300968170166016, 34.59259796142578, 581.2565307617188, 30.9556884765625, 1117.8179931640625, 423.6675720214844, -204.19081115722656, 166.96852111816406, 489.9734191894531, -786.6197509765625, -131.54632568359375, 1104.654052734375, 744.6605224609375, 73.09488677978516, 819.0254516601562, 1544.96533203125, 1719.5050048828125, 273.5823059082031, 135.36045837402344, -1010.6290893554688, 1316.60791015625, 946.9998779296875, -288.62689208984375, -290.97113037109375, -660.0709838867188, 342.22088623046875, 368.78424072265625, 512.386474609375, 1156.816650390625, 305.2991027832031, 506.95025634765625, 699.6610717773438, -27.59722328186035, 813.7218017578125, 369.9530029296875, 150.97265625, 644.91357421875, 207.73577880859375, 523.6707763671875, 33.31330108642578, 655.6342163085938, 897.4722900390625, 515.9534301757812, 413.48675537109375, 543.9052734375, 609.99658203125, -89.39212036132812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000629.npy"}
|
||||
{"epoch": 0.9236417033773862, "step": 630, "batch_size": 64, "mean": 228.9735565185547, "std": 694.4979248046875, "min": -1351.7347412109375, "p10": -466.42320251464844, "median": 164.32184600830078, "p90": 994.2103881835939, "max": 3232.13671875, "pos_frac": 0.6875, "sample": [1136.8509521484375, -1210.330810546875, -564.8382568359375, 353.3958740234375, -230.17843627929688, 691.6629028320312, 64.95207977294922, -231.83599853515625, 2354.0654296875, -242.66995239257812, 666.5784912109375, -104.56739807128906, 189.66879272460938, 1010.6966552734375, 824.6498413085938, -1351.7347412109375, 177.49514770507812, -587.864013671875, 307.4931640625, 203.51744079589844, 403.4898376464844, -172.4143829345703, 144.17381286621094, 1461.5733642578125, -195.4530792236328, 3232.13671875, 63.26640319824219, 955.742431640625, 131.36282348632812, -235.3642120361328, 34.04718017578125, -319.13494873046875, 285.14593505859375, -541.423583984375, 327.4678955078125, -469.1736755371094, -169.24513244628906, 352.2942810058594, 156.44203186035156, 408.16204833984375, 173.20123291015625, -192.38519287109375, 291.3041687011719, -659.4554443359375, 165.14794921875, 370.54034423828125, 133.87109375, 284.37811279296875, 680.083740234375, -63.96253204345703, 78.33467102050781, 1034.559326171875, 657.6636352539062, 163.49574279785156, -299.3035888671875, 40.64881896972656, 401.9081726074219, 123.18809509277344, 633.4972534179688, -460.00543212890625, 1191.14013671875, 196.19834899902344, 368.044921875, 32.11102294921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000630.npy"}
|
||||
{"epoch": 0.9251101321585903, "step": 631, "batch_size": 64, "mean": 368.1886291503906, "std": 666.9747924804688, "min": -1875.023193359375, "p10": -419.7190795898437, "median": 354.68621826171875, "p90": 1190.35810546875, "max": 1934.2716064453125, "pos_frac": 0.71875, "sample": [1188.35791015625, 974.5239868164062, -424.4015808105469, 352.41717529296875, -367.99151611328125, 496.7108154296875, -345.1336364746094, -143.51409912109375, -230.02053833007812, -514.709716796875, 1641.197265625, 1238.2806396484375, -474.5953063964844, 534.4418334960938, -558.0241088867188, 152.2227325439453, 572.9387817382812, 919.3751220703125, 33.098243713378906, -408.7932434082031, 156.20724487304688, -33.66636657714844, 282.6677551269531, -649.2046508789062, 1010.0526123046875, 66.46255493164062, 120.8396987915039, -218.2710418701172, 801.364990234375, 792.38037109375, -1875.023193359375, 496.9191589355469, 1934.2716064453125, 711.8433227539062, 356.95526123046875, 606.7335815429688, 333.537353515625, -163.67787170410156, -162.86724853515625, 1191.21533203125, 135.44607543945312, 635.1707763671875, 1575.0936279296875, 924.7788696289062, 621.130859375, -864.1566772460938, 1148.8072509765625, 345.1605224609375, 1495.197998046875, 668.2318725585938, 54.69158172607422, 631.8358764648438, -162.26177978515625, 413.3781433105469, 361.32147216796875, 206.567626953125, 1212.473876953125, 701.2156982421875, 1008.631103515625, -84.11922454833984, 230.971435546875, 873.3966064453125, 263.0690612792969, 772.9176635742188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000631.npy"}
|
||||
{"epoch": 0.9265785609397944, "step": 632, "batch_size": 64, "mean": 392.960205078125, "std": 649.0064086914062, "min": -1274.6002197265625, "p10": -334.2511871337891, "median": 370.2868957519531, "p90": 1146.819812011719, "max": 2326.781982421875, "pos_frac": 0.734375, "sample": [655.0185546875, -763.1083374023438, 287.141357421875, 382.36773681640625, 634.414306640625, -1274.6002197265625, 1166.69140625, 1100.4527587890625, 1557.8895263671875, 998.0347290039062, 770.9824829101562, 680.083251953125, 227.906005859375, 903.5234375, 203.45938110351562, 213.00601196289062, 580.9195556640625, 456.0211486816406, 46.60622787475586, 95.4214859008789, 1631.827880859375, 355.5776062011719, 724.003173828125, 489.09185791015625, -501.91937255859375, 1172.82373046875, 461.1761474609375, -319.2027893066406, 440.35491943359375, -234.6361083984375, -1100.3720703125, 343.8437194824219, 1083.7977294921875, 1081.5115966796875, 580.6038818359375, 772.1907348632812, 1044.1600341796875, 762.4081420898438, 382.2241516113281, -327.6755065917969, 1308.349365234375, -96.12842559814453, -234.4945068359375, -113.51907348632812, 622.5343627929688, 289.8134765625, 691.2138671875, 183.60601806640625, 1172.246826171875, 2326.781982421875, 1000.6231689453125, -118.65267944335938, -500.564697265625, 133.48492431640625, 129.62620544433594, -337.0693359375, 358.3496398925781, 308.5388488769531, -556.283935546875, -2.5679931640625, 89.61485290527344, -226.21470642089844, -117.81819915771484, 1073.9622802734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000632.npy"}
|
||||
{"epoch": 0.9280469897209985, "step": 633, "batch_size": 64, "mean": 281.1269226074219, "std": 587.009521484375, "min": -1113.125244140625, "p10": -364.37037658691406, "median": 248.6222915649414, "p90": 1121.1532958984378, "max": 1527.3831787109375, "pos_frac": 0.703125, "sample": [-330.1580810546875, 1382.63330078125, 861.89208984375, -719.4965209960938, 472.54248046875, 326.23187255859375, -390.79339599609375, -83.38284301757812, 1227.74462890625, 84.56352996826172, 241.42691040039062, 153.59283447265625, 401.77899169921875, -1113.125244140625, 618.1500244140625, 134.8089141845703, 165.20654296875, 1174.6845703125, 321.8448791503906, -258.0446472167969, -125.01355743408203, 351.2825622558594, 308.072265625, -39.99095916748047, 718.652099609375, 121.13540649414062, -59.54270935058594, -188.91558837890625, -894.1070556640625, 1009.2698974609375, 350.3699951171875, 238.33013916015625, 1161.1319580078125, 503.9486999511719, 822.7189331054688, -66.16517639160156, -330.4901123046875, -1070.8818359375, -372.6436767578125, 118.46399688720703, 1394.783935546875, 453.6856384277344, 249.75897216796875, -803.6199951171875, 600.904052734375, 649.1912231445312, 502.359130859375, 1527.3831787109375, 981.1090087890625, 1027.8697509765625, 123.94827270507812, 250.826171875, -345.0660095214844, 969.7202758789062, 247.48561096191406, -27.865463256835938, 161.75453186035156, 451.91552734375, 1330.98193359375, 87.37796020507812, 568.391357421875, -5.664031982421875, 73.73303985595703, 293.4326171875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000633.npy"}
|
||||
{"epoch": 0.9295154185022027, "step": 634, "batch_size": 64, "mean": 243.2664337158203, "std": 595.4829711914062, "min": -964.5828247070312, "p10": -458.78165283203117, "median": 183.95001220703125, "p90": 1034.9295654296875, "max": 2206.643798828125, "pos_frac": 0.65625, "sample": [194.1242218017578, 820.3815307617188, -260.9128112792969, 1562.34375, -575.2748413085938, 50.966552734375, 29.2073917388916, 181.59884643554688, 291.8164978027344, 1342.4517822265625, 34.90374755859375, 357.03173828125, -96.2139663696289, 288.64410400390625, 657.1220092773438, 186.30117797851562, 294.47540283203125, -169.05853271484375, -715.330322265625, 1010.761962890625, 1142.0767822265625, -221.4523162841797, -190.33070373535156, 102.84768676757812, -45.472808837890625, 60.27727508544922, 73.50735473632812, 694.8583984375, 471.4665222167969, 29.166492462158203, 2206.643798828125, 312.64361572265625, 616.9056396484375, -964.5828247070312, 1045.287109375, 1198.1846923828125, 640.5433349609375, -381.1195068359375, -492.0654296875, 200.73941040039062, 768.620361328125, -128.73023986816406, -103.70075225830078, -903.64794921875, 218.42747497558594, -587.1417846679688, 48.34391403198242, -507.1888427734375, -178.0030517578125, 913.5374755859375, -357.47113037109375, 767.09619140625, 448.048095703125, 1318.26123046875, 407.2522888183594, 91.57481384277344, 264.5868835449219, 681.6396484375, -89.65030670166016, 248.56466674804688, -89.86492919921875, -20.767745971679688, -284.6392517089844, 658.4407958984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000634.npy"}
|
||||
{"epoch": 0.9309838472834068, "step": 635, "batch_size": 64, "mean": 357.1339111328125, "std": 645.9878540039062, "min": -1089.2183837890625, "p10": -296.12164764404287, "median": 327.056640625, "p90": 997.9147949218752, "max": 2458.15478515625, "pos_frac": 0.71875, "sample": [828.524169921875, 121.41004943847656, 2458.15478515625, -102.733642578125, 426.0731506347656, -74.94026947021484, 133.29058837890625, 22.199230194091797, -11.381210327148438, 497.85308837890625, 168.88519287109375, 1430.2628173828125, -49.11534118652344, 838.5813598632812, -531.330322265625, 377.959228515625, 458.7346496582031, -20.893035888671875, 831.46533203125, 27.966079711914062, 379.1455078125, 161.54954528808594, 704.7122802734375, 942.8863525390625, 332.815185546875, 51.406166076660156, 11.782608032226562, 384.6563720703125, -198.48353576660156, 1968.912841796875, -96.3713150024414, 376.761474609375, 338.2779541015625, 1320.3216552734375, 703.979736328125, -93.32176971435547, 91.99279022216797, 0.4330253601074219, 506.1995544433594, 1021.4984130859375, -182.9170684814453, 897.7877197265625, 882.7171630859375, 377.7978820800781, -714.217041015625, -182.82333374023438, 639.0450439453125, -48.858253479003906, -337.966552734375, 670.1234130859375, -350.78558349609375, -398.10479736328125, -1089.2183837890625, 757.6887817382812, 1218.53662109375, 321.298095703125, 369.7298889160156, 675.698974609375, -901.7706298828125, 2294.2431640625, 155.302734375, 470.0411071777344, 293.62518310546875, 299.4766845703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000635.npy"}
|
||||
{"epoch": 0.9324522760646109, "step": 636, "batch_size": 64, "mean": 424.78887939453125, "std": 662.819091796875, "min": -1038.6435546875, "p10": -231.4474411010742, "median": 330.1589050292969, "p90": 988.5485290527345, "max": 3623.93701171875, "pos_frac": 0.734375, "sample": [44.768890380859375, -369.0714111328125, 90.27035522460938, -166.59811401367188, 314.2300720214844, -434.299072265625, 394.2744140625, 325.96807861328125, 544.0062255859375, 112.95577239990234, 802.123291015625, -349.38800048828125, 381.6951904296875, 1122.74072265625, -23.939491271972656, 1012.7492065429688, -44.9119873046875, 2296.793212890625, 1189.981201171875, 530.6761474609375, 217.15773010253906, 992.6016235351562, -164.42996215820312, -280.279296875, 696.9256591796875, 273.7805480957031, 979.09130859375, 382.71356201171875, 179.32322692871094, 3623.93701171875, 59.86192321777344, -123.49537658691406, 948.8117065429688, 1278.1778564453125, -366.2297058105469, 939.6123046875, 906.654296875, 396.8788757324219, 628.517333984375, -216.116943359375, 694.5601806640625, 221.9744110107422, 768.6856689453125, 851.9625244140625, 653.5189819335938, 303.31732177734375, 749.762939453125, -101.41378784179688, 276.56353759765625, -200.14532470703125, 181.2048797607422, -3.170440673828125, -1038.6435546875, 334.3497314453125, 311.5173034667969, -238.0176544189453, 930.3839721679688, 758.1934814453125, 316.3987121582031, 456.9288330078125, -6.5925140380859375, 378.67034912109375, 492.6803894042969, 965.2800903320312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000636.npy"}
|
||||
{"epoch": 0.933920704845815, "step": 637, "batch_size": 64, "mean": 469.54571533203125, "std": 647.8881225585938, "min": -1010.3731079101562, "p10": -86.6541633605957, "median": 321.471923828125, "p90": 1427.0541992187502, "max": 2398.585693359375, "pos_frac": 0.8125, "sample": [124.36852264404297, 110.16683197021484, 548.17529296875, 9.922660827636719, 56.48988723754883, 1010.640625, 306.37359619140625, 63.7442626953125, 619.8492431640625, 1275.21142578125, 753.1951293945312, -1010.3731079101562, 75.1226806640625, 247.29159545898438, -238.3695526123047, 633.9061889648438, 886.4509887695312, 1456.1717529296875, 1035.8104248046875, 640.14208984375, 1464.3609619140625, 268.28680419921875, 160.12188720703125, 2398.585693359375, 99.57669067382812, 533.4848022460938, -254.18344116210938, 3.8405418395996094, 287.1208190917969, -53.352203369140625, 353.1391296386719, -176.58389282226562, -154.435302734375, 612.3173217773438, 403.0963439941406, -87.09999084472656, 336.57025146484375, 725.2593383789062, 841.0733642578125, 259.625244140625, 618.946044921875, 359.40521240234375, 250.75765991210938, 996.564453125, 804.864013671875, 575.2191772460938, 1443.951904296875, -1.4409103393554688, 1746.1158447265625, -955.2022705078125, 1173.6678466796875, -33.82252502441406, 360.188232421875, 1632.08349609375, 406.13519287109375, 146.89552307128906, 51.55775451660156, 1387.626220703125, -48.53257751464844, 2348.480712890625, 14.10965347290039, 108.58262634277344, 125.291015625, -85.61389923095703], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000637.npy"}
|
||||
{"epoch": 0.9353891336270191, "step": 638, "batch_size": 64, "mean": 366.377685546875, "std": 718.7194213867188, "min": -3386.078125, "p10": -92.91921844482422, "median": 267.38604736328125, "p90": 1148.499267578125, "max": 1764.4619140625, "pos_frac": 0.734375, "sample": [1292.222900390625, -834.0191040039062, -3386.078125, 935.1614379882812, 276.71759033203125, 138.53598022460938, 35.4598388671875, -25.5557861328125, 1133.932373046875, 328.2236633300781, 748.46337890625, 1346.3824462890625, 57.68748474121094, 720.5791625976562, 253.71775817871094, 483.575439453125, 679.1924438476562, -502.2288513183594, 529.10546875, -21.097454071044922, 442.9212951660156, 42.59923553466797, -239.82781982421875, 122.22199249267578, 308.70782470703125, 947.4032592773438, 1523.96435546875, -18.813678741455078, 663.47265625, 236.47274780273438, 741.6259765625, 258.05450439453125, 712.8362426757812, -77.7247543334961, -92.94639587402344, 934.2969970703125, 970.3873291015625, -55.67277145385742, 347.4693908691406, -341.5472717285156, -24.380231857299805, 419.2364196777344, 237.29202270507812, 1764.4619140625, -92.63549041748047, -25.728044509887695, 946.6337890625, 120.95332336425781, 613.1932983398438, 1154.5538330078125, -45.816497802734375, -92.85580444335938, 1018.3814697265625, 497.44000244140625, 176.98361206054688, 87.94857788085938, 41.290016174316406, 51.324928283691406, 852.3038940429688, 1134.3719482421875, -137.96408081054688, 104.31053924560547, 1655.59814453125, 1375.3963623046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000638.npy"}
|
||||
{"epoch": 0.9368575624082232, "step": 639, "batch_size": 64, "mean": 166.44647216796875, "std": 696.6668701171875, "min": -1281.532958984375, "p10": -663.3500122070312, "median": 166.78636169433594, "p90": 1105.3677612304691, "max": 1683.3221435546875, "pos_frac": 0.625, "sample": [1542.2178955078125, -858.4353637695312, -515.956787109375, 1179.397216796875, 1003.910400390625, -420.91741943359375, 309.1643371582031, 382.9885559082031, -572.0902709960938, 838.7384033203125, 1634.1431884765625, -56.455936431884766, 388.2599792480469, 603.3709716796875, -1007.1493530273438, 299.8406677246094, -418.0322265625, -445.35260009765625, 51.857879638671875, 427.8101501464844, 708.159423828125, -1208.788330078125, 861.6383666992188, 606.2598876953125, -618.6256713867188, -1281.532958984375, -1063.1572265625, 278.6259460449219, 82.0921401977539, 32.65266418457031, 587.1513061523438, 529.3971557617188, 1.6189498901367188, -666.445556640625, 1148.8494873046875, 1236.593017578125, -374.1380615234375, 832.8739624023438, -331.0767822265625, -9.776710510253906, 61.841087341308594, -63.578887939453125, 660.7100830078125, 432.8426818847656, 1683.3221435546875, -818.8806762695312, -227.02383422851562, 98.36702728271484, -315.6968994140625, -439.07305908203125, 163.0733184814453, 730.99755859375, 284.39837646484375, 939.8582153320312, -622.8057861328125, -333.0620422363281, -656.1270751953125, 170.49940490722656, 118.65443420410156, 670.8967895507812, 336.7237548828125, 1288.5947265625, 573.6690673828125, 194.69345092773438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000639.npy"}
|
||||
{"epoch": 0.9383259911894273, "step": 640, "batch_size": 64, "mean": 554.8192138671875, "std": 775.8626098632812, "min": -935.2141723632812, "p10": -212.07056732177733, "median": 411.3685302734375, "p90": 1418.1853149414064, "max": 2963.619140625, "pos_frac": 0.765625, "sample": [757.0156860351562, 533.6309814453125, -506.5576171875, -171.81053161621094, 137.86212158203125, 607.24072265625, 284.12664794921875, 1369.7447509765625, 1323.7325439453125, 214.16421508789062, 1438.945556640625, 1644.0430908203125, 949.3600463867188, 46.882720947265625, -290.9989929199219, 487.6976013183594, 879.7160034179688, -169.244384765625, -62.62648010253906, 1323.857666015625, -867.697998046875, 953.987548828125, 399.80224609375, 767.6246337890625, -215.62130737304688, -935.2141723632812, 121.59616088867188, 362.12420654296875, 2963.619140625, 900.7900390625, 599.2833251953125, 1007.624267578125, -654.6163940429688, 610.68359375, 2561.228271484375, 325.6861267089844, -171.98678588867188, 249.70484924316406, 82.0634765625, 344.8682861328125, -84.86674499511719, 188.8777618408203, 10.334457397460938, 1226.0816650390625, 1002.5758056640625, 166.97549438476562, 888.4686279296875, 671.6976318359375, -487.5157775878906, 1328.0262451171875, 2194.59814453125, 348.4496154785156, -21.464447021484375, 1072.37841796875, -30.372238159179688, 684.0817260742188, 422.934814453125, 860.2750854492188, 92.66340637207031, 743.6536254882812, 1479.3560791015625, 2465.0654296875, -203.78550720214844, 287.6089782714844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000640.npy"}
|
||||
{"epoch": 0.9397944199706314, "step": 641, "batch_size": 64, "mean": 367.82965087890625, "std": 749.135986328125, "min": -1224.53173828125, "p10": -550.0671722412109, "median": 374.3223114013672, "p90": 1120.0082763671876, "max": 3079.039794921875, "pos_frac": 0.75, "sample": [165.18048095703125, 371.960693359375, 535.634033203125, 1126.9906005859375, -576.4561767578125, -135.81527709960938, 347.9476318359375, 1521.8037109375, 495.76318359375, 210.6702423095703, -1080.460205078125, 599.931884765625, 35.30958557128906, -425.7651062011719, 3079.039794921875, -573.6497192382812, -495.0412292480469, -462.426025390625, 8.967514038085938, 983.45361328125, -105.25027465820312, 66.1180191040039, 111.52227783203125, 590.4420776367188, 240.24539184570312, 672.447998046875, -1210.8265380859375, 877.1605224609375, 561.7630615234375, 159.9490203857422, 971.74609375, 481.19903564453125, 620.5006103515625, 72.6341552734375, 171.2369384765625, -37.38895034790039, 1297.9334716796875, 307.9871826171875, 801.8715209960938, 658.9553833007812, 128.9901123046875, 16.190309524536133, -73.7380142211914, -681.9588623046875, 280.49957275390625, 813.7985229492188, -1224.53173828125, 1103.7161865234375, 992.2409057617188, 974.1093139648438, 2363.845703125, 813.1953735351562, -1110.907470703125, 736.0506591796875, -21.197242736816406, 376.6839294433594, 624.9957885742188, -357.50189208984375, 589.33154296875, 376.9907531738281, 773.6009521484375, 1235.7564697265625, 591.3789672851562, 1176.2711181640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000641.npy"}
|
||||
{"epoch": 0.9412628487518355, "step": 642, "batch_size": 64, "mean": 445.2503662109375, "std": 907.1710205078125, "min": -1610.4476318359375, "p10": -373.2165893554687, "median": 308.92420959472656, "p90": 1354.170703125001, "max": 4856.80517578125, "pos_frac": 0.734375, "sample": [99.11622619628906, 643.605224609375, 816.7568359375, 622.1434936523438, 267.19122314453125, -1610.4476318359375, 773.597412109375, 290.9181213378906, -68.93099212646484, -589.9583740234375, -105.45990753173828, 100.18148803710938, 1479.45849609375, -389.6158447265625, 841.4033203125, 1020.8838500976562, 451.48419189453125, 729.6207885742188, -841.2777709960938, 45.47780990600586, 227.92526245117188, -135.615478515625, 428.14617919921875, 44.68846130371094, 287.5089111328125, 654.024658203125, -63.807899475097656, 225.52581787109375, 97.43035888671875, 413.6755676269531, 924.961669921875, 607.5187377929688, 790.6767578125, 2424.818115234375, -231.74612426757812, 72.22402954101562, 326.9302978515625, 741.7490234375, -519.64697265625, 219.19894409179688, 843.3709106445312, 163.7967529296875, 446.1053771972656, 1457.8125, -334.95166015625, 1112.33984375, 778.5526123046875, 1801.0777587890625, -192.00868225097656, -78.2752685546875, 4856.80517578125, 955.1101684570312, -675.6651000976562, 447.404052734375, -183.5696258544922, -194.3476104736328, 347.9881591796875, 2476.012451171875, 42.577362060546875, -1018.7236328125, 444.1627197265625, 1680.6024169921875, 230.13311767578125, 977.3790893554688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000642.npy"}
|
||||
{"epoch": 0.9427312775330396, "step": 643, "batch_size": 64, "mean": 342.09228515625, "std": 557.9434814453125, "min": -748.882568359375, "p10": -416.50483093261715, "median": 323.69895935058594, "p90": 1032.1501831054688, "max": 1930.4443359375, "pos_frac": 0.765625, "sample": [358.9494323730469, 283.3996276855469, 165.5831756591797, 848.6129150390625, 811.1127319335938, 666.060791015625, 492.00390625, -70.51171112060547, 323.57867431640625, 912.2095947265625, 15.747045516967773, -681.4571533203125, 294.8492736816406, 257.5123596191406, -201.96888732910156, 975.61181640625, 819.3618774414062, 1066.938720703125, 1048.3897705078125, 503.13848876953125, 167.56373596191406, 40.00288391113281, -317.5326843261719, 234.43106079101562, 61.41286087036133, 149.806884765625, -748.882568359375, 593.406494140625, 501.3657531738281, 993.470947265625, -490.22479248046875, 323.8192443847656, 359.8194580078125, 272.1544189453125, 994.2578125, 1197.99609375, 398.0217590332031, -449.2362060546875, 688.8675537109375, 341.8617858886719, -520.8171997070312, 1849.9503173828125, 1074.882568359375, 330.49224853515625, 661.4161987304688, 111.19074249267578, 2.964170455932617, 388.67108154296875, 1930.4443359375, 53.62907791137695, 918.073974609375, 624.011474609375, 715.142333984375, -280.10333251953125, 1054.18603515625, 41.50941467285156, -391.0775146484375, -427.4022521972656, 379.21240234375, -97.58395385742188, -163.529296875, -151.16201782226562, -569.3695068359375, 157.6702880859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000643.npy"}
|
||||
{"epoch": 0.9441997063142438, "step": 644, "batch_size": 64, "mean": 350.287109375, "std": 726.9475708007812, "min": -1838.5657958984375, "p10": -491.5324554443358, "median": 204.83700561523438, "p90": 1209.5008666992192, "max": 2432.180908203125, "pos_frac": 0.734375, "sample": [2432.180908203125, 33.405784606933594, 841.8756713867188, -1838.5657958984375, 540.79443359375, 360.57940673828125, -713.209716796875, 86.58790588378906, 921.1129760742188, 291.6221923828125, 43.65211486816406, -51.86070251464844, 107.49127960205078, 460.27850341796875, 140.74220275878906, 623.56689453125, 180.33514404296875, -7.7218017578125, 228.10208129882812, -533.6375122070312, 160.72183227539062, 2093.677001953125, 50.95130920410156, -641.76171875, 739.4037475585938, 831.143798828125, -1.9887161254882812, 472.13958740234375, -242.50872802734375, 1553.6884765625, -63.44054412841797, -296.8196105957031, 752.3021240234375, -81.17021179199219, 62.06590270996094, -24.2793025970459, 1328.205810546875, 756.630615234375, 667.298828125, 262.2093505859375, 119.89974212646484, -393.2873229980469, 530.7584228515625, 57.33832550048828, 1098.0107421875, 555.666748046875, -589.8530883789062, -239.13494873046875, 132.0852813720703, 1070.11474609375, 1565.27587890625, 834.5870361328125, 95.63417053222656, -1138.970458984375, 1035.407958984375, 1095.781005859375, 349.6898193359375, 1257.2823486328125, 29.507232666015625, 364.9104309082031, 511.2181701660156, 1946.0255126953125, 181.57192993164062, -576.9471435546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000644.npy"}
|
||||
{"epoch": 0.9456681350954479, "step": 645, "batch_size": 64, "mean": 174.8188934326172, "std": 707.9917602539062, "min": -1354.2305908203125, "p10": -705.8393005371094, "median": 81.15388870239258, "p90": 1141.5938720703127, "max": 2205.183349609375, "pos_frac": 0.609375, "sample": [430.6788635253906, 1076.14404296875, -690.815673828125, 960.1738891601562, -1354.2305908203125, -1130.966552734375, -707.2890014648438, 20.45111083984375, 421.9070129394531, -167.07899475097656, -88.08851623535156, -42.4742431640625, 52.732826232910156, -7.075992584228516, -89.60134887695312, -451.89630126953125, 412.21343994140625, 297.3450622558594, 920.8081665039062, 1169.643798828125, 666.3878173828125, -431.3079833984375, 48.0008544921875, -2.1213531494140625, 109.574951171875, 23.007854461669922, 1170.683349609375, -26.735939025878906, -778.4502563476562, -1199.544677734375, 190.17054748535156, 432.29791259765625, -623.060791015625, 1448.64599609375, 892.7515258789062, 9.098602294921875, 1877.7052001953125, 841.6080322265625, -1148.1712646484375, -702.4566650390625, 1302.11376953125, -73.62136840820312, 113.43153381347656, 171.17701721191406, 449.8038330078125, 548.5457763671875, 430.69012451171875, 309.104736328125, 229.92950439453125, 569.1828002929688, 40.75628662109375, -286.78594970703125, 1283.0096435546875, 21.492576599121094, 471.0670166015625, 2205.183349609375, -121.4277572631836, 308.01605224609375, -851.0762329101562, -126.9855728149414, 651.68408203125, 153.0039825439453, -67.96226501464844, -372.588623046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000645.npy"}
|
||||
{"epoch": 0.947136563876652, "step": 646, "batch_size": 64, "mean": 308.0731201171875, "std": 571.8493041992188, "min": -1009.4287719726562, "p10": -388.1632110595703, "median": 219.75778198242188, "p90": 1080.864147949219, "max": 1987.7760009765625, "pos_frac": 0.734375, "sample": [896.3077392578125, 1112.2615966796875, -35.95099639892578, 1388.2607421875, 594.90478515625, 232.7240447998047, 638.3053588867188, 687.7940063476562, 968.7298583984375, 26.354995727539062, 331.5789489746094, 497.71539306640625, -904.99267578125, 0.575164794921875, 1003.4322509765625, 170.4179229736328, -359.8125915527344, 273.6966857910156, 1053.133056640625, -114.68285369873047, 400.78521728515625, -140.37628173828125, -158.4466552734375, 635.0187377929688, 80.45114135742188, 324.9672546386719, 446.82513427734375, 234.99497985839844, 46.5230712890625, 846.1159057617188, 101.10406494140625, 462.6252136230469, 78.72488403320312, 189.38047790527344, -187.00794982910156, 206.79151916503906, 62.08868408203125, 1538.325927734375, -216.14051818847656, -1009.4287719726562, 391.6805419921875, 617.3441162109375, 80.33027648925781, -414.534423828125, -78.78074645996094, -521.1257934570312, -68.23857116699219, 51.510887145996094, -428.7747802734375, 1987.7760009765625, 1092.7489013671875, 240.66217041015625, 604.7332153320312, -91.37055969238281, 1348.5140380859375, -400.3134765625, -493.1168212890625, 65.2467269897461, 192.4127655029297, 172.40597534179688, 1213.622314453125, 964.2013549804688, 515.403076171875, 270.2664489746094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000646.npy"}
|
||||
{"epoch": 0.9486049926578561, "step": 647, "batch_size": 64, "mean": 379.8256530761719, "std": 530.0704956054688, "min": -1085.0350341796875, "p10": -174.6228256225586, "median": 393.8932189941406, "p90": 1078.556689453125, "max": 1888.49072265625, "pos_frac": 0.78125, "sample": [724.6217041015625, 616.2154541015625, 299.9800109863281, 248.53001403808594, 363.9700622558594, 1089.511962890625, 264.8812561035156, 398.8282165527344, 564.8453369140625, 521.8748168945312, 1571.552978515625, 155.75881958007812, 44.41998291015625, 1702.4501953125, 706.6473999023438, 493.5770263671875, -41.80805969238281, 1052.994384765625, 625.68017578125, -155.119140625, 217.03494262695312, 177.74371337890625, -25.70765495300293, -160.3477325439453, 594.65625, 404.1297607421875, -74.56773376464844, 1888.49072265625, -867.6040649414062, 797.5889892578125, -190.656982421875, -204.1304931640625, 83.88137817382812, 735.3792114257812, 997.8887329101562, 754.767333984375, 433.84130859375, 405.22283935546875, -523.5504760742188, 178.71263122558594, 556.088134765625, 51.96160125732422, 1156.7119140625, -180.74072265625, -84.51296997070312, 430.354248046875, 389.9855041503906, 397.8009338378906, -7.912010192871094, 1130.0340576171875, 198.1930694580078, 102.11698150634766, 1090.7926025390625, 200.364990234375, 20.874191284179688, 559.79443359375, -1085.0350341796875, 827.98486328125, -303.0338439941406, 363.65411376953125, 472.0837707519531, 597.5425415039062, 430.7711486816406, 120.78060913085938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000647.npy"}
|
||||
{"epoch": 0.9500734214390602, "step": 648, "batch_size": 64, "mean": 558.360107421875, "std": 771.2487182617188, "min": -1023.7716674804688, "p10": -207.25274963378905, "median": 492.66680908203125, "p90": 1549.1890747070318, "max": 3488.336181640625, "pos_frac": 0.75, "sample": [492.8323059082031, 277.00921630859375, 199.82327270507812, 508.1465759277344, 252.76284790039062, 625.7941284179688, 1927.235595703125, 1602.9366455078125, 706.1400146484375, 1401.3831787109375, -38.403106689453125, -630.4890747070312, -82.23348999023438, 932.7424926757812, 264.4625244140625, -101.13368225097656, 1259.3485107421875, 681.0039672851562, 49.81956481933594, -209.463134765625, -25.380598068237305, 492.5013122558594, -75.00348663330078, 869.2307739257812, -327.30206298828125, 1821.0113525390625, 569.5702514648438, 128.17652893066406, -32.36277770996094, -129.8731689453125, 835.6236572265625, 748.402099609375, -203.41879272460938, 3488.336181640625, 646.1932373046875, 547.693115234375, -1023.7716674804688, 510.5963439941406, 68.35919189453125, -681.69921875, -208.8958740234375, 315.28680419921875, 2237.996826171875, 708.4653930664062, 709.60400390625, 409.31768798828125, 1423.778076171875, -66.71917724609375, 586.0291748046875, 704.136962890625, 1038.7711181640625, 371.1119079589844, 326.3377380371094, 475.46246337890625, 1309.6053466796875, 117.74532318115234, 525.11083984375, 776.904296875, 434.9170227050781, 2274.98046875, 1291.27490234375, 252.0301513671875, -453.8416442871094, 1829.037109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000648.npy"}
|
||||
{"epoch": 0.9515418502202643, "step": 649, "batch_size": 64, "mean": 272.9219665527344, "std": 667.59814453125, "min": -839.1541748046875, "p10": -473.2643798828125, "median": 214.38704681396484, "p90": 1036.956286621094, "max": 3316.512451171875, "pos_frac": 0.5625, "sample": [455.69683837890625, -16.22490692138672, 394.58544921875, -111.59439086914062, 485.1772766113281, -303.3934020996094, -38.88003158569336, 183.78033447265625, 711.7843017578125, 350.26361083984375, 536.04248046875, -143.17633056640625, -534.2919311523438, 84.54978942871094, -531.3367919921875, -117.15554809570312, -66.92698669433594, 411.6458435058594, -320.9356384277344, 248.717529296875, 685.6400146484375, 160.8287811279297, -470.3209533691406, -186.04090881347656, 925.9537963867188, -752.5446166992188, 664.5419311523438, 907.4436645507812, -81.63786315917969, 607.461181640625, 408.4737548828125, -367.19268798828125, 36.72979736328125, -70.54280853271484, 991.7821044921875, 550.7215576171875, 873.3306884765625, 383.73394775390625, 827.6416625976562, -399.1123046875, 1201.484130859375, 430.42510986328125, -2.8698463439941406, 1076.235595703125, 903.832763671875, -213.76785278320312, 1353.7601318359375, 3316.512451171875, 385.19842529296875, 374.78582763671875, -101.48810577392578, 244.99375915527344, -37.62303161621094, 884.3026733398438, -383.61883544921875, 1206.3184814453125, 1145.326416015625, -474.5258483886719, -16.605609893798828, -103.46265411376953, -835.7725830078125, -839.1541748046875, 1056.316650390625, -478.81610107421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000649.npy"}
|
||||
{"epoch": 0.9530102790014684, "step": 650, "batch_size": 64, "mean": 423.06866455078125, "std": 719.4315795898438, "min": -1161.5919189453125, "p10": -469.7320648193359, "median": 331.9364013671875, "p90": 1305.7423950195314, "max": 2435.885986328125, "pos_frac": 0.765625, "sample": [171.1776123046875, -599.98828125, 759.9981079101562, 768.9037475585938, 214.93838500976562, 963.6566162109375, 299.7662353515625, -957.9171142578125, 2176.63818359375, 629.9637451171875, 1138.052978515625, -755.6017456054688, 1388.8045654296875, 994.1852416992188, 885.8367919921875, 1252.6142578125, 328.609619140625, 1604.9208984375, 1138.350830078125, 2435.885986328125, 794.5347290039062, -47.03868865966797, 1631.7227783203125, 609.4502563476562, 241.3850860595703, 48.1412467956543, -91.73370361328125, 498.4768981933594, 183.62844848632812, 448.8073425292969, 614.2865600585938, -119.77241516113281, -321.7705078125, -973.61376953125, -451.72174072265625, 258.0656433105469, -27.888408660888672, 1944.89892578125, -112.38151550292969, 17.33477783203125, 510.6651611328125, 244.42727661132812, -477.4507751464844, 597.6663208007812, 502.9544372558594, 607.8717041015625, 80.59254455566406, 910.80810546875, 140.25897216796875, 1328.5115966796875, 302.7646484375, 335.26318359375, 639.230712890625, 942.326416015625, 837.1060180664062, 99.45104217529297, 484.74114990234375, -633.6854858398438, -1161.5919189453125, 241.6143798828125, 758.89306640625, 64.09339141845703, -361.9400329589844, 98.2136001586914], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000650.npy"}
|
||||
{"epoch": 0.9544787077826725, "step": 651, "batch_size": 64, "mean": 542.5181274414062, "std": 605.9367065429688, "min": -606.4207153320312, "p10": -64.90942382812499, "median": 436.37391662597656, "p90": 1286.8205078125, "max": 2219.025390625, "pos_frac": 0.859375, "sample": [870.2892456054688, 610.0585327148438, 2219.025390625, 100.5228271484375, -310.03729248046875, -14.96063232421875, 405.9976501464844, 610.7030639648438, 20.628829956054688, 152.1917266845703, 1278.4945068359375, 2211.279296875, 323.19122314453125, -159.58331298828125, 64.7142333984375, -77.0405502319336, 47.822269439697266, 527.7447509765625, -300.0570983886719, 819.5970458984375, 1706.59765625, 391.5494384765625, 824.10009765625, 305.35589599609375, 711.0202026367188, 56.27196502685547, -54.14666748046875, -69.52203369140625, 170.512451171875, 736.5078125, 181.8306121826172, 26.233680725097656, 948.2896118164062, 56.0709228515625, 1200.299560546875, 1029.1201171875, 573.66650390625, 73.83438110351562, 702.6505126953125, 4.219890594482422, 417.7236633300781, 1139.5721435546875, -292.072021484375, 549.740234375, 728.8452758789062, 659.4087524414062, 38.273590087890625, 939.3748168945312, 79.92820739746094, 660.4146728515625, 213.39566040039062, 355.7431640625, 1290.3887939453125, 1843.6749267578125, 395.02825927734375, -606.4207153320312, 1129.0487060546875, 455.024169921875, 1754.372802734375, 485.0326232910156, 1069.9437255859375, 1438.186279296875, 883.81982421875, 117.6673583984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000651.npy"}
|
||||
{"epoch": 0.9559471365638766, "step": 652, "batch_size": 64, "mean": 412.978759765625, "std": 680.460205078125, "min": -1406.5001220703125, "p10": -304.3774322509764, "median": 305.8762664794922, "p90": 1306.0082031250004, "max": 2448.010986328125, "pos_frac": 0.796875, "sample": [356.4085998535156, 129.41744995117188, 793.8242797851562, -1134.901123046875, 562.477294921875, 441.5021057128906, 186.5148162841797, -464.0238342285156, 772.8509521484375, 162.35235595703125, 1333.9093017578125, 1366.1417236328125, 1216.7166748046875, -1406.5001220703125, 73.46929168701172, 555.83447265625, 47.147369384765625, 854.22265625, 1345.4752197265625, -357.1651916503906, 1139.65673828125, 328.4469909667969, 98.89908599853516, -181.20599365234375, 41.78131866455078, 7.9041748046875, 877.3831787109375, -70.13182067871094, 404.9179992675781, 97.60315704345703, 120.04545593261719, 2448.010986328125, -375.3944396972656, 2299.60498046875, 331.9269714355469, 1590.585693359375, 36.53569030761719, 101.211669921875, 7.492006301879883, 31.084075927734375, 459.51409912109375, -16.32038116455078, 206.89346313476562, -100.6105728149414, 982.2008056640625, 902.484130859375, 897.400634765625, -113.92276763916016, 506.76507568359375, 738.0467529296875, 1704.032470703125, 86.22682189941406, 792.3049926757812, 235.34695434570312, -491.8009948730469, 428.2961120605469, 1240.9056396484375, -377.56024169921875, 384.3723449707031, 771.4781494140625, 231.47230529785156, 531.4949340820312, 283.3055419921875, -23.7169189453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000652.npy"}
|
||||
{"epoch": 0.9574155653450808, "step": 653, "batch_size": 64, "mean": 382.07867431640625, "std": 660.6478271484375, "min": -1091.6036376953125, "p10": -303.43859558105464, "median": 274.14305114746094, "p90": 1258.2618774414066, "max": 2227.124267578125, "pos_frac": 0.703125, "sample": [849.417236328125, -0.534454345703125, -144.9710235595703, 1852.6851806640625, 232.1973419189453, -317.4214172363281, 98.34913635253906, -222.1103515625, 74.1632080078125, 908.2011108398438, 583.0440673828125, -364.8043212890625, -64.12291717529297, 1311.7249755859375, 910.23486328125, 1285.447509765625, 847.851806640625, 1194.8287353515625, 1189.475830078125, -146.10113525390625, 183.63735961914062, 291.2102355957031, 1774.8072509765625, 472.38104248046875, 495.86480712890625, 780.05078125, 1049.5372314453125, 293.7947998046875, -14.326927185058594, -458.0728454589844, 1384.423095703125, 257.07586669921875, 476.47076416015625, -1091.6036376953125, -243.23605346679688, -171.98512268066406, 371.95379638671875, 540.71875, -670.4282836914062, 2127.662353515625, 2227.124267578125, 644.5110473632812, 6.047468185424805, -37.522544860839844, 624.1444091796875, 502.32305908203125, 18.12646484375, -270.81201171875, 240.02651977539062, 247.26947021484375, 522.0567626953125, 129.78932189941406, 729.8440551757812, 202.63406372070312, 476.8368225097656, -795.2595825195312, -347.8915100097656, 370.6474609375, 700.8844604492188, 119.51057434082031, 511.3088073730469, -261.7515869140625, 233.97140502929688, -268.2757568359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000653.npy"}
|
||||
{"epoch": 0.9588839941262849, "step": 654, "batch_size": 64, "mean": 369.70977783203125, "std": 625.1162719726562, "min": -543.942138671875, "p10": -333.85187072753905, "median": 311.2791290283203, "p90": 1318.5677368164063, "max": 2109.93017578125, "pos_frac": 0.65625, "sample": [51.03374481201172, 341.2115478515625, -212.96844482421875, 156.9312744140625, 433.68707275390625, -129.1029510498047, -329.61907958984375, 324.434326171875, -6.193279266357422, 785.14208984375, 726.3768920898438, -295.1293640136719, 112.17235565185547, 425.4180908203125, -464.12481689453125, -219.800537109375, -34.63213348388672, 2109.93017578125, 949.7902221679688, 561.3599853515625, 573.5173950195312, 1677.7745361328125, 452.92889404296875, 292.06854248046875, 1460.655517578125, 684.3557739257812, -169.3903045654297, 683.3805541992188, 148.46432495117188, -107.34979248046875, -149.5130157470703, 1308.2559814453125, 379.2309265136719, 26.413482666015625, 31.334320068359375, -28.03795623779297, 877.4180297851562, 150.83493041992188, 310.4598388671875, 387.9476013183594, 458.84161376953125, 710.0130004882812, 724.0264892578125, -265.9393615722656, -543.942138671875, 848.0328369140625, 331.2061767578125, 1811.289794921875, 1322.987060546875, -335.6659240722656, -26.28045654296875, 1438.060546875, 606.50244140625, 312.0984191894531, -64.07256317138672, -396.0399169921875, 583.1683959960938, -349.9765930175781, 1876.385986328125, 1307.4195556640625, 11.071868896484375, -366.41497802734375, -405.8221130371094, -202.19046020507812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000654.npy"}
|
||||
{"epoch": 0.960352422907489, "step": 655, "batch_size": 64, "mean": 389.4818115234375, "std": 620.5560913085938, "min": -761.013671875, "p10": -320.4558807373047, "median": 278.47413635253906, "p90": 1182.1852416992188, "max": 2355.885986328125, "pos_frac": 0.703125, "sample": [-348.81500244140625, -761.013671875, 1320.6580810546875, 181.26126098632812, 68.8997573852539, 1190.484130859375, 795.926025390625, 908.6652221679688, 294.9350280761719, -232.17581176757812, 712.2400512695312, 49.413185119628906, -66.17803955078125, -571.601318359375, -10.345098495483398, 1210.12255859375, -148.23873901367188, 867.4305419921875, -315.80853271484375, 1053.78369140625, -332.5748291015625, 429.67303466796875, 932.0966796875, -590.716552734375, -322.4476013183594, 582.173828125, 328.4253845214844, 111.04605102539062, -106.77708435058594, 291.3853759765625, -78.99471282958984, 1602.4349365234375, 162.8456573486328, 255.56016540527344, 639.1842651367188, 887.9697265625, -669.5636596679688, 750.4840698242188, 582.6358642578125, 1002.4669189453125, 411.9326171875, 63.946266174316406, 634.8972778320312, 2355.885986328125, 1604.4859619140625, -78.66206359863281, 339.9150695800781, -292.0514221191406, 1077.4429931640625, 561.3605346679688, -53.79240417480469, 98.7442626953125, 15.75848388671875, 265.5628967285156, 1162.8211669921875, 251.0865936279297, 1607.6900634765625, -203.8376007080078, -55.31257629394531, 543.8107299804688, 671.01611328125, 209.9511260986328, 157.4383087158203, 919.7951049804688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000655.npy"}
|
||||
{"epoch": 0.9618208516886931, "step": 656, "batch_size": 64, "mean": 647.8305053710938, "std": 707.4137573242188, "min": -507.18017578125, "p10": -106.18420333862304, "median": 435.26983642578125, "p90": 1723.362646484375, "max": 2541.660400390625, "pos_frac": 0.8125, "sample": [1516.2406005859375, 827.1543579101562, -507.18017578125, -43.18181228637695, 107.34906768798828, 281.08770751953125, 315.75653076171875, 589.5590209960938, 252.36936950683594, 813.6773681640625, 830.5458984375, 673.048583984375, 303.7315673828125, 38.505889892578125, 420.3958435058594, 1711.7275390625, -130.74444580078125, 135.69009399414062, 1728.34912109375, 178.74386596679688, 828.1083984375, -11.1671142578125, 2021.0146484375, -158.99209594726562, -113.55361938476562, 257.43292236328125, 958.2779541015625, 1177.98486328125, 485.70343017578125, 1114.936279296875, 378.0454406738281, 2541.660400390625, 697.1574096679688, -108.98728942871094, 2292.1865234375, 882.4197387695312, 450.1438293457031, 962.0711669921875, 2391.875732421875, 74.72421264648438, 2163.158203125, -162.11973571777344, 1407.53857421875, 37.2271728515625, 1086.5301513671875, 970.1409301757812, 375.01373291015625, 2101.378173828125, 314.6510925292969, 233.13613891601562, 755.5441284179688, 148.0972442626953, -192.17404174804688, 370.27679443359375, 948.8444213867188, 250.48391723632812, -75.92884826660156, 266.296630859375, -99.64366912841797, 1068.0491943359375, 937.5478515625, -2.664175033569336, 645.594482421875, 750.3065185546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000656.npy"}
|
||||
{"epoch": 0.9632892804698973, "step": 657, "batch_size": 64, "mean": 516.2921142578125, "std": 687.0513305664062, "min": -1582.7921142578125, "p10": -163.15619506835938, "median": 460.8197937011719, "p90": 1351.7451171875002, "max": 2418.077880859375, "pos_frac": 0.859375, "sample": [1070.251220703125, 30.11829376220703, 800.5647583007812, 465.46197509765625, 1474.6912841796875, 1555.0888671875, 739.1528930664062, 131.7371063232422, 1367.9854736328125, -156.5264892578125, -305.38201904296875, 506.8690185546875, 1313.8509521484375, -193.39378356933594, 221.1538543701172, 456.1776123046875, 61.580841064453125, 384.1393737792969, 99.98259735107422, 1534.6566162109375, 2309.38232421875, 498.39892578125, 75.55968475341797, 2418.077880859375, 1209.698486328125, 1242.5338134765625, 204.0351104736328, 57.85107421875, 684.6860961914062, 638.9312744140625, 818.9259033203125, 450.7420654296875, 33.413177490234375, -1582.7921142578125, 568.3857421875, 470.80621337890625, 261.958251953125, -165.99749755859375, 11.7420654296875, 316.258056640625, 654.6834716796875, -1007.351318359375, -255.30270385742188, 559.8665771484375, 951.4241943359375, 89.29766082763672, 250.79917907714844, 554.6788940429688, 481.8314208984375, 131.00506591796875, 750.56591796875, 720.6746826171875, 207.9438934326172, 802.8489990234375, 389.98834228515625, 359.0825500488281, 1289.6365966796875, 2393.537353515625, 426.8575134277344, -227.57290649414062, 280.5496520996094, -37.16450500488281, 685.5631103515625, 508.49566650390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000657.npy"}
|
||||
{"epoch": 0.9647577092511013, "step": 658, "batch_size": 64, "mean": 407.352783203125, "std": 607.00048828125, "min": -1120.95361328125, "p10": -375.47870178222655, "median": 394.4300994873047, "p90": 1232.2049926757816, "max": 2101.5654296875, "pos_frac": 0.75, "sample": [743.3314819335938, 731.4295654296875, 745.2391357421875, -340.88507080078125, 392.0473327636719, -197.58709716796875, 1127.5267333984375, 289.3591613769531, -438.2611083984375, 403.4881591796875, 361.9501953125, 1347.114990234375, 92.19529724121094, 213.60507202148438, 315.66912841796875, -570.5867919921875, 338.9518737792969, 388.791015625, 123.75162506103516, -64.67958068847656, 582.8521728515625, 113.45018768310547, 396.8128662109375, 150.36024475097656, -1120.95361328125, 917.2311401367188, -382.1831359863281, 634.4010009765625, 649.2379150390625, -154.20050048828125, -91.52787780761719, 261.90948486328125, -74.18954467773438, 421.26025390625, 280.24615478515625, 1500.5640869140625, -14.811532974243164, 1151.0037841796875, 751.9520263671875, 1127.024658203125, 92.41596984863281, 414.9573669433594, 538.6143188476562, 1375.21142578125, 1262.7015380859375, 577.0928344726562, -503.1775207519531, 575.1574096679688, 840.3411865234375, 813.228271484375, -486.86407470703125, 1387.5457763671875, -359.83502197265625, 2101.5654296875, 735.6296997070312, 1161.04638671875, 596.93798828125, 1447.59814453125, -318.2146301269531, 670.7998657226562, 81.49153137207031, 409.2400207519531, 188.20831298828125, -634.0044555664062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000658.npy"}
|
||||
{"epoch": 0.9662261380323054, "step": 659, "batch_size": 64, "mean": 408.43243408203125, "std": 594.0093383789062, "min": -878.2866821289062, "p10": -221.46066894531245, "median": 357.66741943359375, "p90": 1129.8806884765627, "max": 2592.271728515625, "pos_frac": 0.765625, "sample": [620.7976684570312, -25.946929931640625, 210.68490600585938, 910.015869140625, -150.4269256591797, -244.5683135986328, 1234.19384765625, -386.5322265625, -450.2359924316406, -146.8247833251953, 1034.58203125, 336.18634033203125, 556.8096313476562, -167.54283142089844, 76.08595275878906, 158.41775512695312, 642.5914916992188, 125.44912719726562, 223.4132080078125, 168.80130004882812, 346.7936096191406, 542.9948120117188, -434.4823913574219, 2592.271728515625, -878.2866821289062, 1402.515869140625, 368.5412292480469, 548.21875, 1164.2945556640625, 332.005615234375, 374.1376647949219, 453.4195556640625, 0.4413337707519531, -24.560623168945312, 689.4603271484375, 403.8741455078125, 52.72746276855469, 9.39621353149414, 61.8331298828125, 1160.8880615234375, -483.800048828125, -29.504173278808594, 699.6996459960938, -136.1195831298828, 769.6442260742188, 10.094173431396484, -72.4640121459961, 651.2373657226562, 1700.0499267578125, 1873.9217529296875, 812.4797973632812, 201.49122619628906, 827.5350341796875, 475.4282531738281, 479.5926513671875, 289.19677734375, 959.8314819335938, 1057.5301513671875, 601.62158203125, -301.7391052246094, 774.95703125, 139.70521545410156, 523.2531127929688, 423.599853515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000659.npy"}
|
||||
{"epoch": 0.9676945668135095, "step": 660, "batch_size": 64, "mean": 520.118896484375, "std": 654.8275146484375, "min": -1094.9327392578125, "p10": -240.6016372680664, "median": 475.0273132324219, "p90": 1433.5294311523444, "max": 2357.407470703125, "pos_frac": 0.78125, "sample": [-132.5821075439453, 248.780517578125, 1074.8568115234375, 489.1907043457031, 477.6175537109375, 495.32952880859375, 582.7609252929688, 1502.8660888671875, 1067.638427734375, 880.0176391601562, 351.0035705566406, -373.986083984375, 1213.59326171875, 574.4644165039062, 921.716064453125, 1685.7926025390625, -190.5841522216797, -62.41780090332031, -234.9504852294922, -217.51071166992188, 1063.88671875, 1489.4825439453125, 69.94532012939453, 460.0841979980469, 862.8431396484375, 1963.481201171875, 391.6819763183594, -245.94590759277344, 742.720947265625, 385.25299072265625, 116.0388412475586, 1302.97216796875, -525.3539428710938, 2357.407470703125, 267.4012756347656, 593.9880981445312, 765.7706909179688, 80.46943664550781, 437.0836181640625, 799.6572875976562, 214.51011657714844, 199.4851531982422, 711.9906005859375, 806.7564697265625, -158.27398681640625, 6.093353271484375, 130.5855712890625, 1676.190185546875, 924.1273803710938, 1126.564697265625, -276.7315673828125, -21.136390686035156, -519.0026245117188, 704.0185546875, 1016.0679931640625, 527.3828125, 142.82467651367188, 361.9529113769531, 545.0864868164062, 1848.5162353515625, -243.0235595703125, -1094.9327392578125, 472.43707275390625, 453.65625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000660.npy"}
|
||||
{"epoch": 0.9691629955947136, "step": 661, "batch_size": 64, "mean": 464.16583251953125, "std": 681.4691772460938, "min": -859.752685546875, "p10": -99.28571548461913, "median": 376.8994903564453, "p90": 1117.6450195312507, "max": 3411.3154296875, "pos_frac": 0.828125, "sample": [119.85639953613281, -100.29508209228516, 864.620849609375, -23.165483474731445, 397.13629150390625, 209.1282196044922, 543.6484985351562, -343.8459167480469, 708.120849609375, 322.75177001953125, 1422.069091796875, 344.5847473144531, 705.2792358398438, 614.65087890625, 3411.3154296875, 905.804931640625, 179.41030883789062, 46.6597900390625, -859.752685546875, 153.80010986328125, -10.726604461669922, 810.0725708007812, 146.16917419433594, 435.84942626953125, 218.33383178710938, 231.3723907470703, 346.32855224609375, 895.009765625, 1195.2440185546875, 56.840728759765625, 318.333251953125, 596.9386596679688, 587.7843017578125, 1278.2598876953125, 517.2638549804688, 266.8426513671875, 69.32586669921875, 356.6626892089844, 218.8627471923828, 520.3369750976562, -565.5216064453125, 397.67578125, -96.93052673339844, 802.0720825195312, 936.5806884765625, -783.478271484375, 796.05859375, 689.4566650390625, 39.48503875732422, 687.3460693359375, 2890.213623046875, -672.7070922851562, 256.94598388671875, 59.78227233886719, 763.3317260742188, 1434.7958984375, 122.30895233154297, 612.1287841796875, -190.69403076171875, 1332.5213623046875, 539.5419921875, 531.1691284179688, 448.77435302734375, -1.1257553100585938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000661.npy"}
|
||||
{"epoch": 0.9706314243759178, "step": 662, "batch_size": 64, "mean": 363.70428466796875, "std": 682.1414184570312, "min": -1302.299072265625, "p10": -437.1643920898437, "median": 385.4128875732422, "p90": 1130.444909667969, "max": 2386.787353515625, "pos_frac": 0.703125, "sample": [-62.707115173339844, 857.476318359375, 1203.516845703125, -1302.299072265625, 616.4332885742188, -483.75616455078125, 454.56744384765625, 856.1552734375, 1839.2828369140625, 17.748573303222656, 855.3123779296875, -403.2332763671875, -1074.3857421875, -181.65675354003906, 1414.9005126953125, 107.96880340576172, -15.524057388305664, 346.8359680175781, 2386.787353515625, 842.0814208984375, 1151.1409912109375, -172.03372192382812, 411.28131103515625, -181.30902099609375, 183.33013916015625, 795.5330810546875, 103.91304016113281, 969.879638671875, 411.4435729980469, 284.19537353515625, 338.98492431640625, -217.41845703125, 371.4609375, -341.2322692871094, 549.7030639648438, 1067.9964599609375, 571.5224609375, 399.3648376464844, 755.2271118164062, 618.2128295898438, 36.310150146484375, 191.1226348876953, 958.013427734375, 1874.94580078125, 784.8905029296875, 1257.2564697265625, -862.8580322265625, 447.2796630859375, 1082.154052734375, 347.17877197265625, -451.706298828125, 50.51992416381836, -92.38139343261719, 570.5944213867188, -570.8621826171875, -236.29153442382812, 595.1181640625, 544.39794921875, 358.4103088378906, 590.215087890625, 636.7416381835938, -783.13525390625, -321.076171875, -76.46498107910156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000662.npy"}
|
||||
{"epoch": 0.9720998531571219, "step": 663, "batch_size": 64, "mean": 439.39404296875, "std": 732.7092895507812, "min": -1619.02587890625, "p10": -191.28532333374017, "median": 286.7314910888672, "p90": 1173.6542846679688, "max": 3198.836181640625, "pos_frac": 0.78125, "sample": [836.607421875, 340.053955078125, 810.2310180664062, 45.95318603515625, 298.5816650390625, 1185.7501220703125, 199.55003356933594, 285.0259094238281, -55.82763671875, 355.5603332519531, 100.17186737060547, 158.0437469482422, 1571.4971923828125, 865.9147338867188, 64.56248474121094, 904.3997192382812, 821.6839599609375, 110.72968292236328, -1619.02587890625, 219.55572509765625, 288.43707275390625, 573.3624877929688, -310.11468505859375, 661.0220947265625, 649.0405883789062, 382.07470703125, 635.7124633789062, 122.37652587890625, 435.944091796875, 3198.836181640625, -296.14105224609375, 1112.8541259765625, 1649.2952880859375, 18.875381469726562, 661.3682250976562, 2212.135986328125, 1614.2991943359375, 81.55516052246094, 23.933992385864258, 1896.3221435546875, 1089.9554443359375, -222.50344848632812, 91.18798828125, 344.30572509765625, -105.9189453125, 589.4887084960938, 1035.8853759765625, 155.27223205566406, 136.0641326904297, 1019.4959716796875, 190.72850036621094, -33.408302307128906, 1145.4306640625, -1092.170654296875, 898.755615234375, -16.686939239501953, -527.2196044921875, -118.44303131103516, 84.83267211914062, 666.7972412109375, -315.13104248046875, -99.6902847290039, -86.00802612304688, 179.989501953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000663.npy"}
|
||||
{"epoch": 0.973568281938326, "step": 664, "batch_size": 64, "mean": 480.83197021484375, "std": 649.633544921875, "min": -811.4672241210938, "p10": -172.38188934326172, "median": 403.1311950683594, "p90": 1277.259753417969, "max": 2146.79931640625, "pos_frac": 0.765625, "sample": [1023.9490356445312, 1.5998821258544922, 548.104736328125, 688.6640625, 883.324951171875, -19.379249572753906, 327.5232849121094, 1902.0374755859375, -111.5423812866211, 1353.8409423828125, -458.79669189453125, 777.5130004882812, 387.74261474609375, 233.16807556152344, 635.2329711914062, 225.04876708984375, -723.004150390625, 454.95477294921875, -32.652122497558594, 546.13916015625, 1237.4654541015625, 1378.083251953125, 54.997802734375, 1192.3955078125, -173.77017211914062, 131.70889282226562, 1980.8438720703125, -213.5660400390625, 519.7266235351562, -551.9820556640625, 1155.61376953125, 210.47164916992188, 42.57293701171875, 1089.311279296875, 528.9273071289062, 401.4531555175781, -811.4672241210938, 404.8092346191406, 703.873291015625, 1136.7586669921875, 128.23487854003906, 485.1398620605469, 843.0798950195312, -117.0889892578125, 976.837646484375, 2146.79931640625, 937.0335693359375, 215.15969848632812, 2078.2080078125, -112.4578857421875, -154.67733764648438, 1294.314453125, 1086.537353515625, 172.1222381591797, 105.52633666992188, 485.7141418457031, 190.70852661132812, 313.395263671875, -169.14256286621094, -60.324981689453125, 424.7306213378906, 19.668548583984375, 791.9082641601562, -369.8778076171875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000664.npy"}
|
||||
{"epoch": 0.9750367107195301, "step": 665, "batch_size": 64, "mean": 412.32568359375, "std": 530.8889770507812, "min": -717.0221557617188, "p10": -131.58336181640624, "median": 369.7187042236328, "p90": 1089.9761413574226, "max": 1775.973876953125, "pos_frac": 0.796875, "sample": [-129.72061157226562, 14.22610092163086, 458.6234436035156, 281.5840148925781, -132.38168334960938, 518.02197265625, 828.122802734375, -79.279541015625, 1534.1900634765625, 508.199951171875, 294.20074462890625, 524.926025390625, 137.86077880859375, -573.3123168945312, 606.6573486328125, 8.520448684692383, -47.47821044921875, 241.36672973632812, 16.535171508789062, 572.4468383789062, 703.8380737304688, 125.29408264160156, 434.315185546875, 455.1828308105469, -65.69828796386719, 309.4781799316406, -717.0221557617188, 1511.6375732421875, 727.0199584960938, -27.74417495727539, 777.1588134765625, 372.065673828125, 701.551025390625, 367.3717346191406, 1161.0595703125, 247.78485107421875, 1211.6153564453125, 534.342529296875, 417.7817077636719, 247.8815155029297, 221.0011749267578, 1775.973876953125, 579.057373046875, -335.87420654296875, 452.80072021484375, 901.767822265625, 345.40673828125, -511.26641845703125, 552.8804931640625, 924.1148071289062, 97.7355728149414, 205.35899353027344, 918.8804321289062, 210.51290893554688, 1686.255859375, -276.59320068359375, 681.1339111328125, -210.17294311523438, 163.06805419921875, 835.7131958007812, 1714.07666015625, 40.33552551269531, 444.6375732421875, -106.18557739257812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000665.npy"}
|
||||
{"epoch": 0.9765051395007343, "step": 666, "batch_size": 64, "mean": 424.4996643066406, "std": 613.1046752929688, "min": -1076.9796142578125, "p10": -307.04052734375, "median": 323.110595703125, "p90": 1077.39521484375, "max": 2647.434814453125, "pos_frac": 0.78125, "sample": [799.8553466796875, 365.67547607421875, 719.0750122070312, 423.73126220703125, -421.6443786621094, 697.9525756835938, 1061.666748046875, 1008.3748779296875, 154.36819458007812, 955.9268798828125, -34.702659606933594, 245.72463989257812, 363.9931945800781, -481.340576171875, -290.85687255859375, 254.28114318847656, 282.9346008300781, -129.95025634765625, -78.023681640625, -104.0843505859375, 1289.4468994140625, -0.32274627685546875, 815.5665283203125, 562.830078125, 2066.849365234375, 10.876205444335938, 806.8848876953125, 265.1175537109375, -137.41964721679688, -653.6278686523438, 242.54946899414062, 653.219482421875, 294.51495361328125, 1084.135986328125, 904.257568359375, 441.5040283203125, 941.2737426757812, -346.8892517089844, 369.38623046875, 539.7926025390625, 42.5869255065918, 866.695068359375, -313.97637939453125, 1316.0369873046875, 328.8970947265625, 317.3240966796875, -1076.9796142578125, 1014.3768310546875, 363.3397521972656, 109.11184692382812, 389.40582275390625, 687.2120361328125, 1084.9583740234375, 27.179019927978516, 282.314453125, 148.82894897460938, 233.316650390625, 308.84326171875, 2647.434814453125, 222.3636016845703, -425.0522155761719, 926.7694702148438, 294.787353515625, 1429.3013916015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000666.npy"}
|
||||
{"epoch": 0.9779735682819384, "step": 667, "batch_size": 64, "mean": 493.6817321777344, "std": 643.7807006835938, "min": -719.4481201171875, "p10": -303.0472290039062, "median": 444.4435272216797, "p90": 1231.3046630859378, "max": 2321.8681640625, "pos_frac": 0.78125, "sample": [447.1406555175781, -499.00726318359375, 693.2042236328125, -719.4481201171875, -52.638214111328125, 1016.0032958984375, 969.7071533203125, 540.2616577148438, 494.9554443359375, 1268.33056640625, 223.89149475097656, 98.25717163085938, 105.33185577392578, 400.4385986328125, 335.85205078125, -692.9469604492188, 802.946044921875, 560.2156372070312, 1688.12158203125, 886.7997436523438, 796.212890625, 230.45797729492188, 1144.910888671875, 122.02201080322266, 599.36572265625, 327.32159423828125, 1028.5364990234375, -135.2469482421875, 1015.450927734375, 761.1941528320312, 786.720458984375, 566.7183227539062, 962.9400024414062, 623.0394287109375, 422.1952209472656, -321.6210632324219, 391.02154541015625, 441.74639892578125, 367.5676574707031, -40.15528106689453, -150.93125915527344, 1541.9560546875, 1837.9481201171875, 1.2905502319335938, 729.7284545898438, 1472.02490234375, 690.5293579101562, 243.01461791992188, 158.13525390625, 725.6353759765625, -20.534828186035156, 819.9310302734375, -398.28216552734375, 259.590576171875, 2321.8681640625, 2099.258056640625, 81.3933334350586, 1051.56640625, -598.3892211914062, 319.2522277832031, -689.0977172851562, -259.7082824707031, -52.692169189453125, 754.3280639648438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000667.npy"}
|
||||
{"epoch": 0.9794419970631424, "step": 668, "batch_size": 64, "mean": 347.7958679199219, "std": 771.0363159179688, "min": -1302.610107421875, "p10": -528.6578308105469, "median": 285.7936096191406, "p90": 1265.0007934570312, "max": 2774.34326171875, "pos_frac": 0.6875, "sample": [481.8768615722656, 274.86724853515625, 1333.7769775390625, -1219.252197265625, 214.74342346191406, 884.5366821289062, 1355.720703125, 178.4342498779297, 1074.82275390625, 463.06890869140625, -605.4319458007812, 670.8151245117188, 120.47100067138672, 77.52035522460938, 391.56610107421875, 1100.6666259765625, 164.2113800048828, -21.151229858398438, -442.3121337890625, -529.6677856445312, -986.568603515625, 989.5248413085938, -475.0069580078125, 827.1597290039062, -160.52578735351562, 655.8869018554688, 385.4344787597656, 468.2151794433594, 36.31379699707031, 758.7865600585938, 326.45843505859375, 640.320556640625, 1241.6923828125, -526.30126953125, 1037.5755615234375, 436.12799072265625, -931.8313598632812, 1074.6790771484375, 85.87085723876953, -191.81210327148438, 250.71417236328125, -55.81806182861328, 1338.36767578125, -109.87615203857422, -600.1593627929688, 1168.3585205078125, -130.60833740234375, -402.16241455078125, 308.4740295410156, -41.93824005126953, 365.6939697265625, 202.60052490234375, 296.719970703125, 195.21810913085938, -165.45077514648438, 1274.9901123046875, 406.7944641113281, 393.08087158203125, 2386.4716796875, -1302.610107421875, -79.45531463623047, 2065.6904296875, 58.2138671875, 2774.34326171875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000668.npy"}
|
||||
{"epoch": 0.9809104258443465, "step": 669, "batch_size": 64, "mean": 498.524169921875, "std": 657.339111328125, "min": -1383.6656494140625, "p10": -212.85424194335934, "median": 415.4113311767578, "p90": 1390.7096801757814, "max": 2472.658935546875, "pos_frac": 0.8125, "sample": [226.11082458496094, 52.55998229980469, -68.3540267944336, 1022.134765625, -859.8917236328125, 383.22979736328125, 664.6256103515625, 755.73486328125, 405.0622863769531, -81.62053680419922, 2472.658935546875, -231.21902465820312, 28.138839721679688, 280.6777648925781, -295.1313171386719, 408.6443786621094, 1575.9197998046875, -87.26073455810547, 104.43134307861328, 190.33689880371094, 549.1357421875, 736.1002197265625, 1575.1302490234375, 1293.7161865234375, 89.29478454589844, 1774.97509765625, 72.0023193359375, 1405.765380859375, 1017.2611083984375, 79.79345703125, 604.588623046875, 1295.0224609375, -170.00308227539062, 623.0535888671875, 1448.3192138671875, -587.1884155273438, -232.62991333007812, 851.490478515625, 745.3282470703125, 832.21630859375, 1355.5797119140625, 320.4652404785156, 148.8193359375, 323.1876525878906, -1383.6656494140625, 792.3921508789062, 924.736328125, 657.640869140625, 423.3935546875, 271.0084228515625, -495.2043762207031, 1011.521728515625, 422.17828369140625, 431.0364685058594, 733.5415649414062, 742.0709838867188, 53.29338836669922, 265.0502624511719, 488.7674865722656, 1230.5694580078125, -15.16872787475586, 1503.00732421875, 380.2542419433594, 370.9103088378906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000669.npy"}
|
||||
{"epoch": 0.9823788546255506, "step": 670, "batch_size": 64, "mean": 497.284912109375, "std": 660.2279663085938, "min": -1287.3046875, "p10": -248.16761932373046, "median": 374.3037872314453, "p90": 1391.0453491210938, "max": 1766.820068359375, "pos_frac": 0.765625, "sample": [227.13406372070312, 486.79241943359375, 246.53421020507812, 1371.2335205078125, 472.2629089355469, -148.33526611328125, -175.50729370117188, -54.81964111328125, 1070.1124267578125, 597.37744140625, 305.66265869140625, -74.15283203125, -1287.3046875, 1766.820068359375, 1176.8060302734375, 1761.549072265625, 714.0218505859375, 348.5641784667969, 1166.440673828125, 664.8121337890625, 123.20668029785156, -513.6094970703125, 1349.8687744140625, -258.259033203125, -568.8501586914062, 376.6502380371094, 1487.954833984375, 34.440921783447266, 1083.5897216796875, -520.138916015625, 1491.6864013671875, 783.5413208007812, -235.3527374267578, 215.28933715820312, 237.34738159179688, 1399.5361328125, 1256.162841796875, -232.5728302001953, 214.93698120117188, 1305.48828125, 384.10931396484375, 1218.97412109375, 866.99853515625, -240.30059814453125, 1704.7908935546875, 899.753173828125, 371.95733642578125, 334.9976806640625, -251.53919982910156, -275.26171875, 46.586639404296875, 1434.908203125, 105.06854248046875, 275.61309814453125, 1360.270263671875, 140.5283966064453, 299.2280578613281, 1020.8779907226562, -3.091796875, 887.1300659179688, 438.1571044921875, 519.5617065429688, 42.2496337890625, 577.7469482421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000670.npy"}
|
||||
{"epoch": 0.9838472834067548, "step": 671, "batch_size": 64, "mean": 399.7263488769531, "std": 658.7308349609375, "min": -1557.872314453125, "p10": -220.95528717041014, "median": 362.93568420410156, "p90": 1241.62431640625, "max": 1863.3753662109375, "pos_frac": 0.75, "sample": [48.74790954589844, -1557.872314453125, 486.7471618652344, 100.21682739257812, -133.78079223632812, 751.5239868164062, 275.1359558105469, 332.01971435546875, 660.8050537109375, 408.90997314453125, 433.11376953125, -186.19046020507812, 137.5526885986328, 1125.7734375, 989.91943359375, 48.70982360839844, 235.76893615722656, 224.32757568359375, 997.895263671875, 771.4048461914062, 131.45211791992188, 1471.009765625, -1487.426025390625, 1181.09619140625, 814.769287109375, -56.81996154785156, 1817.761474609375, 417.0871276855469, 265.62310791015625, 613.6925659179688, -125.97476196289062, -606.7095947265625, -438.7189025878906, 520.6226196289062, 1318.8095703125, -154.65365600585938, 367.177978515625, 278.8195495605469, 658.8809204101562, 1247.68994140625, 1380.9886474609375, 370.5566711425781, -194.98683166503906, 60.541893005371094, 811.7918701171875, -29.389419555664062, 258.1588134765625, 1015.0296630859375, -322.6619567871094, -90.19913482666016, 713.9024047851562, 443.3728942871094, -82.37091827392578, 255.79339599609375, 1227.47119140625, 933.0338745117188, -232.08462524414062, -650.8709106445312, 214.70143127441406, 1401.327880859375, 388.90869140625, 1863.3753662109375, 1102.48486328125, 358.6933898925781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000671.npy"}
|
||||
{"epoch": 0.9853157121879589, "step": 672, "batch_size": 64, "mean": 404.5885925292969, "std": 744.872802734375, "min": -1355.709716796875, "p10": -547.9264678955077, "median": 355.21607971191406, "p90": 1294.62890625, "max": 2706.104248046875, "pos_frac": 0.6875, "sample": [422.49285888671875, -46.952239990234375, -351.19439697265625, 186.72601318359375, -187.87294006347656, 152.73744201660156, 735.4442138671875, 402.33160400390625, 1687.180419921875, 258.9657287597656, 92.86125946044922, -116.59229278564453, -402.0025939941406, -274.60760498046875, 539.3702392578125, 247.27432250976562, -151.76919555664062, -664.4442749023438, 460.77313232421875, 1689.8206787109375, 86.3131103515625, 858.2139892578125, -2.8209152221679688, -135.48532104492188, 1279.2711181640625, -1355.709716796875, 1143.58154296875, -937.4879150390625, 487.0029296875, 2706.104248046875, -134.31344604492188, 948.2716674804688, 941.9765625, 1118.92431640625, 1154.5506591796875, 775.200439453125, 578.2020263671875, -610.4652709960938, 214.42044067382812, 2013.605712890625, -876.19580078125, -81.83077239990234, 322.0357666015625, 1265.375732421875, 857.91552734375, 643.5909423828125, 914.4364013671875, 205.5941162109375, 762.6331787109375, 600.8480834960938, 1472.968505859375, 347.0832824707031, -135.23583984375, 713.4417724609375, 75.595703125, 778.000732421875, 1301.2108154296875, 62.06389617919922, -76.48918151855469, -745.7604370117188, 595.4073486328125, 363.348876953125, 1399.1458740234375, -681.4124755859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000672.npy"}
|
||||
{"epoch": 0.986784140969163, "step": 673, "batch_size": 64, "mean": 387.2952880859375, "std": 636.2482299804688, "min": -1311.568359375, "p10": -314.7584014892578, "median": 389.45518493652344, "p90": 1035.1853454589848, "max": 3110.2158203125, "pos_frac": 0.78125, "sample": [-215.17466735839844, -655.8583984375, 478.5237731933594, 922.2774047851562, 342.96673583984375, 550.1705322265625, 638.2029418945312, 349.16595458984375, 1250.63720703125, 445.9229431152344, -172.4651336669922, 1450.0377197265625, 532.2978515625, 635.099609375, 118.43663024902344, 168.271728515625, -481.448974609375, 836.6311645507812, 1685.75830078125, 333.05804443359375, -325.05816650390625, 352.58935546875, 382.5273742675781, -79.45152282714844, 195.3700408935547, -1311.568359375, 180.88381958007812, 668.1663818359375, -424.5685729980469, 195.66383361816406, 733.4388427734375, 1398.71435546875, 146.5169219970703, -44.05586624145508, 765.3292846679688, 678.1139526367188, 396.38299560546875, -549.8684692382812, 507.98980712890625, 803.458251953125, 756.0598754882812, 44.73700714111328, 177.54580688476562, 706.75146484375, 453.252685546875, 88.87445831298828, 521.6317749023438, -200.42062377929688, 455.1713562011719, 3110.2158203125, 113.83113861083984, 77.67768859863281, 567.4407958984375, 627.7698974609375, -748.8797607421875, 554.552001953125, 527.3956909179688, 1083.574462890625, 119.99237060546875, -290.7256164550781, -59.641014099121094, 610.6984252929688, 1248.26318359375, 358.0431213378906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000673.npy"}
|
||||
{"epoch": 0.9882525697503671, "step": 674, "batch_size": 64, "mean": 493.2326965332031, "std": 663.4459228515625, "min": -587.7816162109375, "p10": -249.51350250244136, "median": 438.3824157714844, "p90": 1107.173815917969, "max": 3108.033447265625, "pos_frac": 0.765625, "sample": [-79.67828369140625, 36.46439743041992, 854.2694702148438, 675.4672241210938, 688.2077026367188, 747.400390625, 32.82945251464844, -105.61116027832031, -496.0224914550781, 1124.6676025390625, 1371.6563720703125, 900.5538330078125, 59.20900344848633, 495.342041015625, -194.2872772216797, 504.134765625, -68.45947265625, 2033.9404296875, 673.725830078125, 3108.033447265625, 556.7169189453125, 487.9278259277344, 42.47617721557617, 1013.0081176757812, 435.69769287109375, -273.181884765625, 1032.3424072265625, 441.067138671875, 547.5869140625, -132.90625, 1066.35498046875, 1994.7037353515625, -125.34927368164062, 198.415283203125, 377.59967041015625, 821.6136474609375, -319.18511962890625, -394.72125244140625, -332.0863952636719, 885.3176879882812, 662.8920288085938, 296.7344055175781, 352.09423828125, 1689.666259765625, 317.2503662109375, 954.838623046875, 841.976806640625, 980.9944458007812, 522.5158081054688, 146.90309143066406, -587.7816162109375, 209.8511962890625, 1842.9703369140625, -308.2030334472656, -21.436182022094727, 659.239990234375, 172.04638671875, 742.3086547851562, 321.1130065917969, 566.332275390625, 163.26199340820312, -84.83808898925781, 159.42337036132812, 281.49554443359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000674.npy"}
|
||||
{"epoch": 0.9897209985315712, "step": 675, "batch_size": 64, "mean": 506.56787109375, "std": 669.2509155273438, "min": -597.33447265625, "p10": -86.0140399932861, "median": 363.5639953613281, "p90": 1395.0937133789064, "max": 2719.42236328125, "pos_frac": 0.78125, "sample": [246.4782257080078, 1405.534423828125, 543.1369018554688, 122.50227355957031, 47.94329071044922, 140.3657684326172, 816.3010864257812, -34.61128234863281, 583.7874145507812, 293.7241516113281, 894.11865234375, -218.15625, 605.30859375, 1111.342529296875, -98.95012664794922, -35.133026123046875, 160.96450805664062, 1321.6351318359375, 483.8099670410156, 489.6487121582031, 774.42236328125, 5.612144470214844, 2265.00830078125, 789.9226684570312, 24.54167938232422, 370.3861389160156, -55.829837799072266, 706.1675415039062, 598.9083251953125, 434.73663330078125, 382.3838806152344, 1491.7998046875, 329.5283203125, 1791.41650390625, -46.728912353515625, 1269.56591796875, 2719.42236328125, -267.44183349609375, 976.5541381835938, 2036.444091796875, 280.1583557128906, 80.22640991210938, -43.249900817871094, 75.10601806640625, 312.84490966796875, -54.507965087890625, -570.7361450195312, 238.59262084960938, 648.4486083984375, 260.8489074707031, 129.68455505371094, 1468.76123046875, 732.2401123046875, -597.33447265625, -543.6295776367188, 400.5364990234375, 108.791015625, 693.157958984375, -25.081146240234375, 1370.7320556640625, 356.7418518066406, 1100.5499267578125, 993.20361328125, -472.3127746582031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000675.npy"}
|
||||
{"epoch": 0.9911894273127754, "step": 676, "batch_size": 64, "mean": 481.5036315917969, "std": 605.1775512695312, "min": -898.1253662109375, "p10": -150.88591003417966, "median": 410.8320617675781, "p90": 1380.472900390625, "max": 2394.810302734375, "pos_frac": 0.765625, "sample": [2.5683822631835938, 1580.1798095703125, 825.7559204101562, 956.0459594726562, -65.80162048339844, -99.2825698852539, 447.6086730957031, 704.909423828125, 58.75949478149414, -898.1253662109375, 564.2432861328125, 43.2159423828125, -105.54286193847656, 1167.567138671875, 1668.923583984375, 2394.810302734375, -131.2286376953125, -173.7090301513672, -20.635482788085938, -164.68978881835938, 4.019811630249023, 500.4134521484375, 597.5869140625, -65.12149047851562, -159.31045532226562, 791.0096435546875, 99.77851867675781, 1363.089111328125, 349.8870544433594, 1736.08154296875, -48.988372802734375, 684.49560546875, 271.91888427734375, 723.40234375, 104.41455841064453, 794.2451782226562, 1549.5908203125, 429.039794921875, 524.8046875, 1387.923095703125, 757.1760864257812, -45.107017517089844, 147.53294372558594, -365.2386474609375, 1343.8656005859375, 970.7860107421875, 64.03607177734375, 990.4701538085938, 506.911865234375, 392.62432861328125, 803.5693359375, 636.455078125, 251.4493865966797, 371.2049560546875, 68.56900787353516, 242.1409912109375, 869.4136962890625, -216.4552001953125, 1414.349609375, 151.20562744140625, 495.7027587890625, 257.93878173828125, -183.08302307128906, 496.86041259765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000676.npy"}
|
||||
{"epoch": 0.9926578560939795, "step": 677, "batch_size": 64, "mean": 465.65484619140625, "std": 754.8594360351562, "min": -912.7601318359375, "p10": -182.20745544433592, "median": 344.06785583496094, "p90": 1213.0600463867188, "max": 3902.89013671875, "pos_frac": 0.765625, "sample": [90.94629669189453, 3006.06298828125, 611.3859252929688, 5.885887145996094, 484.2826843261719, -172.51202392578125, -155.8359375, -333.5796813964844, 457.68353271484375, -43.0081787109375, 550.5761108398438, 524.2423095703125, 89.78355407714844, 1205.59033203125, 304.933837890625, 887.7507934570312, 901.6141967773438, 31.832839965820312, 299.02215576171875, 174.1401824951172, 1227.5908203125, 312.6849060058594, 1093.77587890625, -175.8273468017578, 354.21820068359375, 1309.501708984375, 108.88160705566406, 496.0964050292969, 368.668701171875, 1144.1217041015625, 392.0023193359375, -82.21151733398438, -184.94178771972656, 246.06695556640625, 208.54229736328125, -662.6420288085938, 902.21142578125, 302.2952880859375, 3902.89013671875, 171.06365966796875, 204.96511840820312, 512.139404296875, 975.8942260742188, 787.8831176757812, -144.34829711914062, 1216.2613525390625, 405.88665771484375, 303.6084899902344, 756.93701171875, 189.35292053222656, 1351.8641357421875, -10.130359649658203, 794.94921875, 333.9175109863281, 538.0828857421875, -431.2708435058594, -912.7601318359375, 1748.885498046875, 547.6646118164062, 687.174560546875, 727.7330322265625, -288.91668701171875, -780.9130249023438, -68.73739624023438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000677.npy"}
|
||||
{"epoch": 0.9941262848751835, "step": 678, "batch_size": 64, "mean": 544.760009765625, "std": 748.979736328125, "min": -1823.683349609375, "p10": -90.13137969970703, "median": 450.89044189453125, "p90": 1213.4886718750001, "max": 2753.530029296875, "pos_frac": 0.796875, "sample": [271.0369567871094, 941.7806396484375, 558.3042602539062, 631.49951171875, 502.446533203125, -81.44149780273438, 556.0328979492188, -1823.683349609375, 90.58978271484375, -274.5692443847656, -87.22425842285156, 1239.3914794921875, 72.57579040527344, 1176.9449462890625, -72.1822738647461, 555.18408203125, 851.4332885742188, -364.87469482421875, 909.2156372070312, 845.2969360351562, 984.6689453125, 1080.865478515625, 14.767715454101562, 2389.836181640625, 1166.668701171875, 2753.530029296875, 87.37919616699219, 261.536376953125, -13.490795135498047, 1153.819580078125, -488.6672058105469, 193.01559448242188, 275.59344482421875, 768.1983032226562, 274.38055419921875, 1016.0059814453125, 215.01229858398438, 1162.9041748046875, 262.27496337890625, 926.7415161132812, -201.70127868652344, 910.3773803710938, 1229.1502685546875, 1731.79638671875, 354.67388916015625, 267.73931884765625, 2390.031982421875, 2344.1806640625, 80.59135437011719, 574.2049560546875, 279.32952880859375, -595.863037109375, 399.3343505859375, -2.4058074951171875, 43.075340270996094, 68.28700256347656, 602.8865966796875, 1108.7635498046875, 626.227783203125, 520.2271728515625, 390.0557556152344, -91.37728881835938, 938.382080078125, -86.12386322021484], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000678.npy"}
|
||||
{"epoch": 0.9955947136563876, "step": 679, "batch_size": 64, "mean": 384.6784362792969, "std": 626.41162109375, "min": -769.24951171875, "p10": -290.1199600219726, "median": 286.2602844238281, "p90": 1356.7357543945316, "max": 2123.605224609375, "pos_frac": 0.734375, "sample": [585.2535400390625, 61.63013458251953, 1271.2421875, 2050.54345703125, 471.1752624511719, 706.6802368164062, 780.70556640625, 417.8222961425781, 1393.3758544921875, 341.3777770996094, 430.55426025390625, 472.527587890625, 436.9147644042969, 168.52134704589844, 1039.876953125, -411.3751525878906, -152.53890991210938, 85.10653686523438, 212.94732666015625, 939.312744140625, 630.9860229492188, 1567.1015625, -223.10939025878906, 116.69546508789062, 136.13880920410156, -318.8387756347656, 359.05902099609375, 1577.563720703125, -473.8596496582031, 529.0020751953125, 793.861328125, 64.7109375, -69.06645202636719, 36.71674346923828, 987.367431640625, 367.11474609375, 1593.173583984375, 145.7448272705078, -188.87451171875, 72.8975601196289, 298.3038635253906, -55.40821838378906, 405.1810607910156, -32.149261474609375, -363.7513122558594, 1499.6727294921875, 274.2167053222656, -193.63290405273438, -769.24951171875, -160.27212524414062, 957.552734375, -671.395751953125, 538.0047607421875, -192.8158416748047, 530.83935546875, 259.3592224121094, -59.27114486694336, 200.81607055664062, 757.6627197265625, 2123.605224609375, 207.15139770507812, 648.8533935546875, -700.2548217773438, 110.36225128173828], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000679.npy"}
|
||||
{"epoch": 0.9970631424375918, "step": 680, "batch_size": 64, "mean": 513.3322143554688, "std": 767.4342041015625, "min": -2263.132080078125, "p10": -232.54038391113272, "median": 392.4099578857422, "p90": 1549.5302856445314, "max": 2415.0927734375, "pos_frac": 0.8125, "sample": [1187.626220703125, 490.5404968261719, 395.68487548828125, 958.501708984375, 148.9308319091797, 1152.1971435546875, 1824.7354736328125, 589.8768920898438, 199.27359008789062, 389.1350402832031, 544.9705810546875, 188.0784454345703, 807.7135009765625, -397.6634216308594, 1086.1248779296875, 1474.296630859375, 741.6050415039062, -644.0548095703125, -277.28466796875, 880.4332275390625, 1190.158447265625, 225.82196044921875, -118.56452941894531, 343.6864318847656, -279.6149597167969, 980.117919921875, 831.0507202148438, 172.94248962402344, 635.2283935546875, 1564.355712890625, 196.89657592773438, 45.32306671142578, 195.03012084960938, 1762.0357666015625, 1105.709228515625, 157.46568298339844, 176.5796356201172, 531.765625, 2055.609375, -780.8399047851562, 134.5504150390625, 247.57388305664062, 462.23809814453125, 580.0997314453125, 153.30029296875, -2263.132080078125, 1913.6817626953125, -108.38105010986328, -569.6212768554688, 1514.9376220703125, 707.197509765625, 2415.0927734375, 21.011962890625, 5.0981903076171875, 146.06161499023438, 1487.341552734375, 733.0565185546875, 1660.4256591796875, -37.10153579711914, 136.78070068359375, -128.13705444335938, 227.95486450195312, -19.458770751953125, 701.2124633789062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000680.npy"}
|
||||
{"epoch": 0.9985315712187959, "step": 681, "batch_size": 64, "mean": 398.695068359375, "std": 649.7728271484375, "min": -898.1030883789062, "p10": -392.05385131835936, "median": 396.9673767089844, "p90": 1172.8136108398442, "max": 2215.88330078125, "pos_frac": 0.6875, "sample": [791.093994140625, -383.638427734375, 514.31494140625, 176.34783935546875, 892.2001953125, -898.1030883789062, 186.41146850585938, 412.2407531738281, -452.7953186035156, 266.3453369140625, -254.65185546875, 381.6940002441406, -463.6009521484375, -395.66046142578125, 1216.748779296875, 1640.306884765625, 463.027587890625, -56.26079559326172, -270.19384765625, 1032.575439453125, -225.40625, -10.202140808105469, 854.6502685546875, 479.0786437988281, 1782.391357421875, 714.0509643554688, 208.67581176757812, 992.03564453125, 957.1810302734375, 594.6284790039062, 109.08747100830078, 495.4100341796875, 214.9635009765625, -88.22732543945312, -171.4639892578125, 211.6799774169922, 202.3099822998047, -623.967041015625, 494.3758239746094, -126.43333435058594, -349.66693115234375, 2215.88330078125, -759.2943115234375, 904.7896118164062, -97.79521942138672, 523.6024169921875, -468.5655822753906, 205.7639923095703, 39.94679260253906, -264.67156982421875, 999.0328979492188, -298.0276184082031, 888.3651733398438, 581.992431640625, 1366.73828125, 502.58673095703125, 1051.8670654296875, 1070.2982177734375, 1048.85693359375, 692.8165283203125, 144.38072204589844, 1373.112060546875, 1405.905517578125, 875.3475341796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p3-20260429-085449/margin_logs/step_0000681.npy"}
|
||||
3
margin_logs/step_0000001.npy
Normal file
3
margin_logs/step_0000001.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:cb7ed5e9b5d6de6c4e509dd17cf5d9c91337fabd0c174e116c5e60872823ad93
|
||||
size 384
|
||||
3
margin_logs/step_0000002.npy
Normal file
3
margin_logs/step_0000002.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:fc23171824afa57340cda53f69d83aef67c7c0b95175e9ec4a3a7bc3c221bc4f
|
||||
size 384
|
||||
3
margin_logs/step_0000003.npy
Normal file
3
margin_logs/step_0000003.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:ef9fba6337a44bc530377336d3f3a7b1c18ba2344d0b825de24c378235189a9e
|
||||
size 384
|
||||
3
margin_logs/step_0000004.npy
Normal file
3
margin_logs/step_0000004.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:8f58f909c6d7042bfeef50ff30480bfe6440a4d8606bfef76d98727242990fac
|
||||
size 384
|
||||
3
margin_logs/step_0000005.npy
Normal file
3
margin_logs/step_0000005.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:e3d132f2cfb375ad7363c4337963aba216f000d5442bcef686d06a58bfe1d2cb
|
||||
size 384
|
||||
3
margin_logs/step_0000006.npy
Normal file
3
margin_logs/step_0000006.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:52f2b7c6e42e54c55fec6fac4d4c5021fd7bf385906f2848ef1b4cb5ea4ca024
|
||||
size 384
|
||||
3
margin_logs/step_0000007.npy
Normal file
3
margin_logs/step_0000007.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:b6ce95263774ccca4765825c0c1210107b13dc26b721aeeb6fa8c110128f3270
|
||||
size 384
|
||||
3
margin_logs/step_0000008.npy
Normal file
3
margin_logs/step_0000008.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:61d4aef506176d6cd9a50254d05b75b7969a64b60ce80db5efc8a27bb13a8a1b
|
||||
size 384
|
||||
3
margin_logs/step_0000009.npy
Normal file
3
margin_logs/step_0000009.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:935d35a8bec624c65cc5ac00b4e80d214d38d879d8c5e30c627b86c314afc35b
|
||||
size 384
|
||||
3
margin_logs/step_0000010.npy
Normal file
3
margin_logs/step_0000010.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:e25766c3b5d8aa1f00e29dbdd190430b009186f35c9ca8f0cf730af1e9107ff4
|
||||
size 384
|
||||
3
margin_logs/step_0000011.npy
Normal file
3
margin_logs/step_0000011.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:5f2add72b56b4366ff6c34072c06568cca67d04ff435993995515864841d53c1
|
||||
size 384
|
||||
3
margin_logs/step_0000012.npy
Normal file
3
margin_logs/step_0000012.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:bcaae42d038f49b165c1aec3c3b931354a29d359466a30db5324c25079fd9883
|
||||
size 384
|
||||
3
margin_logs/step_0000013.npy
Normal file
3
margin_logs/step_0000013.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:0ceec6cd0d93c64610e80eb9b4ef2d728fb90548fd8d20ee18f2acb185c97fcd
|
||||
size 384
|
||||
3
margin_logs/step_0000014.npy
Normal file
3
margin_logs/step_0000014.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:dcb56f28c4ca7dbfa49f285b467f14bec912f568fd7baa2bf9bb14234fc8166c
|
||||
size 384
|
||||
3
margin_logs/step_0000015.npy
Normal file
3
margin_logs/step_0000015.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:7436e70b4005f569b618569c0809eaeb620415bbcc3768c13e60055e180bb48f
|
||||
size 384
|
||||
3
margin_logs/step_0000016.npy
Normal file
3
margin_logs/step_0000016.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:581b82c313787c63242098cb9dda955def33c19b46f22a302dc794b50fdce027
|
||||
size 384
|
||||
3
margin_logs/step_0000017.npy
Normal file
3
margin_logs/step_0000017.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:2b40d6c93d746b5512f4cc522be884eb36bbb403e450b3a003dcac2f47c1a767
|
||||
size 384
|
||||
3
margin_logs/step_0000018.npy
Normal file
3
margin_logs/step_0000018.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:43579a251c90dba6d5d8e319af66c8a9c6e92224f55ab7e5e663c350f91d20ab
|
||||
size 384
|
||||
3
margin_logs/step_0000019.npy
Normal file
3
margin_logs/step_0000019.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:6704adac9fc9006944d8a23ed22a7575ab09ec0da315a95e3bd2f67da4e5667c
|
||||
size 384
|
||||
3
margin_logs/step_0000020.npy
Normal file
3
margin_logs/step_0000020.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:25663e631359598337bc3bdec98a38f08419265c49665e63b6439873241f7bd0
|
||||
size 384
|
||||
3
margin_logs/step_0000021.npy
Normal file
3
margin_logs/step_0000021.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:b0e6c0c8a924f95cb96290eb94f7f06246761f443f1531d1cd9e3da8038f853d
|
||||
size 384
|
||||
3
margin_logs/step_0000022.npy
Normal file
3
margin_logs/step_0000022.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:7cd7002b7a43d3daa5b3d5ddbba9a5f6b85a130cea88a10bb7b71f264b8daef1
|
||||
size 384
|
||||
3
margin_logs/step_0000023.npy
Normal file
3
margin_logs/step_0000023.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:a961eae4dc8209af06886717537c0d951d700d2af35bf9b2ab9207878fb7b159
|
||||
size 384
|
||||
3
margin_logs/step_0000024.npy
Normal file
3
margin_logs/step_0000024.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:bc6f8091fb2f0bf20bfeac737fc7f10ca45531123fe5cc776c7e6be1df191836
|
||||
size 384
|
||||
3
margin_logs/step_0000025.npy
Normal file
3
margin_logs/step_0000025.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:fd4d75b21a0697e91897a8d36b3c2a5888fd4edd2ef64a1ecf478a603855f4a9
|
||||
size 384
|
||||
3
margin_logs/step_0000026.npy
Normal file
3
margin_logs/step_0000026.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:5a09b979ab01a090cce1281a4336aabf000f7877d1190c82f71b9e296507e54e
|
||||
size 384
|
||||
3
margin_logs/step_0000027.npy
Normal file
3
margin_logs/step_0000027.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:41b2ce42cdf4bf89214c9152f9c9a5ee87f8486c25a427906adf722212e48e68
|
||||
size 384
|
||||
3
margin_logs/step_0000028.npy
Normal file
3
margin_logs/step_0000028.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d141d867e5375398959cfa5296c0b98a2c4c46c5c011ba7733a2de2f988df410
|
||||
size 384
|
||||
3
margin_logs/step_0000029.npy
Normal file
3
margin_logs/step_0000029.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:559825bfc21493e8460e285029820e2842222d175c59e2773cedb62a8b7eca24
|
||||
size 384
|
||||
3
margin_logs/step_0000030.npy
Normal file
3
margin_logs/step_0000030.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:724e5ecd7767d272c3ab60eb60359950dd7cd6c7d552547c45e93aad660645f2
|
||||
size 384
|
||||
3
margin_logs/step_0000031.npy
Normal file
3
margin_logs/step_0000031.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:50c7654fda1a38acbfbcf7dfe9f0c3406ee8bdcd25cb1e76ff030653fadf6a77
|
||||
size 384
|
||||
3
margin_logs/step_0000032.npy
Normal file
3
margin_logs/step_0000032.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:df4f7736613b1836e8f41e45eadee86ce73975b22c9b6659f1db0c142fba1a7c
|
||||
size 384
|
||||
3
margin_logs/step_0000033.npy
Normal file
3
margin_logs/step_0000033.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:25c6950a6bb9d8289bbd115d602cb5bebec59e8c6ddd4d814aedd9eb80872bb2
|
||||
size 384
|
||||
3
margin_logs/step_0000034.npy
Normal file
3
margin_logs/step_0000034.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:a6e8de61efbd6d6d6a289eb34b96de8598a2fb86347747e3d111f31759dcf01d
|
||||
size 384
|
||||
3
margin_logs/step_0000035.npy
Normal file
3
margin_logs/step_0000035.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:c087630a8db942a7972bcaaf1660979dc4a9a0787e44d9a56ccc1dc7f6e11150
|
||||
size 384
|
||||
3
margin_logs/step_0000036.npy
Normal file
3
margin_logs/step_0000036.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:0dba428671372838a925fc0606d920e891c97ca9761fd757f907064c4b310ec1
|
||||
size 384
|
||||
3
margin_logs/step_0000037.npy
Normal file
3
margin_logs/step_0000037.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:5b8923775dbc15704c8715b7b5499ef7775a0ea46e64c96b973aa5d66d2abc11
|
||||
size 384
|
||||
3
margin_logs/step_0000038.npy
Normal file
3
margin_logs/step_0000038.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:e1f2d9b5b07220dfb92cb0da3576062fbd0aeb72c4c6cf27c890cd601f05ebf5
|
||||
size 384
|
||||
3
margin_logs/step_0000039.npy
Normal file
3
margin_logs/step_0000039.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:a0095086339959a25ed89ae837bc3349f9edf071a08c0857ee45808bdd0172e2
|
||||
size 384
|
||||
3
margin_logs/step_0000040.npy
Normal file
3
margin_logs/step_0000040.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:5ce08e49255e2f141fb1625e702d3213cb18410a8f29b500c4df9177013f0153
|
||||
size 384
|
||||
3
margin_logs/step_0000041.npy
Normal file
3
margin_logs/step_0000041.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:a7939b3e3472549f4ccdb0cebe73b7f0593d76c0c4a5b3f3bbe3f827719a0d35
|
||||
size 384
|
||||
3
margin_logs/step_0000042.npy
Normal file
3
margin_logs/step_0000042.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:c27cdf15a7bbafe5fd805f083e53c03fe133daee4b1e9edb2b0c90720d664622
|
||||
size 384
|
||||
3
margin_logs/step_0000043.npy
Normal file
3
margin_logs/step_0000043.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:82c940ba18682dd58345d1b9bb80c22e63b0b5cdc2a06450047f8c14c055aa60
|
||||
size 384
|
||||
3
margin_logs/step_0000044.npy
Normal file
3
margin_logs/step_0000044.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:1560bc1b328b20b59ec46c1b6518ff557af034c73e22be5317d7b1f4152f9533
|
||||
size 384
|
||||
3
margin_logs/step_0000045.npy
Normal file
3
margin_logs/step_0000045.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:ff46c441e4ecb022d6c54acae1868ab8ef1e1be74fe8f01b388d5c97dcd7f089
|
||||
size 384
|
||||
3
margin_logs/step_0000046.npy
Normal file
3
margin_logs/step_0000046.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:23586d2326a79917c2d5c0fa01e8e861ed3b23640177817657f99db327cbf85b
|
||||
size 384
|
||||
3
margin_logs/step_0000047.npy
Normal file
3
margin_logs/step_0000047.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d18cbfa8e56622dcef7c44b37ef476ba4283510a3d08df14a8013ff16e09767c
|
||||
size 384
|
||||
3
margin_logs/step_0000048.npy
Normal file
3
margin_logs/step_0000048.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d5fdd5336f2718e7e7f4290772808e1bbd065a8f5cd4a55dd698b682acb7547d
|
||||
size 384
|
||||
3
margin_logs/step_0000049.npy
Normal file
3
margin_logs/step_0000049.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:6d389a7dd706fa628f585d5c6727f8e72fc1cb46815b2768e7e8628fcd36a80d
|
||||
size 384
|
||||
3
margin_logs/step_0000050.npy
Normal file
3
margin_logs/step_0000050.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:38985c5f22c211939acc59c838ea4d00fb1c6de3b3206a5d14f0025345d7b20e
|
||||
size 384
|
||||
3
margin_logs/step_0000051.npy
Normal file
3
margin_logs/step_0000051.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:09b43e818d28fdf018cbab25d2081263ae3961a4706ac3fc5dd0d3c63e805425
|
||||
size 384
|
||||
3
margin_logs/step_0000052.npy
Normal file
3
margin_logs/step_0000052.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:fbe7385b508cb66b1a1b195568321921b3a903aa7a487cf75106ee358fc9a3dd
|
||||
size 384
|
||||
3
margin_logs/step_0000053.npy
Normal file
3
margin_logs/step_0000053.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:52ee7bed64438e80df63e70be09c9fd75ab6f05834129e4af304c3cca0f74d3f
|
||||
size 384
|
||||
3
margin_logs/step_0000054.npy
Normal file
3
margin_logs/step_0000054.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d192a8f37fcf80933876fce9daa05611c27241ccfb90d0c51918d2800bdab683
|
||||
size 384
|
||||
3
margin_logs/step_0000055.npy
Normal file
3
margin_logs/step_0000055.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:1ad925a821805ef7a095d04f3f08709e40f97b0e71ff09d707401f397a137a3d
|
||||
size 384
|
||||
3
margin_logs/step_0000056.npy
Normal file
3
margin_logs/step_0000056.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:4cbae878fdab71af87cbb60bf09987183f95250a4efe717bd5e35eac9a80e7d8
|
||||
size 384
|
||||
3
margin_logs/step_0000057.npy
Normal file
3
margin_logs/step_0000057.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:7e83510ed5fed4951db0d8408aea88630aabeb685280badb8bdaee5d0beebbaf
|
||||
size 384
|
||||
3
margin_logs/step_0000058.npy
Normal file
3
margin_logs/step_0000058.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d1823ac5aae9ce45440d9ea4b6888e4e0aa9ece5328ea0be5b9d571741fb18f7
|
||||
size 384
|
||||
3
margin_logs/step_0000059.npy
Normal file
3
margin_logs/step_0000059.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:e83f6037338a135fdfd762b9591509e8089a3919cc7f203d736ab889293707b5
|
||||
size 384
|
||||
3
margin_logs/step_0000060.npy
Normal file
3
margin_logs/step_0000060.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:eb5e5bf05d4cdda386af2dde17bb0f1a3ed9ecc856d0be454d3ac6dc4bdbb92b
|
||||
size 384
|
||||
3
margin_logs/step_0000061.npy
Normal file
3
margin_logs/step_0000061.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:4753a0fc117809720b940883da5570354851e1470b8946f7999c646e80c3f876
|
||||
size 384
|
||||
3
margin_logs/step_0000062.npy
Normal file
3
margin_logs/step_0000062.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:f513d6abc36b523e7676dc491fe749742efc6f573f58e255b812030e17230a73
|
||||
size 384
|
||||
3
margin_logs/step_0000063.npy
Normal file
3
margin_logs/step_0000063.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:acd5015e2cf72a396759bc912b8036d5601eb1e2ffc4cf540ad81368addc423e
|
||||
size 384
|
||||
3
margin_logs/step_0000064.npy
Normal file
3
margin_logs/step_0000064.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d6d890429955117a9e8d2ee7d3c2d7513623807f81e1aa7a6a1685b7c318d9f0
|
||||
size 384
|
||||
3
margin_logs/step_0000065.npy
Normal file
3
margin_logs/step_0000065.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:76043ab36ade8d78e90be78370993ce6e42d1e491674ee81dcab9aba21867e30
|
||||
size 384
|
||||
3
margin_logs/step_0000066.npy
Normal file
3
margin_logs/step_0000066.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:f80bd0623f8f5b4c4e71162c50ce501c3921260f999453597663facd255fd4a3
|
||||
size 384
|
||||
3
margin_logs/step_0000067.npy
Normal file
3
margin_logs/step_0000067.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:49d477cc99352ef488628b6fd779aba8a4d8df18ca966346813bddd62876e5e3
|
||||
size 384
|
||||
3
margin_logs/step_0000068.npy
Normal file
3
margin_logs/step_0000068.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:b6c5c5c9e3327d809c4225b27870bc4ab9613451e5fa3661f77ccf994a91bd89
|
||||
size 384
|
||||
3
margin_logs/step_0000069.npy
Normal file
3
margin_logs/step_0000069.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:66e1c310e31bbf66888936920ec51ab8264e58880dbfc72773a763fe11b26b93
|
||||
size 384
|
||||
3
margin_logs/step_0000070.npy
Normal file
3
margin_logs/step_0000070.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:7a95cf2e0421595187d95b0feacbf9309a41ff00da27f8f6bf62b2182c4b4c74
|
||||
size 384
|
||||
3
margin_logs/step_0000071.npy
Normal file
3
margin_logs/step_0000071.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:fc830c5e1faf1e983f2e59babee707b80b5c83d2aa77eb600635dbf504faa347
|
||||
size 384
|
||||
3
margin_logs/step_0000072.npy
Normal file
3
margin_logs/step_0000072.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:8ab9f5163325cc9905185d071d014d5947d4380fbc23fe3136a63f97e5436805
|
||||
size 384
|
||||
3
margin_logs/step_0000073.npy
Normal file
3
margin_logs/step_0000073.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:c8212237f51ff71adc13c8f0bf849579ca514d3dcb446279aa3e960ce3909002
|
||||
size 384
|
||||
3
margin_logs/step_0000074.npy
Normal file
3
margin_logs/step_0000074.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:f02fb8bafaa5393036a493922d5004a99dcb0cd4b69eaa037de958ae623dae16
|
||||
size 384
|
||||
3
margin_logs/step_0000075.npy
Normal file
3
margin_logs/step_0000075.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:7beebb815a846e596888c6cae702da91441220f4ec344facc317c8c4ea49f68e
|
||||
size 384
|
||||
3
margin_logs/step_0000076.npy
Normal file
3
margin_logs/step_0000076.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:4207f070e13ac4686642d524c881245c8036e963c41e2259db6c843e9848b4f7
|
||||
size 384
|
||||
3
margin_logs/step_0000077.npy
Normal file
3
margin_logs/step_0000077.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:35b187a12a73b3e891c84d80432f2c4baf148ee860506a3912e7301adbd37055
|
||||
size 384
|
||||
3
margin_logs/step_0000078.npy
Normal file
3
margin_logs/step_0000078.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:90d37a41a3f660e4b9eba8bd6f79cd97ce680f15920af61916027345e395c40b
|
||||
size 384
|
||||
3
margin_logs/step_0000079.npy
Normal file
3
margin_logs/step_0000079.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:88b9ffa84c372d38f558ddbce94ff1169d153f7666afb68f7eedb06b22f8460f
|
||||
size 384
|
||||
3
margin_logs/step_0000080.npy
Normal file
3
margin_logs/step_0000080.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:f580702e4101fbf5f50472b8bfd10d95cddf0334e0fc87663e21e1ec79c492cc
|
||||
size 384
|
||||
3
margin_logs/step_0000081.npy
Normal file
3
margin_logs/step_0000081.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:24aa29085a8dbd2539d737bff03e2ff24e6662176a87d870eb49a46b4169f994
|
||||
size 384
|
||||
3
margin_logs/step_0000082.npy
Normal file
3
margin_logs/step_0000082.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:9dd11d98c602fd9d53f7032ee69949d244b97c8c2c842f26a48b1abdc77a48e8
|
||||
size 384
|
||||
3
margin_logs/step_0000083.npy
Normal file
3
margin_logs/step_0000083.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:177628b3346f01db0088677f0257d7bd5f68e7286f7ffd0a44d7b8f35ee57379
|
||||
size 384
|
||||
3
margin_logs/step_0000084.npy
Normal file
3
margin_logs/step_0000084.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:f30f62646c25553aa73cd248422dd6297a92ab983f34085871365d6a2e22b8a6
|
||||
size 384
|
||||
3
margin_logs/step_0000085.npy
Normal file
3
margin_logs/step_0000085.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:80ea580768779452f6da5ba76691bab00cd40d68df22bdc8de51cac0ae0d18ab
|
||||
size 384
|
||||
3
margin_logs/step_0000086.npy
Normal file
3
margin_logs/step_0000086.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:b7f5dc8701f1ccd2c3bf2c76c1272028c8b51e6a528b4e293b299041e43316c8
|
||||
size 384
|
||||
3
margin_logs/step_0000087.npy
Normal file
3
margin_logs/step_0000087.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:42517d2620ae2a9b2f520ec8c32f7696f962018ce1254643d615e1e37b91446f
|
||||
size 384
|
||||
3
margin_logs/step_0000088.npy
Normal file
3
margin_logs/step_0000088.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:649abc07f6ccdbf629551da061eb84c8b77760b2e2618b713dfea65090de08b6
|
||||
size 384
|
||||
3
margin_logs/step_0000089.npy
Normal file
3
margin_logs/step_0000089.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:e9139018e8b42b94012506a6c6bc8aef380aaccba1a595446c0f424652d1a5f7
|
||||
size 384
|
||||
3
margin_logs/step_0000090.npy
Normal file
3
margin_logs/step_0000090.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:7ceb5255b2c0e358a8a57b69c7d1e8d00163f1d777f265de7c63307ad4578b69
|
||||
size 384
|
||||
3
margin_logs/step_0000091.npy
Normal file
3
margin_logs/step_0000091.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:179b5f88d2f3b56e297018db163b76ddae364c995af8e7dc5d35ac5c1b7d8260
|
||||
size 384
|
||||
3
margin_logs/step_0000092.npy
Normal file
3
margin_logs/step_0000092.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:27cc72c623eef3032b5981f4b2a2d606c1328f18713ef0065f4f82f188d4c9dd
|
||||
size 384
|
||||
3
margin_logs/step_0000093.npy
Normal file
3
margin_logs/step_0000093.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:6bc263fbbdc03397992d97e684c42941c5ddde3b276692a7e80c2662df2ad143
|
||||
size 384
|
||||
3
margin_logs/step_0000094.npy
Normal file
3
margin_logs/step_0000094.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:1b0e44435a5a6591f5a2e674569e2851c4e017cac38eb92281db73284cd266c4
|
||||
size 384
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user