初始化项目,由ModelHub XC社区提供模型
Model: W-61/llama3-hh-harmless-qt045-b0p5-20260429-085449 Source: Original Platform
This commit is contained in:
36
.gitattributes
vendored
Normal file
36
.gitattributes
vendored
Normal file
@@ -0,0 +1,36 @@
|
||||
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||
*.model filter=lfs diff=lfs merge=lfs -text
|
||||
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
||||
62
README.md
Normal file
62
README.md
Normal file
@@ -0,0 +1,62 @@
|
||||
---
|
||||
library_name: transformers
|
||||
base_model: W-61/llama-3-8b-base-sft-hh-harmless-4xh200
|
||||
tags:
|
||||
- alignment-handbook
|
||||
- new-dpo
|
||||
- generated_from_trainer
|
||||
datasets:
|
||||
- Anthropic/hh-rlhf
|
||||
model-index:
|
||||
- name: llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449
|
||||
results: []
|
||||
---
|
||||
|
||||
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
||||
should probably proofread and complete it, then remove this comment. -->
|
||||
|
||||
# llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449
|
||||
|
||||
This model is a fine-tuned version of [W-61/llama-3-8b-base-sft-hh-harmless-4xh200](https://huggingface.co/W-61/llama-3-8b-base-sft-hh-harmless-4xh200) on the Anthropic/hh-rlhf dataset.
|
||||
|
||||
## Model description
|
||||
|
||||
More information needed
|
||||
|
||||
## Intended uses & limitations
|
||||
|
||||
More information needed
|
||||
|
||||
## Training and evaluation data
|
||||
|
||||
More information needed
|
||||
|
||||
## Training procedure
|
||||
|
||||
### Training hyperparameters
|
||||
|
||||
The following hyperparameters were used during training:
|
||||
- learning_rate: 5e-07
|
||||
- train_batch_size: 8
|
||||
- eval_batch_size: 8
|
||||
- seed: 42
|
||||
- distributed_type: multi-GPU
|
||||
- num_devices: 4
|
||||
- gradient_accumulation_steps: 2
|
||||
- total_train_batch_size: 64
|
||||
- total_eval_batch_size: 32
|
||||
- optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
|
||||
- lr_scheduler_type: cosine
|
||||
- lr_scheduler_warmup_ratio: 0.1
|
||||
- num_epochs: 1
|
||||
|
||||
### Training results
|
||||
|
||||
|
||||
|
||||
### Framework versions
|
||||
|
||||
- Transformers 4.51.0
|
||||
- Pytorch 2.3.1+cu121
|
||||
- Datasets 2.21.0
|
||||
- Tokenizers 0.21.4
|
||||
9
all_results.json
Normal file
9
all_results.json
Normal file
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"epoch": 0.999244142101285,
|
||||
"total_flos": 0.0,
|
||||
"train_loss": 1.1404347123068148,
|
||||
"train_runtime": 1649.8929,
|
||||
"train_samples": 42336,
|
||||
"train_samples_per_second": 25.66,
|
||||
"train_steps_per_second": 0.401
|
||||
}
|
||||
29
config.json
Normal file
29
config.json
Normal file
@@ -0,0 +1,29 @@
|
||||
{
|
||||
"architectures": [
|
||||
"LlamaForCausalLM"
|
||||
],
|
||||
"attention_bias": false,
|
||||
"attention_dropout": 0.0,
|
||||
"bos_token_id": 128000,
|
||||
"eos_token_id": 128001,
|
||||
"head_dim": 128,
|
||||
"hidden_act": "silu",
|
||||
"hidden_size": 4096,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 14336,
|
||||
"max_position_embeddings": 8192,
|
||||
"mlp_bias": false,
|
||||
"model_type": "llama",
|
||||
"num_attention_heads": 32,
|
||||
"num_hidden_layers": 32,
|
||||
"num_key_value_heads": 8,
|
||||
"pretraining_tp": 1,
|
||||
"rms_norm_eps": 1e-05,
|
||||
"rope_scaling": null,
|
||||
"rope_theta": 500000.0,
|
||||
"tie_word_embeddings": false,
|
||||
"torch_dtype": "float32",
|
||||
"transformers_version": "4.51.0",
|
||||
"use_cache": true,
|
||||
"vocab_size": 128256
|
||||
}
|
||||
9
generation_config.json
Normal file
9
generation_config.json
Normal file
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"bos_token_id": 128000,
|
||||
"do_sample": true,
|
||||
"eos_token_id": 128001,
|
||||
"max_length": 4096,
|
||||
"temperature": 0.6,
|
||||
"top_p": 0.9,
|
||||
"transformers_version": "4.51.0"
|
||||
}
|
||||
661
margin_logs/margins.jsonl
Normal file
661
margin_logs/margins.jsonl
Normal file
@@ -0,0 +1,661 @@
|
||||
{"epoch": 0.0, "step": 1, "batch_size": 64, "mean": -0.0013527870178222656, "std": 0.2564818859100342, "min": -0.736083984375, "p10": -0.3432229995727539, "median": 0.038166046142578125, "p90": 0.29227676391601565, "max": 0.645111083984375, "pos_frac": 0.578125, "sample": [0.1120758056640625, 0.12518310546875, 0.31621551513671875, 0.13765716552734375, -0.12592506408691406, 0.23141098022460938, -0.21887779235839844, 0.21950721740722656, 0.04480743408203125, 0.020877838134765625, 0.0570220947265625, 0.058269500732421875, -0.4338226318359375, -0.030628204345703125, 0.645111083984375, -0.395477294921875, 0.09050941467285156, 0.0007190704345703125, -0.34615325927734375, 0.016077041625976562, -0.33638572692871094, 0.293853759765625, 0.17610931396484375, 0.22386932373046875, 0.21470260620117188, -0.08536529541015625, 0.0907745361328125, -0.03816986083984375, 0.39190101623535156, 0.16336441040039062, 0.08024787902832031, -0.031158447265625, 0.08477020263671875, 0.002460479736328125, -0.242034912109375, 0.07232666015625, -0.60186767578125, 0.20531463623046875, 0.155731201171875, -0.14299774169921875, -0.25698089599609375, 0.12331962585449219, -0.26497650146484375, 0.15140533447265625, -0.0920257568359375, -0.18599319458007812, 0.19028091430664062, 0.2496490478515625, 0.42162322998046875, 0.17873382568359375, -0.1525421142578125, -0.4972076416015625, 0.32010650634765625, -0.10365867614746094, -0.233795166015625, -0.19828224182128906, -0.4018898010253906, -0.13407135009765625, -0.09596633911132812, 0.031524658203125, 0.28859710693359375, -0.192962646484375, -0.736083984375, 0.3026123046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000001.npy"}
|
||||
{"epoch": 0.0015117157974300832, "step": 2, "batch_size": 64, "mean": 0.03744968771934509, "std": 0.2875921130180359, "min": -0.7604827880859375, "p10": -0.2812448501586914, "median": 0.03963661193847656, "p90": 0.3654294967651367, "max": 0.8134727478027344, "pos_frac": 0.5625, "sample": [0.30594635009765625, -0.24289894104003906, -0.11509323120117188, -0.13417816162109375, 0.06942558288574219, 0.36568641662597656, -0.14640045166015625, 0.1497650146484375, 0.30261993408203125, 0.10124588012695312, 0.13028717041015625, -0.0031890869140625, 0.0361480712890625, 0.5662612915039062, 0.09694290161132812, -0.01091766357421875, 0.1128997802734375, 0.0411834716796875, -0.21860504150390625, -0.1236419677734375, -0.08812713623046875, 0.10360527038574219, 0.1790008544921875, -0.5114288330078125, 0.3056755065917969, -0.14553451538085938, 0.28168487548828125, 0.26990509033203125, 0.1686878204345703, 0.038089752197265625, 0.19541168212890625, -0.10783576965332031, -0.2644004821777344, -0.19707489013671875, -0.140472412109375, 0.1349811553955078, 0.19672012329101562, -0.0714111328125, 0.53369140625, 0.1271820068359375, 0.8134727478027344, 0.2990264892578125, -0.7604827880859375, -0.08274078369140625, 0.05890846252441406, 0.029361724853515625, 0.4510040283203125, -0.1599273681640625, -0.29346656799316406, 0.10005569458007812, -0.27509117126464844, -0.1937713623046875, 0.19167327880859375, 0.28173065185546875, -0.09406471252441406, -0.3380699157714844, -0.29186248779296875, 0.36483001708984375, 0.009979248046875, 0.44391632080078125, -0.126708984375, -0.6550216674804688, 0.6160736083984375, -0.28388214111328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000002.npy"}
|
||||
{"epoch": 0.0030234315948601664, "step": 3, "batch_size": 64, "mean": -0.026467204093933105, "std": 0.30806809663772583, "min": -0.784454345703125, "p10": -0.39578437805175776, "median": -0.01042938232421875, "p90": 0.3263589859008789, "max": 0.8639678955078125, "pos_frac": 0.46875, "sample": [0.001163482666015625, -0.08046150207519531, -0.3637809753417969, -0.6114959716796875, 0.5206451416015625, 0.474334716796875, -0.05446434020996094, -0.047565460205078125, 0.507843017578125, -0.29026031494140625, -0.0962677001953125, -0.784454345703125, -0.2994232177734375, 0.007829666137695312, 0.22295379638671875, 0.0484161376953125, -0.5504074096679688, -0.29926300048828125, -0.0406341552734375, -0.31705474853515625, -0.2654876708984375, 0.10183143615722656, -0.0093536376953125, 0.008876800537109375, -0.4095001220703125, 0.5073604583740234, 0.32064056396484375, 0.108123779296875, -0.1256084442138672, -0.006374359130859375, 0.15889549255371094, -0.21315765380859375, -0.073974609375, 0.039459228515625, -0.26339149475097656, -0.2775382995605469, -0.011505126953125, -0.529541015625, 0.04657173156738281, 0.37990570068359375, 0.8639678955078125, 0.0887908935546875, 0.09635162353515625, 0.2778167724609375, 0.20387649536132812, 0.17584228515625, -0.0767974853515625, 0.16618728637695312, 0.10390853881835938, 0.08072662353515625, -0.17749404907226562, -0.18267822265625, 0.29253387451171875, 0.3288097381591797, 0.10744857788085938, -0.0176849365234375, -0.02597808837890625, -0.3246002197265625, -0.16367340087890625, -0.636962890625, -0.5841102600097656, 0.0580596923828125, -0.08154296875, 0.29941558837890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000003.npy"}
|
||||
{"epoch": 0.0045351473922902496, "step": 4, "batch_size": 64, "mean": -0.000735849142074585, "std": 0.3251829743385315, "min": -0.7379608154296875, "p10": -0.3816089630126953, "median": -0.0397491455078125, "p90": 0.398516845703125, "max": 1.124603271484375, "pos_frac": 0.484375, "sample": [-0.6450653076171875, 0.313934326171875, 0.11295318603515625, -0.26505279541015625, 0.2827301025390625, -0.2507171630859375, 0.12739181518554688, 0.139007568359375, -0.18805694580078125, -0.1126861572265625, -0.09664154052734375, 0.16539382934570312, -0.7379608154296875, -0.1085357666015625, -0.3671226501464844, -0.42315673828125, 0.4523468017578125, 0.01682281494140625, 0.25516510009765625, -0.6936492919921875, -0.0546722412109375, -0.09468460083007812, -0.09212493896484375, -0.211883544921875, 0.13368988037109375, -0.077423095703125, -0.1053009033203125, 0.11155509948730469, -0.31156158447265625, 0.0389404296875, 1.124603271484375, 0.4645271301269531, 0.16115570068359375, -0.0248260498046875, -0.141510009765625, 0.398590087890625, -0.711944580078125, 0.23684310913085938, 0.0775299072265625, -0.16431427001953125, -0.084259033203125, 0.01828765869140625, 0.48940277099609375, -0.16755294799804688, 0.2043609619140625, 0.49834442138671875, -0.20343780517578125, -0.05751800537109375, 0.13211822509765625, 0.282470703125, 0.3699951171875, 0.0261688232421875, -0.23458480834960938, -0.2521820068359375, 0.0743560791015625, 0.398345947265625, -0.3878173828125, 0.4511871337890625, -0.1587982177734375, -0.06616973876953125, -0.39371490478515625, 0.159454345703125, 0.2762870788574219, -0.1561279296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000004.npy"}
|
||||
{"epoch": 0.006046863189720333, "step": 5, "batch_size": 64, "mean": -0.05532556772232056, "std": 0.3274867832660675, "min": -0.804901123046875, "p10": -0.4211006164550781, "median": -0.03682136535644531, "p90": 0.38465423583984376, "max": 0.773712158203125, "pos_frac": 0.484375, "sample": [-0.2521839141845703, -0.43244171142578125, 0.08309173583984375, -0.12054443359375, 0.2080230712890625, -0.764129638671875, 0.009998321533203125, 0.5464591979980469, -0.11246490478515625, 0.010894775390625, 0.03937530517578125, -0.2769927978515625, -0.18562698364257812, 0.049591064453125, -0.6250495910644531, -0.2512245178222656, 0.18170928955078125, 0.0556793212890625, -0.32178497314453125, -0.3232231140136719, 0.015865325927734375, 0.16365623474121094, 0.0001087188720703125, -0.0732269287109375, -0.804901123046875, 0.377166748046875, -0.23590087890625, -0.37310028076171875, -0.3946380615234375, 0.4186515808105469, 0.773712158203125, -0.3120880126953125, -0.496826171875, 0.2612876892089844, 0.121490478515625, -0.27447509765625, -0.2103748321533203, 0.05696868896484375, 0.19446754455566406, -0.2066478729248047, 0.007495880126953125, -0.21966552734375, 0.16746139526367188, 0.647216796875, 0.5327606201171875, 0.001354217529296875, 0.3878631591796875, -0.2585906982421875, -0.04050445556640625, -0.3151702880859375, -0.044483184814453125, 0.127349853515625, 0.16587448120117188, 0.5523681640625, -0.2476806640625, -0.20369720458984375, -0.6238746643066406, -0.3471641540527344, 0.10945701599121094, -0.033138275146484375, 0.07110023498535156, -0.47139739990234375, -0.2930946350097656, 0.2669715881347656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000005.npy"}
|
||||
{"epoch": 0.007558578987150416, "step": 6, "batch_size": 64, "mean": -0.03294065594673157, "std": 0.334528386592865, "min": -0.913330078125, "p10": -0.5180587768554688, "median": 0.0030155181884765625, "p90": 0.31352233886718756, "max": 0.818939208984375, "pos_frac": 0.5, "sample": [-0.24460983276367188, -0.082855224609375, -0.07027053833007812, 0.47766876220703125, 0.2522735595703125, -0.2201213836669922, 0.2179126739501953, 0.132659912109375, 0.19260406494140625, -0.18257522583007812, -0.02895355224609375, -0.564361572265625, -0.002117156982421875, 0.21797943115234375, 0.097991943359375, -0.8743743896484375, 0.03498077392578125, 0.36100006103515625, 0.0656890869140625, -0.5546092987060547, 0.2686309814453125, -0.45349884033203125, -0.014776229858398438, 0.43657875061035156, 0.009586334228515625, -0.5018768310546875, -0.1355133056640625, -0.60736083984375, -0.22824859619140625, 0.3746490478515625, 0.13639450073242188, 0.1471271514892578, 0.818939208984375, 0.3000640869140625, -0.41649627685546875, 0.3192901611328125, -0.13897323608398438, -0.26058197021484375, -0.22223663330078125, 0.13864898681640625, -0.07811737060546875, 0.20152854919433594, 0.019561767578125, 0.2884941101074219, -0.5757980346679688, 0.09881591796875, 0.158111572265625, 0.16363906860351562, -0.18694114685058594, 0.6169319152832031, 0.14965057373046875, 0.1793060302734375, -0.36277008056640625, -0.1388568878173828, -0.3451995849609375, -0.013330459594726562, -0.1307525634765625, -0.26575469970703125, -0.913330078125, -0.524993896484375, 0.008148193359375, 0.2486095428466797, 0.18943023681640625, -0.09084320068359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000006.npy"}
|
||||
{"epoch": 0.009070294784580499, "step": 7, "batch_size": 64, "mean": 0.027201414108276367, "std": 0.27945253252983093, "min": -0.53271484375, "p10": -0.38431758880615235, "median": 0.028045654296875, "p90": 0.3642986297607423, "max": 0.6094856262207031, "pos_frac": 0.546875, "sample": [0.3422660827636719, -0.4445648193359375, -0.0229644775390625, 0.12061882019042969, 0.12801170349121094, 0.19467926025390625, 0.3210620880126953, 0.37374114990234375, 0.5938186645507812, 0.12848281860351562, -0.1185760498046875, 0.06992912292480469, 0.05941963195800781, 0.6094856262207031, 0.0850830078125, 0.0088043212890625, -0.008190155029296875, 0.1583709716796875, 0.010009765625, -0.0753021240234375, 0.48472023010253906, -0.4220542907714844, -0.15843963623046875, 0.26340484619140625, -0.07452392578125, -0.22349929809570312, -0.4534912109375, 0.0434417724609375, 0.06383514404296875, -0.0710601806640625, -0.2490386962890625, -0.18888092041015625, -0.3873920440673828, 0.5598983764648438, 0.0126495361328125, 0.1131439208984375, -0.169281005859375, 0.322052001953125, 0.0439453125, 0.3756389617919922, 0.2447967529296875, -0.21262550354003906, 0.2395172119140625, -0.006378173828125, -0.53271484375, 0.24211883544921875, -0.37453460693359375, 0.19302940368652344, -0.07343673706054688, 0.2902374267578125, -0.11879158020019531, -0.49407196044921875, 0.15056610107421875, -0.022098541259765625, 0.1414642333984375, -0.37714385986328125, -0.43582916259765625, -0.21628570556640625, -0.1100006103515625, 0.5872650146484375, -0.19756317138671875, 0.29308319091796875, 0.20915985107421875, -0.09812736511230469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000007.npy"}
|
||||
{"epoch": 0.010582010582010581, "step": 8, "batch_size": 64, "mean": 0.01508358120918274, "std": 0.33053112030029297, "min": -0.701141357421875, "p10": -0.3820446014404297, "median": -0.006184577941894531, "p90": 0.38071670532226565, "max": 0.915374755859375, "pos_frac": 0.5, "sample": [0.06950187683105469, 0.0268707275390625, 0.525177001953125, 0.015380859375, -0.5462799072265625, 0.2935791015625, 0.26692962646484375, 0.5990447998046875, -0.2589836120605469, -0.701141357421875, -0.6101226806640625, 0.10308837890625, -0.21381759643554688, -0.4907188415527344, 0.915374755859375, 0.37468719482421875, 0.24211883544921875, -0.093414306640625, -0.16348648071289062, 0.4127197265625, 0.02591705322265625, -0.493408203125, -0.4302215576171875, -0.0435791015625, 0.011493682861328125, -0.3889350891113281, 0.15003395080566406, 0.19770050048828125, 0.3167152404785156, -0.13804244995117188, 0.3731536865234375, -0.22614288330078125, -0.030553817749023438, -0.07485198974609375, -0.21953582763671875, -0.026611328125, 0.38330078125, -0.041774749755859375, 0.677001953125, 0.0617218017578125, 0.7795486450195312, -0.019243240356445312, 0.06887435913085938, 0.1699981689453125, -0.23135757446289062, -0.23108673095703125, 0.21282386779785156, -0.2890663146972656, -0.3628730773925781, 0.3446693420410156, -0.15419769287109375, 0.09365272521972656, -0.24091720581054688, -0.34787750244140625, -0.0486602783203125, 0.19917678833007812, -0.053646087646484375, -0.365966796875, 0.00687408447265625, 0.19046783447265625, -0.04225349426269531, -0.217010498046875, 0.3601531982421875, 0.2933769226074219], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000008.npy"}
|
||||
{"epoch": 0.012093726379440665, "step": 9, "batch_size": 64, "mean": 0.06939518451690674, "std": 0.38108107447624207, "min": -0.7335968017578125, "p10": -0.40671615600585925, "median": -0.0066623687744140625, "p90": 0.46354522705078127, "max": 1.3619384765625, "pos_frac": 0.46875, "sample": [0.01285552978515625, -0.0765838623046875, -0.12518692016601562, 0.707672119140625, -0.07204818725585938, -0.6035270690917969, -0.00693511962890625, -0.0802154541015625, -0.0597686767578125, -0.6768951416015625, -0.46006011962890625, -0.18218040466308594, 0.23169326782226562, -0.28600311279296875, -0.042205810546875, -0.5128707885742188, -0.4584503173828125, 0.900787353515625, 0.13245773315429688, 0.3322467803955078, 0.4114837646484375, 0.1623859405517578, 0.01678466796875, -0.000316619873046875, -0.07101058959960938, 0.7720794677734375, 0.3403587341308594, 0.4615478515625, -0.1991748809814453, -0.11272048950195312, 0.11043548583984375, 0.16714859008789062, 0.30397796630859375, 0.4644012451171875, 0.15460777282714844, -0.019819259643554688, -0.048213958740234375, 0.17457199096679688, 0.351409912109375, -0.11065673828125, -0.04674530029296875, -0.5468578338623047, -0.0701141357421875, -0.0079803466796875, 0.018550872802734375, 0.201629638671875, 0.10201263427734375, 0.2948436737060547, -0.031322479248046875, -0.0852508544921875, 0.22138214111328125, -0.1589202880859375, 0.14947891235351562, 0.4452362060546875, 0.5741596221923828, 1.085784912109375, 0.07665061950683594, -0.0839691162109375, -0.006389617919921875, -0.041652679443359375, 1.3619384765625, -0.7335968017578125, -0.25165557861328125, -0.0299835205078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000009.npy"}
|
||||
{"epoch": 0.013605442176870748, "step": 10, "batch_size": 64, "mean": -0.03192758560180664, "std": 0.3887459635734558, "min": -0.8181838989257812, "p10": -0.488827896118164, "median": -0.06359672546386719, "p90": 0.4195741653442383, "max": 1.1913299560546875, "pos_frac": 0.4375, "sample": [-0.313446044921875, -0.39395713806152344, 0.10821151733398438, 0.03303718566894531, -0.11057662963867188, -0.1141204833984375, -0.3388519287109375, -0.1675872802734375, -0.3540496826171875, 0.134857177734375, -0.29257965087890625, 0.14371871948242188, -0.13309478759765625, -0.3870849609375, -0.31005859375, 0.1324615478515625, 0.056484222412109375, -0.0865020751953125, 0.351837158203125, 0.23499679565429688, -0.09083747863769531, 0.2096881866455078, 0.8912811279296875, 0.4878692626953125, -0.5249061584472656, -0.5074119567871094, -0.09452056884765625, -0.79876708984375, -0.061527252197265625, -0.0526580810546875, 0.1162261962890625, 0.41419219970703125, -0.11606597900390625, 0.21869659423828125, 0.2679424285888672, -0.254180908203125, -0.8181838989257812, 0.15421676635742188, 0.576507568359375, 0.09271049499511719, -0.2541332244873047, -0.05737113952636719, -0.690704345703125, -0.0619659423828125, -0.12703704833984375, -0.06522750854492188, -0.4277229309082031, -0.38694000244140625, 1.1913299560546875, -0.62005615234375, 0.2510833740234375, 0.3883857727050781, 0.044467926025390625, 0.3288459777832031, -0.3548622131347656, -0.11101531982421875, 0.760101318359375, 0.42188072204589844, 0.0417633056640625, -0.3690032958984375, 0.11476325988769531, -0.445465087890625, 0.5964431762695312, -0.514892578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000010.npy"}
|
||||
{"epoch": 0.015117157974300832, "step": 11, "batch_size": 64, "mean": 0.03709983825683594, "std": 0.29452747106552124, "min": -0.89794921875, "p10": -0.28243217468261717, "median": 0.033641815185546875, "p90": 0.3785514831542971, "max": 0.82550048828125, "pos_frac": 0.546875, "sample": [-0.12969970703125, 0.14381027221679688, 0.03549957275390625, 0.01139068603515625, -0.29500579833984375, -0.006702423095703125, 0.0971832275390625, -0.04356956481933594, -0.2970733642578125, -0.1237640380859375, -0.20676422119140625, 0.10626602172851562, 0.40241241455078125, 0.2616233825683594, -0.11142349243164062, 0.11792755126953125, 0.4458122253417969, 0.61529541015625, -0.16290283203125, 0.82550048828125, 0.10228347778320312, 0.2225494384765625, 0.4717559814453125, -0.03878021240234375, -0.251251220703125, 0.1056060791015625, 0.29259490966796875, 0.2078704833984375, -0.1432209014892578, 0.2508697509765625, 0.0036716461181640625, 0.052703857421875, -0.14935302734375, 0.3015289306640625, -0.1870136260986328, 0.23065185546875, 0.19989776611328125, 0.0317840576171875, 0.086883544921875, -0.544647216796875, -0.144866943359375, 0.4753074645996094, -0.32834625244140625, -0.038116455078125, -0.024204254150390625, -0.89794921875, -0.2538948059082031, 0.24443626403808594, 0.2509613037109375, 0.6405792236328125, 0.18323898315429688, -0.23162841796875, -0.10583114624023438, -0.19494247436523438, 0.3228759765625, 0.2460479736328125, -0.08451652526855469, 0.18158721923828125, -0.0403594970703125, -0.6476593017578125, -0.12202835083007812, 0.22840118408203125, -0.2946624755859375, 0.0777587890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000011.npy"}
|
||||
{"epoch": 0.016628873771730914, "step": 12, "batch_size": 64, "mean": 0.02898406982421875, "std": 0.2918440103530884, "min": -0.618682861328125, "p10": -0.27025299072265624, "median": 0.010526657104492188, "p90": 0.3871011734008789, "max": 0.895965576171875, "pos_frac": 0.515625, "sample": [-0.21789932250976562, -0.236083984375, -0.010547637939453125, 0.23181724548339844, -0.012882232666015625, 0.1946258544921875, 0.2821636199951172, 0.12065887451171875, 0.05323219299316406, -0.12808990478515625, -0.34600067138671875, 0.02777862548828125, 0.05129241943359375, 0.895965576171875, 0.38121986389160156, -0.260650634765625, -0.10895538330078125, -0.3141365051269531, 0.5126953125, 0.33400535583496094, -0.15038299560546875, 0.778564453125, -0.092742919921875, 0.556243896484375, -0.08496284484863281, -0.47147369384765625, -0.2124004364013672, -0.17560958862304688, -0.0670318603515625, 0.018436431884765625, -0.21303558349609375, 0.1133270263671875, -0.0526580810546875, -0.618682861328125, -0.035091400146484375, -0.14356613159179688, -0.36614227294921875, 0.17629241943359375, -0.25125885009765625, -0.10323333740234375, 0.0367279052734375, -0.21190834045410156, -0.12633514404296875, -0.2743682861328125, 0.11945343017578125, 0.5617733001708984, 0.1703948974609375, 0.07699966430664062, 0.26340484619140625, 0.008289337158203125, 0.3112335205078125, 0.24348068237304688, 0.3896217346191406, 0.491180419921875, 0.1533966064453125, 0.21453094482421875, -0.2550468444824219, 0.19445037841796875, -0.2319183349609375, -0.4177703857421875, 0.01276397705078125, 0.19446563720703125, 0.08451652526855469, -0.20915603637695312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000012.npy"}
|
||||
{"epoch": 0.018140589569160998, "step": 13, "batch_size": 64, "mean": -0.0720277726650238, "std": 0.2981038987636566, "min": -1.226898193359375, "p10": -0.4329349517822265, "median": -0.027555465698242188, "p90": 0.24675674438476564, "max": 0.5320892333984375, "pos_frac": 0.46875, "sample": [-0.6793212890625, -0.22695159912109375, -0.5779876708984375, 0.0510406494140625, 0.0830078125, -0.8348846435546875, -0.052036285400390625, 0.026203155517578125, 0.31725311279296875, 0.1308135986328125, -0.2161407470703125, 0.05778694152832031, 0.35434722900390625, 0.24814605712890625, 0.12253570556640625, 0.00041961669921875, 0.2003173828125, -0.2789497375488281, -0.373870849609375, 0.2435150146484375, -0.03833770751953125, -0.4827880859375, 0.0323028564453125, -0.0915374755859375, -0.218353271484375, -0.4582481384277344, -0.222015380859375, -0.09110260009765625, 0.0861663818359375, -0.08892822265625, -0.03314399719238281, -0.18428802490234375, 0.07793807983398438, -0.6200523376464844, -0.22423553466796875, 0.4327678680419922, 0.0801849365234375, -0.10396957397460938, 0.2227783203125, 0.0338134765625, -0.021144866943359375, 0.048755645751953125, -1.226898193359375, -0.05849266052246094, 0.19024658203125, -0.021966934204101562, -0.20038604736328125, 0.2529754638671875, -0.2371063232421875, 0.0474395751953125, -0.33803558349609375, 0.30521392822265625, -0.116790771484375, 0.018316268920898438, 0.5320892333984375, -0.30474853515625, -0.1697235107421875, 0.09992599487304688, -0.09664154052734375, 0.1970806121826172, 0.1207427978515625, -0.1640472412109375, -0.1815052032470703, 0.010728836059570312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000013.npy"}
|
||||
{"epoch": 0.019652305366591082, "step": 14, "batch_size": 64, "mean": 0.04675331711769104, "std": 0.2829740047454834, "min": -0.57928466796875, "p10": -0.26424827575683596, "median": 0.01954174041748047, "p90": 0.4009868621826174, "max": 1.0258102416992188, "pos_frac": 0.53125, "sample": [0.21082687377929688, -0.342498779296875, -0.0288848876953125, 0.6094512939453125, 0.2474079132080078, 0.11258125305175781, 0.14655303955078125, 0.452789306640625, 0.026643753051757812, 0.003299713134765625, 0.0806884765625, 0.28910064697265625, 0.10963058471679688, -0.26610565185546875, -0.12281036376953125, -0.06346511840820312, -0.13937759399414062, -0.0332489013671875, -0.14267730712890625, -0.2599143981933594, -0.4324531555175781, -0.57928466796875, 0.20158767700195312, -0.15295791625976562, -0.08282470703125, -0.0388641357421875, 0.227203369140625, -0.0575714111328125, 0.4441986083984375, 1.0258102416992188, -0.06997299194335938, 0.672882080078125, 0.2770500183105469, 0.57452392578125, -0.07271957397460938, -0.04388427734375, 0.34616851806640625, 0.2356719970703125, 0.18181800842285156, -0.0214385986328125, -0.49078369140625, 0.4244804382324219, -0.2173309326171875, 0.0428466796875, 0.18241500854492188, -0.07367897033691406, -0.302520751953125, 0.24335479736328125, 0.08017921447753906, 0.0554046630859375, 0.027866363525390625, -0.1332550048828125, 0.07286834716796875, 0.3065948486328125, 0.04430389404296875, 0.022388458251953125, -0.465179443359375, -0.088653564453125, -0.21697044372558594, -0.10892295837402344, 0.016695022583007812, -0.01824951171875, -0.16029739379882812, 0.223724365234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000014.npy"}
|
||||
{"epoch": 0.021164021164021163, "step": 15, "batch_size": 64, "mean": -0.004044860601425171, "std": 0.3650330603122711, "min": -0.896636962890625, "p10": -0.4713296890258789, "median": 0.052730560302734375, "p90": 0.4221649169921875, "max": 0.8491058349609375, "pos_frac": 0.5625, "sample": [0.2226276397705078, -0.5859832763671875, -0.0039119720458984375, 0.030818939208984375, -0.46024131774902344, 0.41783905029296875, -0.2841644287109375, -0.10633087158203125, 0.265838623046875, -0.896636962890625, 0.39546966552734375, 0.20948410034179688, -0.187469482421875, -0.15079307556152344, 0.0765228271484375, 0.0846405029296875, 0.6846847534179688, 0.141845703125, 0.24211502075195312, -0.3853302001953125, 0.42401885986328125, -0.45514678955078125, 0.28092193603515625, -0.3143959045410156, -0.25363922119140625, 0.44487762451171875, 0.8491058349609375, -0.88031005859375, -0.148284912109375, 0.25421142578125, 0.10244369506835938, -0.1878662109375, -0.6669692993164062, -0.47608184814453125, 0.2724952697753906, 0.17955780029296875, 0.5002288818359375, 0.184814453125, -0.26903533935546875, 0.0773773193359375, -0.05413818359375, 0.07553482055664062, 0.03366851806640625, 0.7298355102539062, 0.07653617858886719, -0.7063446044921875, 0.0717926025390625, -0.07099151611328125, 0.11586952209472656, 0.1845855712890625, -0.7166976928710938, 0.016351699829101562, -0.22129249572753906, 0.28580474853515625, -0.0912322998046875, -0.08986663818359375, -0.15325927734375, -0.2624168395996094, 0.10551643371582031, 0.17606544494628906, 0.507171630859375, -0.1322784423828125, 0.207611083984375, 0.023954391479492188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000015.npy"}
|
||||
{"epoch": 0.022675736961451247, "step": 16, "batch_size": 64, "mean": -0.0014390945434570312, "std": 0.2676456868648529, "min": -0.6949920654296875, "p10": -0.29194107055664065, "median": -0.001750946044921875, "p90": 0.27339096069335955, "max": 1.2387771606445312, "pos_frac": 0.484375, "sample": [-0.10103988647460938, 0.0002460479736328125, -0.5128173828125, 0.11744499206542969, 0.12795639038085938, -0.29396820068359375, -0.03918647766113281, -0.29344940185546875, 1.2387771606445312, -0.12503814697265625, -0.4955101013183594, 0.34824371337890625, 0.10610580444335938, -0.37548065185546875, 0.017526626586914062, -0.3521995544433594, 0.2308483123779297, -0.03938865661621094, -0.0718994140625, 0.02374267578125, -0.6949920654296875, -0.2396240234375, 0.125946044921875, 0.08016586303710938, 0.11612510681152344, -0.0328521728515625, 0.1232147216796875, -0.1793365478515625, -0.1998310089111328, 0.31217193603515625, 0.168853759765625, 0.09315109252929688, -0.0019989013671875, 0.23319244384765625, 0.03932952880859375, 0.15770721435546875, 0.372955322265625, -0.288421630859375, -0.21757888793945312, -0.0049724578857421875, -0.21939659118652344, 0.06844711303710938, 0.15128707885742188, 0.29659271240234375, 0.087158203125, -0.12414360046386719, -0.08960723876953125, -0.11444091796875, -0.10034370422363281, -0.08826828002929688, -0.0978851318359375, -0.0381927490234375, -0.00150299072265625, -0.09572601318359375, 0.15177154541015625, 0.09606170654296875, -0.09012603759765625, 0.290618896484375, -0.2558135986328125, 0.09819602966308594, 0.06583976745605469, 0.1080322265625, 0.472137451171875, -0.1369171142578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000016.npy"}
|
||||
{"epoch": 0.02418745275888133, "step": 17, "batch_size": 64, "mean": -0.007782965898513794, "std": 0.3939519226551056, "min": -0.9858245849609375, "p10": -0.4283782958984375, "median": -0.02112102508544922, "p90": 0.43296928405761737, "max": 1.2611083984375, "pos_frac": 0.46875, "sample": [0.26639556884765625, 0.341888427734375, -0.03604888916015625, 0.021070480346679688, 1.2611083984375, 0.19382667541503906, 0.3124847412109375, 0.015962600708007812, 0.025165557861328125, -0.3603057861328125, -0.035369873046875, -0.13338851928710938, 0.019735336303710938, 0.20161819458007812, -0.3634605407714844, -0.046417236328125, -0.04296875, 0.0892486572265625, 0.0428009033203125, -0.43437957763671875, 0.6341476440429688, 0.14515113830566406, -0.809051513671875, -0.41437530517578125, -0.1272735595703125, -0.017320632934570312, -0.28430938720703125, -0.2633476257324219, -0.09591102600097656, -0.3519744873046875, -0.3950958251953125, -0.7104644775390625, -0.00971221923828125, 0.0558624267578125, 0.1562042236328125, 0.3909149169921875, 0.4509925842285156, 0.3878211975097656, -0.52178955078125, -0.616668701171875, 0.3433380126953125, -0.060150146484375, 0.06769561767578125, -0.08019256591796875, 0.095977783203125, 0.5644378662109375, -0.8414459228515625, 0.6620635986328125, 0.3119010925292969, 0.820098876953125, -0.15845108032226562, -0.9858245849609375, -0.0815582275390625, -0.20629501342773438, 0.4951019287109375, 0.20066070556640625, -0.024921417236328125, 0.16924285888671875, -0.22530364990234375, -0.14202499389648438, 0.32067108154296875, -0.257415771484375, -0.33655548095703125, -0.09192657470703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000017.npy"}
|
||||
{"epoch": 0.025699168556311415, "step": 18, "batch_size": 64, "mean": 0.008657693862915039, "std": 0.31698495149612427, "min": -0.9039688110351562, "p10": -0.3811187744140625, "median": 0.013964653015136719, "p90": 0.35109615325927734, "max": 0.8382568359375, "pos_frac": 0.515625, "sample": [-0.08412933349609375, 0.3527641296386719, 0.03644371032714844, -0.01869964599609375, 0.25354957580566406, -0.01035308837890625, 0.12689781188964844, 0.26903533935546875, 0.773956298828125, 0.212799072265625, 0.014875411987304688, 0.04693603515625, 0.1022796630859375, -0.1608295440673828, -0.18372344970703125, 0.521728515625, 0.21383285522460938, -0.49408721923828125, 0.16837310791015625, 0.0369415283203125, 0.03986358642578125, 0.7066802978515625, -0.21036529541015625, 0.4237022399902344, 0.1598663330078125, -0.05782127380371094, -0.477630615234375, 0.16974449157714844, -0.11788558959960938, 0.038829803466796875, 0.2186908721923828, -0.0649871826171875, -0.1145477294921875, -0.5888671875, 0.10481071472167969, 0.24391937255859375, -0.27733421325683594, -0.11348152160644531, -0.05419921875, -0.366058349609375, -0.0552825927734375, 0.8382568359375, -0.732025146484375, -0.492645263671875, 0.07238197326660156, 0.34720420837402344, -0.1513671875, -0.11197662353515625, -0.23860931396484375, -0.9039688110351562, -0.184326171875, 0.2056903839111328, -0.09964179992675781, 0.09661865234375, -0.3875732421875, -0.00278472900390625, -0.24750709533691406, 0.08864593505859375, 0.015960693359375, -0.042789459228515625, 0.223297119140625, 0.01305389404296875, 0.4679985046386719, -0.006038665771484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000018.npy"}
|
||||
{"epoch": 0.027210884353741496, "step": 19, "batch_size": 64, "mean": 0.03830514848232269, "std": 0.333337664604187, "min": -1.2857666015625, "p10": -0.3532302856445312, "median": 0.09617233276367188, "p90": 0.4104152679443361, "max": 0.6738433837890625, "pos_frac": 0.640625, "sample": [0.14453125, 0.0915374755859375, -0.08257675170898438, -0.373138427734375, -0.01216888427734375, 0.3748283386230469, 0.1711883544921875, -0.40549468994140625, -0.593902587890625, 0.160675048828125, -0.2675933837890625, 0.10080718994140625, 0.4908447265625, 0.17412567138671875, 0.279693603515625, -0.04763603210449219, -0.13248443603515625, 0.001537322998046875, 0.1399383544921875, 0.5279273986816406, -0.3067779541015625, 0.2228240966796875, 0.12322235107421875, -0.9122314453125, 0.2581634521484375, 0.4353485107421875, -0.602020263671875, -0.17337799072265625, -0.4192657470703125, -0.14547348022460938, 0.20680999755859375, 0.04688262939453125, 0.1196136474609375, 0.4257965087890625, -1.2857666015625, 0.208404541015625, -0.2567596435546875, -0.07904624938964844, 0.2657966613769531, -0.05964088439941406, 0.1399860382080078, 0.028470993041992188, 0.2257232666015625, -0.0127410888671875, 0.22559356689453125, 0.18751144409179688, 0.02463531494140625, 0.6738433837890625, -0.015798568725585938, 0.2503204345703125, 0.5477371215820312, 0.008539199829101562, 0.25511932373046875, 0.03677082061767578, 0.089630126953125, 0.3699455261230469, -0.22684478759765625, -0.08628463745117188, 0.16231346130371094, 0.42566680908203125, 0.2980785369873047, 0.04294586181640625, 0.2200775146484375, -0.23485183715820312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000019.npy"}
|
||||
{"epoch": 0.02872260015117158, "step": 20, "batch_size": 64, "mean": 0.05611985921859741, "std": 0.2710525393486023, "min": -0.6413650512695312, "p10": -0.28497047424316396, "median": 0.05657005310058594, "p90": 0.40745239257812504, "max": 0.5962371826171875, "pos_frac": 0.59375, "sample": [0.22168731689453125, -0.19904327392578125, 0.0355682373046875, 0.13640594482421875, -0.08870124816894531, -0.6104965209960938, 0.2523040771484375, 0.4188232421875, 0.2607269287109375, -0.150390625, 0.0802001953125, 0.2858161926269531, -0.16057586669921875, 0.10096168518066406, -0.0610809326171875, -0.014678955078125, -0.6413650512695312, -0.3217964172363281, 0.39347076416015625, -0.528717041015625, 0.17140579223632812, -0.03431129455566406, -0.0525360107421875, 0.0022125244140625, 0.20570945739746094, 0.25313568115234375, 0.27782630920410156, -0.061611175537109375, 0.41344451904296875, 0.07340621948242188, -0.0025634765625, -0.3609809875488281, 0.2971038818359375, 0.37294769287109375, -0.15869140625, 0.4951057434082031, 0.18526458740234375, -0.12108612060546875, -0.16259002685546875, 0.42031097412109375, 0.15389251708984375, 0.0217132568359375, 0.37811279296875, -0.4189300537109375, 0.42180633544921875, 0.4299468994140625, 0.19860076904296875, 0.06392669677734375, 0.0351409912109375, -0.023956298828125, -0.5805282592773438, 0.066436767578125, -0.0914764404296875, 0.5962371826171875, 0.049213409423828125, -0.028821945190429688, 0.197540283203125, 0.13447952270507812, 0.3223876953125, -0.11447525024414062, -0.06841087341308594, 0.2935600280761719, -0.07712554931640625, 0.009778976440429688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000020.npy"}
|
||||
{"epoch": 0.030234315948601664, "step": 21, "batch_size": 64, "mean": 0.032516419887542725, "std": 0.32473090291023254, "min": -1.153717041015625, "p10": -0.3045478820800781, "median": 0.04518699645996094, "p90": 0.40917892456054705, "max": 1.0348052978515625, "pos_frac": 0.5625, "sample": [0.04530525207519531, -0.14566612243652344, 0.1871356964111328, -0.1430816650390625, 0.21250534057617188, -0.2130279541015625, 0.1477680206298828, -0.16073036193847656, 0.045299530029296875, -0.2609691619873047, 0.0597381591796875, -0.019775390625, 0.6560516357421875, 0.36171722412109375, 0.17866897583007812, 0.5038871765136719, 0.11823272705078125, 0.5944137573242188, -0.36833953857421875, -0.001544952392578125, 0.12413787841796875, -0.012029647827148438, 0.28460693359375, -0.3145751953125, -0.07366943359375, -0.396636962890625, -0.2122783660888672, 0.27875518798828125, 0.4295196533203125, 0.03963279724121094, 0.360565185546875, 0.0029163360595703125, -0.75103759765625, -0.13645362854003906, -1.153717041015625, 0.3289203643798828, 1.0348052978515625, 0.2677421569824219, -0.141693115234375, -0.2029266357421875, 0.15472412109375, -0.28115081787109375, 0.49554443359375, 0.19400978088378906, 0.05448150634765625, -0.0033016204833984375, 0.0270233154296875, -0.06436920166015625, -0.3780174255371094, 0.1494884490966797, 0.1218109130859375, 0.48785400390625, 0.06569862365722656, -0.4169769287109375, -0.05397796630859375, 0.045074462890625, -0.020753860473632812, -0.026447296142578125, 0.07871818542480469, 0.1372222900390625, -0.262664794921875, 0.10715484619140625, 0.11133575439453125, -0.1956024169921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000021.npy"}
|
||||
{"epoch": 0.031746031746031744, "step": 22, "batch_size": 64, "mean": -0.008712053298950195, "std": 0.2997193932533264, "min": -0.9450836181640625, "p10": -0.42382621765136713, "median": -0.0040912628173828125, "p90": 0.36555347442626956, "max": 0.5687713623046875, "pos_frac": 0.484375, "sample": [-0.3714447021484375, -0.6014328002929688, -0.02001190185546875, -0.025852203369140625, 0.06714057922363281, 0.5687713623046875, 0.04981422424316406, -0.2931976318359375, 0.04372215270996094, 0.4217071533203125, 0.2886848449707031, -0.07611083984375, -0.06509017944335938, 0.13170433044433594, -0.007976531982421875, 0.01032257080078125, -0.44278717041015625, -0.13524246215820312, -0.0529937744140625, -0.6083602905273438, 0.20064544677734375, -0.1013641357421875, 0.42560577392578125, -0.12114143371582031, 0.3713207244873047, -0.0211639404296875, -0.012018203735351562, -0.25687217712402344, -0.1495342254638672, -0.07286834716796875, -0.7865447998046875, -0.04742431640625, -0.01377105712890625, 0.09474563598632812, 0.26015281677246094, -0.08705520629882812, -0.00020599365234375, -0.02773284912109375, -0.33496856689453125, 0.37860107421875, 0.2123737335205078, -0.9450836181640625, 0.1209716796875, 0.311767578125, 0.16760635375976562, 0.3451690673828125, 0.23597335815429688, 0.3520965576171875, 0.1089019775390625, 0.117431640625, -0.3931846618652344, 0.4377593994140625, 0.20929718017578125, 0.013317108154296875, 0.23410797119140625, -0.20751953125, 0.1409912109375, 0.13661575317382812, 0.3719482421875, 0.02471160888671875, -0.1429443359375, -0.43695831298828125, -0.06834983825683594, -0.484344482421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000022.npy"}
|
||||
{"epoch": 0.03325774754346183, "step": 23, "batch_size": 64, "mean": 0.03891530632972717, "std": 0.28755703568458557, "min": -0.6338348388671875, "p10": -0.30487899780273436, "median": 0.07206344604492188, "p90": 0.3916866302490236, "max": 0.6344985961914062, "pos_frac": 0.578125, "sample": [-0.4492034912109375, 0.30059814453125, 0.47885894775390625, 0.614654541015625, 0.10625267028808594, 0.1092681884765625, -0.23003387451171875, -0.6338348388671875, 0.135772705078125, 0.346588134765625, 0.28899383544921875, -0.2843914031982422, 0.2171630859375, 0.29804039001464844, 0.03906822204589844, -0.23675537109375, -0.574371337890625, 0.2845306396484375, -0.07552719116210938, 0.6344985961914062, 0.5626602172851562, 0.2547168731689453, 0.14068031311035156, -0.2985877990722656, 0.4680290222167969, 0.1902313232421875, -0.10485076904296875, -0.10206222534179688, 0.1225128173828125, 0.07735443115234375, -0.4994659423828125, 0.06957244873046875, -0.414093017578125, 0.13228607177734375, 0.4110145568847656, -0.15607833862304688, -0.1654205322265625, 0.332763671875, 0.13538742065429688, -0.4709758758544922, 0.074554443359375, -0.1123809814453125, -0.21078872680664062, 0.18758392333984375, 0.20331192016601562, -0.1120147705078125, -0.029937744140625, -0.123687744140625, 0.15515899658203125, -0.28570556640625, -0.0511932373046875, 0.08311271667480469, -0.008358001708984375, -0.04386329650878906, -0.3075752258300781, 0.17524337768554688, -0.2445068359375, 0.0645904541015625, 0.28177642822265625, 0.5199966430664062, 0.056793212890625, -0.105133056640625, 0.23144149780273438, 0.03631591796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000023.npy"}
|
||||
{"epoch": 0.03476946334089191, "step": 24, "batch_size": 64, "mean": -0.00348663330078125, "std": 0.25667834281921387, "min": -0.69500732421875, "p10": -0.3229042053222656, "median": 0.004730224609375, "p90": 0.27597351074218757, "max": 0.6384124755859375, "pos_frac": 0.5, "sample": [0.2560310363769531, -0.16542434692382812, -0.0088958740234375, 0.030120849609375, -0.08552932739257812, -0.2745323181152344, 0.3479957580566406, 0.3097381591796875, -0.07725906372070312, -0.287384033203125, -0.03397369384765625, 0.15369606018066406, -0.24113845825195312, 0.0826416015625, 0.21242523193359375, 0.0696258544921875, 0.169769287109375, -0.2214488983154297, 0.11006927490234375, 0.06789970397949219, 0.0726165771484375, 0.6384124755859375, 0.0741424560546875, -0.2582740783691406, 0.5273361206054688, 0.10602188110351562, 0.14435958862304688, -0.029102325439453125, -0.4316253662109375, 0.1695404052734375, -0.07940864562988281, -0.22867202758789062, -0.05622100830078125, 0.07799148559570312, -0.018802642822265625, -0.5986328125, 0.0183563232421875, -0.236236572265625, -0.02895355224609375, -0.06522369384765625, 0.559051513671875, -0.02851104736328125, 0.3451042175292969, -0.013628005981445312, 0.0402069091796875, 0.28094482421875, -0.16310882568359375, -0.4463348388671875, -0.13828277587890625, 0.06817626953125, 0.24169540405273438, -0.45050048828125, 0.19925689697265625, 0.15978240966796875, -0.69500732421875, 0.16664886474609375, -0.050060272216796875, 0.264373779296875, 0.12433624267578125, -0.1900787353515625, -0.33812713623046875, -0.3936004638671875, -0.0531005859375, 0.07556724548339844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000024.npy"}
|
||||
{"epoch": 0.036281179138321996, "step": 25, "batch_size": 64, "mean": 0.031399667263031006, "std": 0.25197798013687134, "min": -0.5789794921875, "p10": -0.21382522583007812, "median": 0.014848709106445312, "p90": 0.3043426513671876, "max": 0.7813720703125, "pos_frac": 0.515625, "sample": [-0.08445358276367188, 0.2672576904296875, -0.18767166137695312, 0.280181884765625, 0.048458099365234375, -0.10292243957519531, -0.01314544677734375, -0.20768356323242188, 0.583404541015625, 0.2726936340332031, 0.0709075927734375, 0.2317638397216797, -0.19969940185546875, -0.0473175048828125, 0.0302886962890625, 0.14635848999023438, -0.08950042724609375, 0.15240097045898438, 0.16745758056640625, -0.0061492919921875, -0.24817276000976562, 0.05805206298828125, -0.32869720458984375, -0.048370361328125, 0.3293876647949219, -0.13087844848632812, -0.028499603271484375, 0.714813232421875, -0.19666671752929688, 0.038341522216796875, 0.15584373474121094, 0.033893585205078125, 0.310394287109375, 0.1012725830078125, 0.08121490478515625, 0.0922088623046875, 0.091522216796875, -0.25432586669921875, -0.19132423400878906, -0.4015655517578125, -0.5789794921875, -0.07248687744140625, -0.2087249755859375, -0.19121932983398438, 0.12590789794921875, 0.05614471435546875, -0.04138946533203125, 0.002429962158203125, -0.02936553955078125, -0.2339935302734375, -0.14356613159179688, 0.06742477416992188, -0.08855438232421875, -0.12587356567382812, 0.6993942260742188, 0.193389892578125, 0.29022216796875, 0.38871002197265625, -0.1366424560546875, 0.08734130859375, -0.21601104736328125, -0.1342926025390625, 0.0272674560546875, 0.7813720703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000025.npy"}
|
||||
{"epoch": 0.03779289493575208, "step": 26, "batch_size": 64, "mean": -0.00825345516204834, "std": 0.34476956725120544, "min": -0.6739883422851562, "p10": -0.3919055938720703, "median": 0.024076461791992188, "p90": 0.3190696716308594, "max": 1.16204833984375, "pos_frac": 0.515625, "sample": [0.32381439208984375, -0.319915771484375, -0.40190887451171875, -0.35024261474609375, 0.03874969482421875, 0.21052169799804688, -0.6739883422851562, -0.3685646057128906, -0.3056678771972656, 0.3355827331542969, -0.07744979858398438, 1.16204833984375, 0.1909332275390625, -0.424163818359375, -0.3101348876953125, 0.05243682861328125, -0.105926513671875, 0.4438133239746094, 0.5540542602539062, 0.27930641174316406, -0.300445556640625, -0.2029266357421875, 0.09872055053710938, -0.5872955322265625, -0.134521484375, -0.14415740966796875, 0.04735565185546875, 0.2171764373779297, 0.27060508728027344, 0.07901382446289062, -0.0032901763916015625, 1.0608901977539062, 0.14458465576171875, 0.4774131774902344, 0.20635223388671875, 0.3079986572265625, -0.3296661376953125, 0.09842872619628906, -0.10167694091796875, 0.2808685302734375, -0.12403678894042969, 0.0514984130859375, -0.2740459442138672, 0.01438140869140625, 0.18819427490234375, -0.5030136108398438, -0.584991455078125, 0.16380882263183594, 0.20465087890625, -0.070709228515625, 0.23148345947265625, -0.13524627685546875, 0.047618865966796875, -0.1859588623046875, -0.29790687561035156, -0.363861083984375, -0.666351318359375, -0.1644268035888672, 0.050079345703125, 0.15791702270507812, 0.033771514892578125, -0.07013893127441406, 0.07737350463867188, -0.04703712463378906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000026.npy"}
|
||||
{"epoch": 0.039304610733182165, "step": 27, "batch_size": 64, "mean": -0.04079878330230713, "std": 0.2719458341598511, "min": -0.6866455078125, "p10": -0.41147155761718746, "median": 0.00234222412109375, "p90": 0.262457275390625, "max": 0.6306533813476562, "pos_frac": 0.5, "sample": [-0.010955810546875, 0.1258106231689453, 0.15859222412109375, -0.0342864990234375, 0.04241752624511719, 0.0860748291015625, -0.26869964599609375, -0.205596923828125, -0.18840789794921875, -0.6866455078125, 0.060333251953125, -0.2416229248046875, 0.22972679138183594, 0.1761760711669922, 0.06708335876464844, 0.09268569946289062, 0.0387115478515625, -0.1835174560546875, -0.5126419067382812, -0.2581748962402344, 0.0493927001953125, 0.2248687744140625, 0.51556396484375, -0.460113525390625, 0.2504730224609375, -0.2493133544921875, 0.02513885498046875, -0.050807952880859375, 0.17919158935546875, -0.1618804931640625, 0.08739471435546875, 0.018060684204101562, 0.16917991638183594, 0.13222503662109375, 0.6306533813476562, -0.5055923461914062, 0.2766456604003906, 0.47882843017578125, 0.2675933837890625, -0.06883621215820312, -0.23468780517578125, -0.4188690185546875, 0.08322525024414062, -0.009250640869140625, -0.08647918701171875, 0.28491973876953125, 0.209381103515625, -0.16614532470703125, -0.3942108154296875, -0.09744453430175781, -0.3269233703613281, 0.0462493896484375, 0.021429061889648438, -0.0566253662109375, 0.12689590454101562, -0.3744049072265625, 0.383026123046875, -0.14941787719726562, -0.624176025390625, -0.06270408630371094, -0.23207855224609375, -0.5508575439453125, 0.013935089111328125, -0.2916374206542969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000027.npy"}
|
||||
{"epoch": 0.04081632653061224, "step": 28, "batch_size": 64, "mean": 0.021502047777175903, "std": 0.3123089373111725, "min": -0.7056350708007812, "p10": -0.31937713623046876, "median": -0.009019851684570312, "p90": 0.4376707077026367, "max": 0.8148345947265625, "pos_frac": 0.484375, "sample": [0.5976791381835938, -0.3124237060546875, -0.082672119140625, 0.8004684448242188, -0.2274932861328125, -0.2877464294433594, 0.1470489501953125, 0.0867919921875, 0.44426727294921875, 0.8148345947265625, -0.24776458740234375, -0.02433013916015625, -0.46332550048828125, -0.7027587890625, -0.09576416015625, -0.240234375, -0.11297798156738281, -0.1873950958251953, -0.09388351440429688, 0.14581298828125, 0.3960418701171875, 0.2767333984375, -0.029232025146484375, -0.18903350830078125, 0.3363075256347656, 0.18286514282226562, -0.12177658081054688, 0.2948341369628906, -0.430328369140625, -0.03575897216796875, -0.194183349609375, -0.13952255249023438, 0.36600494384765625, 0.02069854736328125, -0.00620269775390625, 0.4321632385253906, 0.011081695556640625, -0.06050872802734375, -0.04213714599609375, -0.322357177734375, -0.03908729553222656, -0.7056350708007812, 0.0706787109375, 0.4400310516357422, 0.10887908935546875, -0.047939300537109375, -0.025297164916992188, -0.5403289794921875, -0.06257438659667969, 0.021518707275390625, 0.09508514404296875, 0.5213851928710938, -0.011837005615234375, 0.3233184814453125, 0.536834716796875, -0.2411041259765625, 0.0056629180908203125, -0.4371337890625, 0.1698760986328125, 0.042842864990234375, 0.043548583984375, 0.07851028442382812, 0.24233245849609375, 0.08274078369140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000028.npy"}
|
||||
{"epoch": 0.042328042328042326, "step": 29, "batch_size": 64, "mean": 0.09683471918106079, "std": 0.30000466108322144, "min": -0.8917617797851562, "p10": -0.21830806732177732, "median": 0.09701061248779297, "p90": 0.4415245056152344, "max": 0.73443603515625, "pos_frac": 0.59375, "sample": [0.4907417297363281, -0.01248931884765625, 0.30255126953125, -0.1246490478515625, 0.3924560546875, 0.022579193115234375, -0.01427459716796875, -0.10897254943847656, -0.8917617797851562, 0.438720703125, -0.10237312316894531, 0.18912887573242188, 0.0791015625, -0.12104415893554688, 0.2860679626464844, 0.621337890625, -0.225128173828125, -0.014728546142578125, 0.235748291015625, 0.44272613525390625, -0.03140068054199219, -0.2940673828125, 0.038227081298828125, -0.16650009155273438, 0.73443603515625, 0.5258941650390625, 0.21107101440429688, -0.22713470458984375, 0.4318084716796875, 0.10634040832519531, -0.17449188232421875, 0.29449462890625, -0.121185302734375, 0.12811660766601562, 0.21832275390625, 0.20564651489257812, 0.27001953125, 0.29636383056640625, 0.251861572265625, 0.5934295654296875, -0.2023944854736328, -0.10842514038085938, -0.2558422088623047, 0.08768081665039062, -0.025968551635742188, 0.06908416748046875, -0.4287452697753906, -0.0407867431640625, -0.11128997802734375, 0.13897132873535156, 0.21307373046875, 0.28809547424316406, 0.30268096923828125, 0.1123809814453125, 0.044921875, -0.05828094482421875, -0.7546768188476562, 0.26291656494140625, 0.6340255737304688, 0.341064453125, 0.2565460205078125, -0.10424041748046875, -0.03746223449707031, 0.39710235595703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000029.npy"}
|
||||
{"epoch": 0.04383975812547241, "step": 30, "batch_size": 64, "mean": 0.061375439167022705, "std": 0.32676199078559875, "min": -0.741546630859375, "p10": -0.2890815734863281, "median": 0.04201984405517578, "p90": 0.427535629272461, "max": 1.136077880859375, "pos_frac": 0.578125, "sample": [-0.15597915649414062, 0.03773689270019531, -0.28873443603515625, 0.00399017333984375, -0.1054534912109375, -0.21231842041015625, 0.17297935485839844, -0.0981292724609375, 0.06982421875, -0.2892303466796875, 0.16216278076171875, -0.30487060546875, 0.20291900634765625, 0.25197601318359375, -0.5474624633789062, -0.2285308837890625, -0.17618179321289062, 1.136077880859375, -0.4333953857421875, 0.15663909912109375, 0.06198883056640625, -0.138824462890625, 0.05748748779296875, 0.3399829864501953, 0.117950439453125, -0.061859130859375, 0.038585662841796875, 0.20969772338867188, 0.3192596435546875, 0.03433990478515625, 0.435028076171875, 0.14680862426757812, -0.3083000183105469, 0.4100532531738281, 0.686981201171875, 0.253143310546875, 0.3240966796875, -0.082427978515625, 0.2659912109375, -0.21477699279785156, 0.038372039794921875, 0.4398040771484375, 1.05169677734375, 0.6528472900390625, -0.030490875244140625, 0.3009033203125, 0.455352783203125, -0.2610664367675781, 0.18244361877441406, -0.519378662109375, -0.02686309814453125, 0.22586822509765625, 0.04545402526855469, -0.12467193603515625, -0.1459503173828125, -0.006374359130859375, 0.05562591552734375, -0.05805206298828125, -0.741546630859375, 0.10860633850097656, -0.00557708740234375, 0.07831192016601562, -0.193359375, 0.1568470001220703], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000030.npy"}
|
||||
{"epoch": 0.045351473922902494, "step": 31, "batch_size": 64, "mean": 0.07603979110717773, "std": 0.3079001307487488, "min": -0.619659423828125, "p10": -0.29028472900390623, "median": 0.01894092559814453, "p90": 0.5450567245483399, "max": 0.629241943359375, "pos_frac": 0.578125, "sample": [-0.03746795654296875, 0.189697265625, -0.5291290283203125, 0.2097148895263672, -0.21449661254882812, 0.101409912109375, -0.2577247619628906, 0.5867347717285156, 0.55047607421875, -0.29201507568359375, 0.28156280517578125, 0.5564041137695312, 0.5324115753173828, 0.564361572265625, 0.501312255859375, -0.09979248046875, 0.19548416137695312, -0.2765350341796875, 0.25141143798828125, -0.128326416015625, 0.3889923095703125, 0.18231201171875, -0.06343841552734375, -0.08759689331054688, 0.5128345489501953, 0.1233673095703125, 0.34294891357421875, -0.28624725341796875, -0.24037933349609375, 0.4096527099609375, 0.0186920166015625, 0.0657501220703125, -0.2740936279296875, -0.43205833435058594, -0.11604881286621094, 0.3936767578125, -0.3191070556640625, -0.31131744384765625, 0.0044879913330078125, 0.0393829345703125, -0.01346588134765625, -0.619659423828125, 0.004009246826171875, 0.5825653076171875, 0.6221923828125, 0.323516845703125, -0.014425277709960938, -0.03176689147949219, 0.23729324340820312, 0.014801025390625, -0.3857269287109375, 0.22352981567382812, 0.0554351806640625, 0.019189834594726562, -0.22308349609375, 0.17501449584960938, 0.2416210174560547, -0.044189453125, -0.013238906860351562, -0.13726043701171875, 0.629241943359375, 0.39879417419433594, -0.2222900390625, 0.007144927978515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000031.npy"}
|
||||
{"epoch": 0.04686318972033258, "step": 32, "batch_size": 64, "mean": 0.02282300591468811, "std": 0.3216497302055359, "min": -0.9964599609375, "p10": -0.31575355529785154, "median": 0.011810302734375, "p90": 0.3844852447509766, "max": 0.7619171142578125, "pos_frac": 0.515625, "sample": [-0.18706512451171875, 0.2462158203125, -0.7820968627929688, -0.0038909912109375, 0.552886962890625, 0.11353302001953125, 0.0037994384765625, -0.419464111328125, -0.0958099365234375, -0.0419921875, 0.5867652893066406, 0.392913818359375, 0.05626678466796875, 0.0503082275390625, 0.4305000305175781, -0.9964599609375, 0.0601959228515625, 0.3648185729980469, -0.22854232788085938, 0.21689224243164062, -0.18708229064941406, -0.15985107421875, 0.09508132934570312, 0.294769287109375, 0.19441986083984375, -0.06576156616210938, 0.23418045043945312, 0.1562652587890625, 0.42962646484375, -0.07256317138671875, -0.16860389709472656, 0.7619171142578125, -0.004230499267578125, 0.34820556640625, 0.28160858154296875, 0.15151214599609375, 0.16809844970703125, -0.3158531188964844, 0.3055572509765625, 0.6295013427734375, 0.30563926696777344, -0.03729248046875, -0.315521240234375, -0.00023651123046875, 0.03750419616699219, 0.2644805908203125, -0.09563446044921875, -0.7393989562988281, -0.248260498046875, 0.22378158569335938, -0.4257354736328125, 0.15514755249023438, -0.4885597229003906, -0.13916397094726562, -0.21068191528320312, -0.09396743774414062, -0.07940673828125, 0.2284259796142578, 0.12694931030273438, -0.009227752685546875, -0.2563285827636719, -0.0455322265625, -0.112701416015625, 0.0198211669921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000032.npy"}
|
||||
{"epoch": 0.04837490551776266, "step": 33, "batch_size": 64, "mean": -0.0015421658754348755, "std": 0.3758452236652374, "min": -1.500823974609375, "p10": -0.3754615783691406, "median": 0.04964256286621094, "p90": 0.45260086059570326, "max": 0.859771728515625, "pos_frac": 0.5625, "sample": [-0.38423919677734375, -0.562408447265625, 0.05486297607421875, 0.08167266845703125, 0.18863677978515625, 0.04404449462890625, -0.114532470703125, 0.029508590698242188, 0.46678924560546875, 0.5774002075195312, 0.2433929443359375, 0.048252105712890625, -0.17740631103515625, -0.2669048309326172, 0.8449935913085938, -0.4825248718261719, -0.35498046875, 0.10753631591796875, 0.05103302001953125, -0.0250244140625, 0.15045928955078125, 0.859771728515625, 0.06875991821289062, -0.07726097106933594, -0.9869842529296875, 0.4730072021484375, -0.12326812744140625, -0.08218002319335938, -0.3921051025390625, 0.41949462890625, 0.24157142639160156, -0.04471588134765625, 0.5691757202148438, -0.18914031982421875, 0.10891532897949219, 0.23819732666015625, 0.2473468780517578, -0.3189697265625, 0.05445098876953125, -0.2650032043457031, 0.0215911865234375, -0.5036506652832031, -0.30387306213378906, -0.3060302734375, 0.1584625244140625, -0.27651214599609375, 0.23478317260742188, 0.0780487060546875, -0.032990455627441406, 0.10966873168945312, -0.30315208435058594, -0.03224945068359375, 0.06990432739257812, -0.05298614501953125, -0.20606231689453125, 0.06097412109375, 0.1683197021484375, 0.283294677734375, -1.500823974609375, -0.287200927734375, 0.3061790466308594, 0.5479736328125, 0.1217041015625, 0.22430419921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000033.npy"}
|
||||
{"epoch": 0.049886621315192746, "step": 34, "batch_size": 64, "mean": 0.09580284357070923, "std": 0.3143908381462097, "min": -0.7285232543945312, "p10": -0.25955772399902344, "median": 0.11235237121582031, "p90": 0.49003963470458994, "max": 0.72509765625, "pos_frac": 0.59375, "sample": [-0.2655200958251953, 0.6852569580078125, 0.3088226318359375, 0.21434783935546875, 0.1725006103515625, 0.3872184753417969, -0.12291526794433594, -0.038150787353515625, 0.19957542419433594, -0.08161163330078125, 0.46173095703125, 0.252288818359375, 0.344818115234375, 0.5090484619140625, -0.048065185546875, 0.236419677734375, 0.72509765625, -0.058673858642578125, 0.46260643005371094, 0.12191963195800781, 0.06694793701171875, 0.0421905517578125, 0.1435089111328125, -0.02655029296875, -0.05966949462890625, 0.13222503662109375, -0.01519775390625, 0.4111175537109375, 0.6660308837890625, -0.29816436767578125, 0.18810653686523438, 0.19130706787109375, -0.0482177734375, 0.1536426544189453, 0.11483383178710938, 0.519256591796875, 0.5017967224121094, 0.2330760955810547, -0.07115554809570312, 0.10987091064453125, -0.6370391845703125, 0.04563140869140625, 0.020725250244140625, 0.19159317016601562, 0.7115478515625, 0.3945465087890625, -0.3358154296875, 0.21764373779296875, 0.22978973388671875, 0.4111480712890625, 0.04910087585449219, -0.7285232543945312, 0.38590240478515625, -0.09632110595703125, -0.05916595458984375, -0.4382057189941406, -0.084686279296875, -0.24564552307128906, -0.06970596313476562, -0.020425796508789062, -0.23307037353515625, -0.15493011474609375, -0.1220703125, -0.7223129272460938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000034.npy"}
|
||||
{"epoch": 0.05139833711262283, "step": 35, "batch_size": 64, "mean": 0.05110803246498108, "std": 0.40384572744369507, "min": -0.9785614013671875, "p10": -0.36493682861328125, "median": 0.009527206420898438, "p90": 0.5380680084228516, "max": 1.4970245361328125, "pos_frac": 0.515625, "sample": [-0.039703369140625, -0.3903350830078125, 0.4552574157714844, 0.17829513549804688, -0.18365097045898438, 1.01470947265625, -0.13829803466796875, 0.016656875610351562, -0.3328094482421875, -0.23224258422851562, 0.3145904541015625, 0.3045368194580078, 0.19132423400878906, 0.1494007110595703, -0.24319839477539062, -0.015033721923828125, 1.4970245361328125, -0.1893310546875, 0.3016357421875, 0.8966789245605469, 0.5415000915527344, -0.10464859008789062, 0.7167472839355469, 0.2507781982421875, 0.046550750732421875, -0.07150650024414062, -0.645599365234375, -0.3677520751953125, -0.358367919921875, 0.35768890380859375, -0.9785614013671875, 0.7020645141601562, 0.1994781494140625, -0.284149169921875, -0.49080657958984375, -0.5873565673828125, 0.08858299255371094, 0.23199462890625, 0.08932304382324219, -0.08145523071289062, -0.037387847900390625, 0.3347034454345703, 0.530059814453125, -0.1732635498046875, 0.08319091796875, -0.21202850341796875, 0.268096923828125, -0.123321533203125, -0.16727447509765625, 0.0023975372314453125, -0.11789894104003906, -0.21455764770507812, -0.17276763916015625, 0.0439453125, -0.5011672973632812, -0.0887451171875, -0.049896240234375, 0.01892852783203125, -0.261199951171875, 0.265289306640625, 0.142181396484375, 0.6350860595703125, 0.047817230224609375, 0.20871353149414062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000035.npy"}
|
||||
{"epoch": 0.05291005291005291, "step": 36, "batch_size": 64, "mean": 0.006341129541397095, "std": 0.38383564352989197, "min": -0.9349441528320312, "p10": -0.39201946258544923, "median": 0.0103302001953125, "p90": 0.44717712402343757, "max": 1.2117919921875, "pos_frac": 0.53125, "sample": [0.41876220703125, 1.2117919921875, 0.009674072265625, -0.6006736755371094, -0.3294944763183594, -0.10525894165039062, -0.231536865234375, 0.1585235595703125, 0.1728687286376953, -0.27431488037109375, 0.29123687744140625, 0.30851173400878906, 0.16069793701171875, 0.010986328125, -0.03076934814453125, -0.3156700134277344, 0.0887908935546875, -0.135650634765625, 0.07634925842285156, 0.5569305419921875, -0.05742645263671875, 0.3006591796875, 0.25626373291015625, 0.10272216796875, -0.3435096740722656, -0.42675018310546875, -0.08715438842773438, 0.09549713134765625, 0.10941314697265625, -0.747589111328125, -0.3067054748535156, 0.19869041442871094, -0.0790863037109375, 0.1622314453125, 0.06436920166015625, -0.24792861938476562, -0.07122039794921875, -0.3441162109375, -0.39757728576660156, -0.26348114013671875, 0.10558700561523438, 0.4513702392578125, 0.79962158203125, -0.19991493225097656, 0.004329681396484375, 0.2695331573486328, 0.4373931884765625, 0.4681510925292969, 0.8764572143554688, 0.20502471923828125, 0.5155067443847656, -0.5611915588378906, 0.20996665954589844, -0.2669830322265625, -0.37905120849609375, -0.9349441528320312, -0.1357879638671875, 0.1677398681640625, -0.826080322265625, 0.05904388427734375, -0.21004295349121094, 0.303558349609375, -0.24372100830078125, -0.06879043579101562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000036.npy"}
|
||||
{"epoch": 0.05442176870748299, "step": 37, "batch_size": 64, "mean": 0.1288444697856903, "std": 0.345906525850296, "min": -0.5561599731445312, "p10": -0.22906799316406248, "median": 0.07873058319091797, "p90": 0.5433677673339844, "max": 1.3092041015625, "pos_frac": 0.609375, "sample": [0.0034637451171875, 0.5401687622070312, 0.14403533935546875, 0.04203033447265625, 0.52398681640625, -0.022792816162109375, -0.09372901916503906, 0.06110191345214844, -0.0439453125, -0.2448883056640625, 0.04156494140625, 0.4545135498046875, 0.40755462646484375, 0.54473876953125, -0.10838699340820312, -0.5561599731445312, 0.2591209411621094, 0.6608123779296875, 0.2916297912597656, 0.380828857421875, -0.383087158203125, -0.18416786193847656, 0.327178955078125, 0.07543182373046875, -0.04468727111816406, -0.061695098876953125, 0.10153961181640625, -0.1993885040283203, 0.14289093017578125, 0.11566162109375, 0.0460662841796875, -0.29640960693359375, 0.392669677734375, 0.6963043212890625, 1.3092041015625, 0.00075531005859375, 0.1007537841796875, -0.1810150146484375, -0.07954597473144531, -0.239166259765625, 1.114990234375, -0.10112380981445312, -0.05352020263671875, 0.21675872802734375, -0.3578643798828125, 0.14923477172851562, 0.188140869140625, 0.9078826904296875, -0.1982269287109375, 0.299041748046875, 0.08202934265136719, -0.26876068115234375, 0.1617908477783203, -0.027578353881835938, 0.35282135009765625, 0.1201019287109375, -0.1773834228515625, 0.1969890594482422, 0.2449951171875, -0.12403106689453125, 0.1022186279296875, -0.20550537109375, -0.017595291137695312, 0.7156982421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000037.npy"}
|
||||
{"epoch": 0.055933484504913075, "step": 38, "batch_size": 64, "mean": 0.04614526033401489, "std": 0.36860084533691406, "min": -0.8191375732421875, "p10": -0.4152368545532226, "median": -0.0036230087280273438, "p90": 0.43193588256835946, "max": 1.1771240234375, "pos_frac": 0.5, "sample": [0.04250907897949219, 0.4682331085205078, -0.22483062744140625, 1.1699180603027344, -0.0962982177734375, 0.2838859558105469, 0.2514801025390625, 0.24361801147460938, -0.09694671630859375, 0.6444244384765625, 0.25604820251464844, 0.2552375793457031, 0.24192047119140625, -0.1413707733154297, -0.03765106201171875, -0.43138885498046875, 0.03049468994140625, -0.06888580322265625, -0.07132720947265625, -0.0550689697265625, -0.189239501953125, 0.3282623291015625, -0.14886474609375, -0.328460693359375, -0.125091552734375, 0.415802001953125, 0.291595458984375, -0.8191375732421875, 0.1087799072265625, -0.06519317626953125, -0.4283580780029297, -0.38462066650390625, 0.10109901428222656, 0.3098258972167969, 0.575775146484375, 0.3464508056640625, 0.18058013916015625, -0.5097694396972656, 0.19768524169921875, -0.09227752685546875, 0.3673744201660156, 0.43885040283203125, 0.030405044555664062, 0.2693977355957031, -0.10188674926757812, -0.3251762390136719, -0.4541206359863281, -0.3454017639160156, 0.3828582763671875, 1.1771240234375, -0.5360565185546875, -0.112518310546875, 0.33099365234375, -0.3006134033203125, -0.29451942443847656, 0.1723461151123047, 0.33779144287109375, -0.2549934387207031, -0.5128936767578125, -0.06500244140625, 0.4455070495605469, -0.059112548828125, 0.10565757751464844, -0.17155838012695312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000038.npy"}
|
||||
{"epoch": 0.05744520030234316, "step": 39, "batch_size": 64, "mean": 0.010573387145996094, "std": 0.3600960373878479, "min": -1.1023025512695312, "p10": -0.37825393676757807, "median": 0.018619537353515625, "p90": 0.4066680908203125, "max": 1.31927490234375, "pos_frac": 0.515625, "sample": [0.381561279296875, -0.28479766845703125, 0.3792877197265625, 0.4323310852050781, -0.0415191650390625, -0.629302978515625, 0.4091949462890625, 0.283447265625, -0.018215179443359375, 0.1929931640625, -0.21950531005859375, -0.06170845031738281, 0.25942039489746094, -0.03949546813964844, -0.06401824951171875, 0.04505348205566406, -0.14072608947753906, 0.5159873962402344, 0.5160198211669922, -0.049224853515625, -0.5819606781005859, -0.3028106689453125, -0.14665985107421875, 0.08780670166015625, 1.31927490234375, 0.0647125244140625, -0.494232177734375, 0.2617664337158203, 0.0779266357421875, -0.2666282653808594, 0.166717529296875, 0.4007720947265625, 0.0712890625, 0.11853408813476562, 0.0314483642578125, 0.4254188537597656, -0.2501487731933594, -0.2223358154296875, 0.069580078125, -0.7329635620117188, 0.3114471435546875, -0.7407379150390625, 0.07008552551269531, 0.13994598388671875, 0.335174560546875, -0.34180450439453125, -0.3938751220703125, 0.197479248046875, 0.10005569458007812, -0.11260604858398438, 0.21651458740234375, -1.1023025512695312, -0.0607757568359375, -0.15719032287597656, -0.0609283447265625, 0.487548828125, 0.04873466491699219, 0.2102508544921875, -0.0067901611328125, -0.10854721069335938, -0.1385955810546875, 0.00579071044921875, -0.1311359405517578, -0.055332183837890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000039.npy"}
|
||||
{"epoch": 0.05895691609977324, "step": 40, "batch_size": 64, "mean": -0.019462496042251587, "std": 0.3502136766910553, "min": -1.01434326171875, "p10": -0.41955795288085934, "median": -0.07097053527832031, "p90": 0.4740646362304688, "max": 0.863616943359375, "pos_frac": 0.4375, "sample": [-0.09047698974609375, 0.6627044677734375, 0.34783935546875, 0.1827373504638672, -0.15015220642089844, 0.11199188232421875, 0.467559814453125, 0.14759063720703125, -0.2059326171875, 0.306488037109375, 0.10496139526367188, 0.17183876037597656, 0.05591583251953125, -0.020433425903320312, 0.07260513305664062, -0.19520950317382812, -0.072540283203125, 0.7593536376953125, 0.6227035522460938, -0.1780872344970703, -0.2825736999511719, 0.4768524169921875, 0.04085540771484375, -0.06940078735351562, 0.48070526123046875, -0.01678466796875, -0.15773773193359375, -0.195709228515625, 0.863616943359375, -0.299560546875, 0.2701416015625, -0.5272979736328125, 0.27679443359375, 0.07087326049804688, 0.12808990478515625, 0.14284324645996094, -0.13547134399414062, 0.029315948486328125, -0.23812103271484375, -0.1773529052734375, -0.08929443359375, -0.376617431640625, -0.05620384216308594, -0.3677825927734375, -0.32381439208984375, -0.15449905395507812, 0.11389350891113281, -0.16736221313476562, -0.18169784545898438, -0.087127685546875, 0.1497039794921875, -0.6144256591796875, -0.3837547302246094, 0.718902587890625, -0.2696533203125, -0.4349021911621094, -1.01434326171875, -0.09527587890625, -0.454864501953125, -0.478240966796875, 0.20377349853515625, -0.07999992370605469, -0.6342315673828125, 0.05068206787109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000040.npy"}
|
||||
{"epoch": 0.06046863189720333, "step": 41, "batch_size": 64, "mean": 0.07305684685707092, "std": 0.346498042345047, "min": -0.6528739929199219, "p10": -0.36113052368164056, "median": 0.044178009033203125, "p90": 0.5414342880249023, "max": 0.8460540771484375, "pos_frac": 0.59375, "sample": [0.03356170654296875, 0.00028228759765625, 0.06667518615722656, 0.18164825439453125, 0.0547943115234375, -0.0602264404296875, -0.18055343627929688, 0.5454597473144531, -0.1629486083984375, -0.30199432373046875, 0.19304656982421875, 0.007289886474609375, 0.06551361083984375, 0.5792694091796875, 0.1295928955078125, 0.8460540771484375, -0.2329998016357422, 0.734954833984375, -0.275726318359375, -0.18233680725097656, 0.6956405639648438, -0.6027069091796875, -0.2744483947753906, 0.42389678955078125, 0.5320415496826172, -0.4209136962890625, 0.168701171875, 0.06012725830078125, 0.24689483642578125, -0.0276031494140625, 0.01625823974609375, 0.5775909423828125, 0.531280517578125, 0.393951416015625, -0.06707000732421875, -0.563995361328125, 0.4476203918457031, -0.3992805480957031, 0.46282958984375, 0.0034961700439453125, -0.11609268188476562, -0.6528739929199219, -0.0075225830078125, 0.008121490478515625, 0.1806793212890625, -0.2721996307373047, 0.08549880981445312, -0.19568634033203125, 0.45145416259765625, 0.143585205078125, -0.2541694641113281, -0.51300048828125, -0.27309417724609375, -0.386474609375, 0.2678413391113281, -0.1239166259765625, 0.4564971923828125, 0.32491302490234375, 0.3828144073486328, -0.041168212890625, 0.6307296752929688, 0.24468231201171875, -0.043277740478515625, 0.13262939453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000041.npy"}
|
||||
{"epoch": 0.06198034769463341, "step": 42, "batch_size": 64, "mean": 0.028522223234176636, "std": 0.37105104327201843, "min": -0.7398910522460938, "p10": -0.4953950881958008, "median": 0.04698944091796875, "p90": 0.4574645996093751, "max": 0.8264617919921875, "pos_frac": 0.5625, "sample": [0.4319610595703125, 0.54986572265625, -0.1981048583984375, -0.016750335693359375, -0.46372222900390625, -0.6986083984375, -0.01224517822265625, -0.005570411682128906, 0.1081085205078125, 0.3349761962890625, -0.292572021484375, 0.22432708740234375, -0.2300872802734375, -0.008609771728515625, 0.4681396484375, 0.5874214172363281, 0.5550537109375, 0.8264617919921875, 0.059894561767578125, 0.0504913330078125, 0.043487548828125, 0.35463714599609375, 0.14405059814453125, 0.0852203369140625, -0.6099700927734375, -0.4770050048828125, -0.16781997680664062, -0.1048126220703125, -0.1637554168701172, -0.4975872039794922, 0.09285736083984375, 0.262786865234375, -0.1581573486328125, -0.4797210693359375, 0.7523117065429688, 0.0224151611328125, 0.32653045654296875, 0.05802154541015625, -0.7398910522460938, 0.43255615234375, -0.07739830017089844, -0.55670166015625, -0.5104827880859375, -0.170501708984375, -0.4902801513671875, -0.5183029174804688, 0.32076263427734375, 0.0045166015625, -0.3380889892578125, -0.45306396484375, 0.05924415588378906, 0.4115791320800781, -0.033782958984375, 0.2992706298828125, 0.35345458984375, 0.05696868896484375, -0.281341552734375, 0.005089759826660156, 0.5563125610351562, 0.3840179443359375, 0.24605369567871094, 0.3103828430175781, 0.36867523193359375, 0.4324531555175781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000042.npy"}
|
||||
{"epoch": 0.06349206349206349, "step": 43, "batch_size": 64, "mean": 0.0057284533977508545, "std": 0.37580135464668274, "min": -0.9161376953125, "p10": -0.4983890533447265, "median": 0.07522869110107422, "p90": 0.5033660888671876, "max": 0.69842529296875, "pos_frac": 0.53125, "sample": [-0.152099609375, 0.34358978271484375, -0.9161376953125, -0.08100318908691406, -0.03546905517578125, -0.39252281188964844, -0.2907447814941406, 0.09127044677734375, 0.69842529296875, -0.7377777099609375, 0.20211410522460938, 0.267059326171875, -0.0657196044921875, -0.2759246826171875, 0.478271484375, 0.1295928955078125, 0.5886821746826172, 0.08692169189453125, 0.22298812866210938, -0.32663726806640625, 0.022670745849609375, -0.08935546875, -0.20278167724609375, 0.10231399536132812, 0.6770172119140625, -0.08889198303222656, 0.23800277709960938, 0.2333984375, -0.14696884155273438, 0.513885498046875, 0.284576416015625, -0.5903472900390625, 0.12129974365234375, -0.5195388793945312, -0.20585060119628906, -0.0847015380859375, 0.33026123046875, -0.36698150634765625, 0.5251541137695312, 0.590728759765625, -0.2398681640625, -0.32688140869140625, 0.2611579895019531, -0.6343650817871094, 0.1580963134765625, 0.47882080078125, 0.13849639892578125, 0.30908203125, -0.4490394592285156, 0.388702392578125, 0.06353569030761719, 0.22271728515625, 0.1447601318359375, -0.2758331298828125, 0.3060111999511719, 0.13562774658203125, -0.4397125244140625, 0.5529289245605469, -0.0923309326171875, 0.14300537109375, -0.7025146484375, -0.018829345703125, -0.15955543518066406, -0.7761611938476562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000043.npy"}
|
||||
{"epoch": 0.06500377928949358, "step": 44, "batch_size": 64, "mean": -0.029954195022583008, "std": 0.43650779128074646, "min": -1.3294677734375, "p10": -0.5057613372802734, "median": -0.03580760955810547, "p90": 0.46365928649902355, "max": 1.348785400390625, "pos_frac": 0.46875, "sample": [-0.27431488037109375, -0.12941360473632812, 0.0122833251953125, 0.170654296875, 0.4226837158203125, -0.28875732421875, 0.1573486328125, -0.16353607177734375, -0.5219383239746094, -0.5045623779296875, -0.2746429443359375, 0.07805633544921875, -0.490814208984375, -0.0439453125, 0.05998992919921875, -0.30028533935546875, -0.16480064392089844, 0.11463165283203125, 0.06989097595214844, 0.2502002716064453, 0.060894012451171875, 0.5323905944824219, 0.27636146545410156, -0.248260498046875, -0.014972686767578125, -0.2078094482421875, 0.486663818359375, -0.6600265502929688, -0.46539306640625, 0.13458251953125, 0.069732666015625, -0.08744430541992188, 0.24315643310546875, 0.4745941162109375, -0.027669906616210938, -0.11233901977539062, 1.11993408203125, -0.06686019897460938, -0.378143310546875, -0.28338623046875, -0.28072166442871094, -0.06206512451171875, 0.136749267578125, -0.4173736572265625, 0.22418975830078125, 0.4381446838378906, -0.311187744140625, 0.113861083984375, -0.7220916748046875, 0.1546039581298828, -1.3294677734375, 0.2371978759765625, 0.54412841796875, -0.055572509765625, -0.5062751770019531, 0.06708526611328125, 0.7457351684570312, 1.348785400390625, 0.35768699645996094, 0.39995574951171875, -0.15643310546875, -0.5997695922851562, -0.32874298095703125, -0.9402236938476562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000044.npy"}
|
||||
{"epoch": 0.06651549508692366, "step": 45, "batch_size": 64, "mean": 0.10358336567878723, "std": 0.3609749674797058, "min": -1.0147323608398438, "p10": -0.3483234405517578, "median": 0.13942241668701172, "p90": 0.5595296859741211, "max": 0.981109619140625, "pos_frac": 0.609375, "sample": [0.2619590759277344, -0.21944427490234375, 0.021900177001953125, -0.08914947509765625, -0.3510780334472656, -0.36447906494140625, -0.21056556701660156, 0.0787811279296875, -0.2568359375, -1.0147323608398438, 0.13372802734375, 0.2256927490234375, 0.19536209106445312, 0.17464447021484375, 0.08038711547851562, -0.24745559692382812, -0.04282379150390625, -0.34189605712890625, 0.3002471923828125, 0.435577392578125, 0.145477294921875, -0.22170257568359375, 0.5672550201416016, 0.009191513061523438, 0.3531951904296875, 0.8873748779296875, 0.5722503662109375, 0.2018718719482422, 0.661102294921875, 0.2785797119140625, 0.3570823669433594, 0.59735107421875, -0.48883056640625, 0.3037223815917969, 0.70416259765625, 0.981109619140625, -0.536102294921875, 0.37799072265625, -0.186370849609375, -0.027385711669921875, -0.6271095275878906, 0.4934539794921875, 0.22125244140625, -0.0944976806640625, -0.01007843017578125, 0.14511680603027344, -0.206207275390625, 0.3443317413330078, -0.171478271484375, 0.3070831298828125, 0.098052978515625, 0.2892169952392578, 0.3913116455078125, 0.40294647216796875, -0.05002593994140625, 0.123748779296875, 0.1760101318359375, -0.03002166748046875, 0.1837615966796875, -0.05799102783203125, -0.013916015625, 0.54150390625, 0.22373199462890625, -0.3580055236816406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000045.npy"}
|
||||
{"epoch": 0.06802721088435375, "step": 46, "batch_size": 64, "mean": 0.16466832160949707, "std": 0.4576854705810547, "min": -1.04644775390625, "p10": -0.369427490234375, "median": 0.13013076782226562, "p90": 0.7658920288085939, "max": 1.5474853515625, "pos_frac": 0.640625, "sample": [0.430877685546875, -0.38751220703125, -0.7499465942382812, 0.52130126953125, 0.25311279296875, 0.063812255859375, -0.013528823852539062, -0.13683700561523438, 0.0279693603515625, -1.04644775390625, 0.4727001190185547, 0.08514785766601562, 0.1437835693359375, 0.448028564453125, -0.22524261474609375, 0.48639678955078125, -0.3846588134765625, 0.0162506103515625, 0.44937705993652344, 0.38349342346191406, 0.17763900756835938, 1.5474853515625, -0.14836692810058594, 0.24542999267578125, 0.5711097717285156, 0.442138671875, 0.7801971435546875, 0.921417236328125, 0.49676513671875, -0.0236968994140625, -0.11966705322265625, 0.37798309326171875, -0.2072601318359375, 0.24669647216796875, -0.14192962646484375, 0.2631072998046875, -0.01279449462890625, 0.207672119140625, -0.43556976318359375, 0.8079566955566406, 0.3171958923339844, -0.38320159912109375, 0.8695144653320312, 0.11647796630859375, 0.7828292846679688, -0.07947158813476562, -0.33728790283203125, -0.7070693969726562, 0.022464752197265625, 0.21041488647460938, 0.3810768127441406, 0.10529518127441406, 0.038326263427734375, 0.07923507690429688, 0.732513427734375, 1.2793350219726562, -0.32961273193359375, -0.0189056396484375, -0.22488784790039062, 0.6756134033203125, -0.17889404296875, 0.20511245727539062, -0.27783966064453125, 0.4261474609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000046.npy"}
|
||||
{"epoch": 0.06953892668178382, "step": 47, "batch_size": 64, "mean": 0.14839708805084229, "std": 0.40268126130104065, "min": -0.6961593627929688, "p10": -0.43878364562988276, "median": 0.17607402801513672, "p90": 0.630785369873047, "max": 1.1113662719726562, "pos_frac": 0.6875, "sample": [0.28231239318847656, 0.019805908203125, 0.058185577392578125, 0.2435455322265625, 0.9799041748046875, 0.21120643615722656, -0.11693191528320312, 1.1113662719726562, -0.008481979370117188, -0.36585235595703125, -0.027307510375976562, -0.21852493286132812, 0.1650543212890625, -0.6961593627929688, 0.167938232421875, 0.2103424072265625, 0.3614158630371094, 0.0130157470703125, -0.4536895751953125, -0.5338821411132812, 0.23203277587890625, -0.53094482421875, -0.0627288818359375, 0.11863517761230469, 0.11029243469238281, 0.4178314208984375, 0.21832656860351562, -0.03138542175292969, -0.011470794677734375, 0.32248878479003906, -0.4040031433105469, 0.1779499053955078, 0.07483673095703125, 0.3959693908691406, -0.6829376220703125, 0.6850700378417969, 0.5765228271484375, 0.4228057861328125, 0.47142982482910156, 0.5075149536132812, 0.3016357421875, 0.26342010498046875, 0.28072357177734375, 0.313995361328125, 0.6468849182128906, 0.511993408203125, 0.3159942626953125, 0.17419815063476562, 0.4256744384765625, 0.37699317932128906, -0.4609527587890625, 0.13962554931640625, -0.319244384765625, -0.29743194580078125, -0.500091552734375, 0.8913726806640625, 0.021799087524414062, 0.7108154296875, 0.4276771545410156, 0.16161727905273438, 0.5932197570800781, 0.8582077026367188, -0.38232421875, -0.3698883056640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000047.npy"}
|
||||
{"epoch": 0.0710506424792139, "step": 48, "batch_size": 64, "mean": -0.014135152101516724, "std": 0.4666339159011841, "min": -1.3104248046875, "p10": -0.5133354187011718, "median": -0.05138206481933594, "p90": 0.46955566406250004, "max": 1.3673477172851562, "pos_frac": 0.4375, "sample": [0.14505767822265625, -0.05673980712890625, 0.3930988311767578, 0.27223968505859375, -0.11258697509765625, 0.7512550354003906, -0.939605712890625, 0.0040283203125, -0.49053382873535156, -0.050273895263671875, 0.17620086669921875, -0.0044612884521484375, -0.4586334228515625, -1.0297622680664062, 0.0020751953125, 0.557647705078125, 0.3938255310058594, -0.09139442443847656, -1.3104248046875, 0.45194244384765625, -0.12310791015625, 0.13472366333007812, -0.29524993896484375, 0.14374923706054688, 0.24151992797851562, 0.47710418701171875, 0.3026123046875, 0.2766990661621094, -0.3509025573730469, 0.9495391845703125, -0.21230316162109375, 0.0547332763671875, -0.2927837371826172, 0.20093154907226562, -0.22791671752929688, 0.4087371826171875, -0.01981353759765625, -0.219451904296875, 0.9773750305175781, -0.2404937744140625, -0.11146163940429688, -0.07622528076171875, -0.10619354248046875, 0.31969261169433594, -0.9085693359375, 0.31999969482421875, -0.05593681335449219, -0.06900787353515625, -0.182342529296875, -0.32126808166503906, 0.3742103576660156, -0.6016998291015625, -0.4671344757080078, -0.030324935913085938, -0.5231075286865234, 0.382598876953125, 1.3673477172851562, -0.052490234375, -0.3096466064453125, -0.5486030578613281, 0.1969738006591797, 0.5000152587890625, -0.4434070587158203, -0.3467254638671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000048.npy"}
|
||||
{"epoch": 0.07256235827664399, "step": 49, "batch_size": 64, "mean": 0.08365324139595032, "std": 0.5019258260726929, "min": -1.4941253662109375, "p10": -0.46475448608398434, "median": 0.06426048278808594, "p90": 0.612896728515625, "max": 2.2264175415039062, "pos_frac": 0.578125, "sample": [0.176239013671875, 0.0627899169921875, 0.13968276977539062, 0.479034423828125, 0.36464691162109375, -0.09528732299804688, -0.05982017517089844, 0.08051872253417969, -0.13201141357421875, -0.3876800537109375, -0.013475418090820312, 0.20671463012695312, 0.3395271301269531, 0.39446258544921875, -0.3757209777832031, -0.24188995361328125, -0.4829254150390625, -0.00130462646484375, 0.3592395782470703, -0.7027587890625, 0.2634868621826172, 0.13451385498046875, 0.186004638671875, 0.07941436767578125, 0.0448455810546875, 0.1063385009765625, -0.06482696533203125, 0.04317283630371094, 0.4506645202636719, -0.645263671875, 0.24046707153320312, 0.28517913818359375, -0.22522735595703125, 0.6622638702392578, 0.28207969665527344, 1.15313720703125, 0.03539276123046875, 0.6021728515625, 0.40869140625, 0.9691162109375, 0.269012451171875, -0.19696807861328125, -0.1706523895263672, 0.61749267578125, -0.42235565185546875, -0.17236328125, -0.167510986328125, 0.20922088623046875, -0.04408836364746094, 0.053661346435546875, 0.5480880737304688, 0.6450576782226562, -1.4941253662109375, -0.6331939697265625, -0.5059623718261719, -0.5607795715332031, 0.06573104858398438, 2.2264175415039062, 0.11344528198242188, -0.10281181335449219, -0.051300048828125, -0.25376129150390625, 0.6352691650390625, -0.3753204345703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000049.npy"}
|
||||
{"epoch": 0.07407407407407407, "step": 50, "batch_size": 64, "mean": 0.12176531553268433, "std": 0.41116270422935486, "min": -0.850677490234375, "p10": -0.38632183074951165, "median": 0.1366262435913086, "p90": 0.6218551635742189, "max": 0.9910430908203125, "pos_frac": 0.609375, "sample": [0.6310958862304688, 0.9887161254882812, 0.4409637451171875, 0.16353988647460938, 0.08036422729492188, 0.6010589599609375, 0.4363517761230469, -0.6743316650390625, 0.9910430908203125, 0.5427665710449219, 0.012853622436523438, 0.44748687744140625, -0.19971466064453125, -0.15045928955078125, 0.1474151611328125, -0.3497028350830078, 0.2101593017578125, 0.3132362365722656, 0.4184761047363281, 0.16178131103515625, 0.41289520263671875, -0.061298370361328125, -0.10882377624511719, -0.01944732666015625, 0.41573333740234375, 0.803314208984375, -0.01485443115234375, 0.41808319091796875, -0.08023452758789062, -0.07625579833984375, 0.23956298828125, 0.5431232452392578, 0.31226348876953125, 0.37416648864746094, -0.850677490234375, -0.19873046875, 0.35107421875, 0.8807907104492188, 0.1282482147216797, -0.11556243896484375, 0.41355133056640625, 0.872772216796875, 0.1141357421875, -0.31223297119140625, 0.0768280029296875, 0.23505210876464844, 0.630767822265625, -0.07772064208984375, -0.6317138671875, -0.1992950439453125, 0.1682605743408203, -0.1469554901123047, 0.03348350524902344, 0.1450042724609375, 0.5016365051269531, -0.5419960021972656, -0.44037628173828125, -0.3295135498046875, -0.290771484375, -0.18071365356445312, 0.19548416137695312, -0.6837387084960938, 0.07657623291015625, -0.40201568603515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000050.npy"}
|
||||
{"epoch": 0.07558578987150416, "step": 51, "batch_size": 64, "mean": 0.18919536471366882, "std": 0.42100009322166443, "min": -0.7928466796875, "p10": -0.2997493743896484, "median": 0.1123046875, "p90": 0.761277770996094, "max": 1.572235107421875, "pos_frac": 0.734375, "sample": [0.68658447265625, 0.22397804260253906, 0.030385971069335938, -0.1420135498046875, 0.33636474609375, 0.7176132202148438, 0.2299041748046875, 0.05927848815917969, -0.5694408416748047, 0.065826416015625, 0.026063919067382812, 0.6918067932128906, -0.10898971557617188, 0.951263427734375, -0.40276336669921875, 0.02716064453125, 0.4903564453125, 0.1777801513671875, 0.3076210021972656, 1.047576904296875, 0.2223663330078125, 0.22762298583984375, 0.4173927307128906, 0.2463836669921875, 0.0156707763671875, 0.0600433349609375, 0.05042266845703125, -0.43994140625, 0.28905487060546875, 0.2702522277832031, 0.03119659423828125, 0.5767974853515625, 0.07209968566894531, 0.876983642578125, -0.7928466796875, -0.19982147216796875, 0.048740386962890625, -0.16515731811523438, -0.2623786926269531, 0.40164947509765625, 0.1622467041015625, -0.315765380859375, 0.06253433227539062, 0.4646148681640625, 0.13518524169921875, -0.08427810668945312, -0.1567230224609375, 1.572235107421875, -0.204376220703125, 0.7799911499023438, 0.5492420196533203, 0.6068344116210938, 0.7887725830078125, 0.07798004150390625, 0.08942413330078125, -0.17217445373535156, 0.2761955261230469, 0.9040031433105469, -0.047039031982421875, 0.2542877197265625, -0.3284187316894531, 0.3756866455078125, 0.03705787658691406, -0.51190185546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000051.npy"}
|
||||
{"epoch": 0.07709750566893424, "step": 52, "batch_size": 64, "mean": 0.19624871015548706, "std": 0.5069024562835693, "min": -0.860595703125, "p10": -0.41874504089355463, "median": 0.18982315063476562, "p90": 1.0338897705078127, "max": 1.35601806640625, "pos_frac": 0.59375, "sample": [1.2299041748046875, 0.251373291015625, 0.9828338623046875, 0.2883148193359375, 0.27260589599609375, -0.5681838989257812, 0.43561744689941406, -0.2128753662109375, -0.3383197784423828, 1.0557708740234375, 1.2254486083984375, 0.7791976928710938, -0.5075531005859375, 1.2034530639648438, 1.060943603515625, -0.16712188720703125, 0.48162078857421875, -0.2706756591796875, -0.3117713928222656, -0.08317184448242188, -0.5245742797851562, -0.19860076904296875, 0.2048492431640625, -0.058666229248046875, 0.5920562744140625, 0.405029296875, 0.17479705810546875, 0.004871368408203125, -0.4712066650390625, 0.22745132446289062, 0.3930320739746094, 0.225311279296875, 0.06211090087890625, -0.22290802001953125, 0.28555870056152344, 1.35601806640625, 0.3357276916503906, 0.04584312438964844, 0.38831329345703125, -0.36376953125, -0.013998031616210938, -0.02898406982421875, 0.10438156127929688, 0.41977882385253906, -0.06102943420410156, 0.5174217224121094, -0.20859718322753906, 0.3580322265625, 0.714813232421875, -0.0984344482421875, 0.62353515625, 0.00885772705078125, -0.4820556640625, 0.7393226623535156, 1.1080398559570312, -0.860595703125, 0.6672210693359375, -0.3892250061035156, -0.2633056640625, 0.2681713104248047, -0.057464599609375, 0.2720470428466797, -0.0152740478515625, -0.431396484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000052.npy"}
|
||||
{"epoch": 0.07860922146636433, "step": 53, "batch_size": 64, "mean": 0.15706408023834229, "std": 0.3923652172088623, "min": -0.7756500244140625, "p10": -0.36396026611328125, "median": 0.21462059020996094, "p90": 0.6657470703125002, "max": 0.9386215209960938, "pos_frac": 0.625, "sample": [0.4950599670410156, 0.38027191162109375, 0.8611373901367188, 0.27298736572265625, 0.4138660430908203, 0.4603080749511719, 0.07037734985351562, 0.5863189697265625, 0.1263561248779297, -0.364898681640625, 0.036651611328125, -0.08447265625, 0.4588775634765625, 0.41872406005859375, -0.3617706298828125, -0.15832138061523438, 0.37407684326171875, 0.6950531005859375, -0.09370803833007812, -0.5119705200195312, 0.2490978240966797, -0.184814453125, 0.39775848388671875, 0.35807037353515625, -0.7756500244140625, -0.0441436767578125, -0.4499320983886719, 0.49620819091796875, 0.2871990203857422, 0.27690887451171875, -0.009263992309570312, -0.5806884765625, -0.08359909057617188, -0.6420822143554688, 0.25313568115234375, 0.746612548828125, 0.7487621307373047, 0.9386215209960938, -0.17483139038085938, -0.30809783935546875, -0.22402572631835938, -0.3968505859375, 0.1801433563232422, -0.306884765625, 0.30252838134765625, 0.6944580078125, 0.28862762451171875, 0.5791854858398438, 0.4319648742675781, 0.5987548828125, 0.05008697509765625, 0.3822746276855469, 0.027666091918945312, 0.7986068725585938, 0.43552398681640625, -0.13733291625976562, -0.027252197265625, -0.21225357055664062, 0.42487335205078125, 0.08585166931152344, -0.10550689697265625, 0.4737396240234375, -0.021223068237304688, 0.15494918823242188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000053.npy"}
|
||||
{"epoch": 0.0801209372637944, "step": 54, "batch_size": 64, "mean": 0.1536046266555786, "std": 0.49617016315460205, "min": -1.0708541870117188, "p10": -0.4323081970214844, "median": 0.159820556640625, "p90": 0.737411499023438, "max": 1.91778564453125, "pos_frac": 0.625, "sample": [0.8002967834472656, 1.0869140625, -0.5143814086914062, 0.3322486877441406, 0.1239776611328125, -0.2613067626953125, 0.4394054412841797, 0.39684295654296875, 0.5435523986816406, -0.7679710388183594, -0.2662696838378906, -0.14548301696777344, 0.8785400390625, -1.0708541870117188, 0.0821533203125, 0.22980499267578125, 0.5718536376953125, 0.4332847595214844, 0.40988922119140625, -0.041259765625, 0.140594482421875, 1.91778564453125, 0.3360443115234375, 0.43187713623046875, -0.0940093994140625, 0.87359619140625, 0.35045433044433594, -0.272674560546875, 0.4153900146484375, -0.262237548828125, -0.2415771484375, 0.13390350341796875, -0.43611907958984375, -0.4834556579589844, 0.2784862518310547, -0.0686798095703125, 0.41851043701171875, -0.23578643798828125, 0.5906791687011719, 1.0632553100585938, 0.4358234405517578, -0.11434173583984375, 0.22179031372070312, 0.5477981567382812, -0.005008697509765625, 0.2978038787841797, 0.179046630859375, 0.211029052734375, 0.024713516235351562, 0.4940605163574219, 0.04071044921875, -0.8931503295898438, 0.2429962158203125, 0.2178192138671875, 0.5030193328857422, -0.03236198425292969, 0.03199577331542969, -0.13414382934570312, 0.8260765075683594, -0.4234161376953125, -0.17240142822265625, -0.29439544677734375, 0.11055755615234375, -0.60260009765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000054.npy"}
|
||||
{"epoch": 0.08163265306122448, "step": 55, "batch_size": 64, "mean": 0.24859526753425598, "std": 0.6147000193595886, "min": -1.0181121826171875, "p10": -0.3910293579101562, "median": 0.14027023315429688, "p90": 1.0181308746337892, "max": 2.25823974609375, "pos_frac": 0.640625, "sample": [1.0379791259765625, 0.089263916015625, 0.28292083740234375, -0.31634521484375, -0.019693374633789062, 0.078338623046875, 0.7990837097167969, 0.04944610595703125, -0.26088714599609375, -0.162567138671875, 0.25815582275390625, 0.716461181640625, -0.858123779296875, -0.0810699462890625, 0.09454345703125, 0.6966018676757812, 1.0200386047363281, -0.1767730712890625, 0.4151763916015625, -0.1213836669921875, -0.007694244384765625, -0.346221923828125, 0.8260536193847656, 0.2818450927734375, 2.25823974609375, 0.19277191162109375, 0.4747734069824219, 1.100189208984375, 0.145355224609375, -0.2131805419921875, 0.37003135681152344, 0.952423095703125, 0.46466827392578125, -0.6542091369628906, -0.11647796630859375, -0.56280517578125, 0.19008827209472656, 0.20062255859375, 0.20159149169921875, -0.09394454956054688, 0.06230926513671875, 1.0136795043945312, 0.20449066162109375, 0.7708282470703125, 0.1169586181640625, 0.2515277862548828, 1.5347442626953125, -0.582763671875, -0.4102325439453125, -0.6969757080078125, 0.1055145263671875, 0.373870849609375, 0.13518524169921875, -0.18170928955078125, -0.015102386474609375, 0.7146987915039062, 0.7111129760742188, 0.07983207702636719, 0.7858352661132812, 2.0465850830078125, -0.04412841796875, -0.31494140625, 1.0616035461425781, -1.0181121826171875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000055.npy"}
|
||||
{"epoch": 0.08314436885865457, "step": 56, "batch_size": 64, "mean": 0.18286418914794922, "std": 0.601912796497345, "min": -2.128448486328125, "p10": -0.42712554931640623, "median": 0.11057186126708984, "p90": 0.8998374938964847, "max": 1.5251083374023438, "pos_frac": 0.671875, "sample": [-0.45325469970703125, 0.4390449523925781, 0.024557113647460938, -0.5418777465820312, 0.0206298828125, -0.5478515625, 1.1786346435546875, -1.0991973876953125, -0.3606071472167969, 0.12696075439453125, 0.41289520263671875, -0.65020751953125, -0.1692962646484375, 0.10636138916015625, 0.7574462890625, 1.0814743041992188, -0.11328125, -0.3171577453613281, 0.2609100341796875, 0.9312057495117188, -0.4410438537597656, 1.5251083374023438, 0.6998348236083984, 0.0051422119140625, 0.1684894561767578, 0.7981452941894531, -0.08698654174804688, 0.30387115478515625, 0.11102104187011719, 0.8104190826416016, -0.039730072021484375, 0.7175216674804688, 0.7319488525390625, 0.7911510467529297, 0.25794219970703125, 0.1101226806640625, -2.128448486328125, 0.3216896057128906, 0.018077850341796875, 0.2956390380859375, 0.08449363708496094, 0.8266448974609375, -0.3470001220703125, -0.310211181640625, 0.07742118835449219, 0.4068145751953125, 0.00475311279296875, 0.17263031005859375, -0.023006439208984375, 0.34731101989746094, 0.5882530212402344, 0.7367820739746094, 1.1048355102539062, 0.4086494445800781, 1.3169403076171875, -0.3946495056152344, -0.3256492614746094, 0.002593994140625, 0.1414337158203125, -0.247528076171875, 0.07864761352539062, 1.2602005004882812, -0.010179519653320312, -0.2541770935058594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000056.npy"}
|
||||
{"epoch": 0.08465608465608465, "step": 57, "batch_size": 64, "mean": 0.3268115520477295, "std": 0.6056222319602966, "min": -0.948760986328125, "p10": -0.43114776611328126, "median": 0.40309906005859375, "p90": 0.8347869873046876, "max": 2.57757568359375, "pos_frac": 0.71875, "sample": [-0.492950439453125, -0.2394390106201172, 0.2787151336669922, 2.57757568359375, -0.1680908203125, 1.1311187744140625, 0.6913299560546875, -0.2956390380859375, 0.5692138671875, 0.42230224609375, 0.63641357421875, 0.904937744140625, -0.948760986328125, 0.24188232421875, 0.5866622924804688, -0.4758758544921875, 0.4072418212890625, 0.574127197265625, 1.8357315063476562, 0.3063850402832031, 0.69427490234375, -0.28659820556640625, 0.5257434844970703, 0.41668701171875, -0.025365829467773438, -0.8191299438476562, 0.2934417724609375, 0.4853363037109375, -0.19491958618164062, -0.817626953125, 0.7712020874023438, 0.5320091247558594, 0.6092033386230469, 0.533203125, 0.0119476318359375, 0.795684814453125, -0.4310302734375, 0.18336105346679688, -0.4311981201171875, 0.571044921875, 0.845947265625, -0.31197357177734375, -0.3719329833984375, 0.6702480316162109, -0.5044326782226562, 0.808746337890625, 0.7668609619140625, 0.48749542236328125, 0.6845016479492188, 0.4447154998779297, 0.4655914306640625, 0.36875152587890625, 0.398956298828125, 1.4029388427734375, 0.07073402404785156, 1.2795944213867188, 0.3022918701171875, 0.5779895782470703, 0.2118816375732422, 0.29077911376953125, 0.056667327880859375, -0.025274276733398438, 0.24625396728515625, -0.21154403686523438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000057.npy"}
|
||||
{"epoch": 0.08616780045351474, "step": 58, "batch_size": 64, "mean": 0.22546246647834778, "std": 0.8357961177825928, "min": -1.2652664184570312, "p10": -0.6056575775146484, "median": 0.13785171508789062, "p90": 1.1195907592773438, "max": 4.126007080078125, "pos_frac": 0.625, "sample": [-1.2652664184570312, 0.6833953857421875, 0.8703689575195312, -0.22677230834960938, 0.497589111328125, -0.4283905029296875, 0.28437042236328125, 0.48589324951171875, 0.10274505615234375, -0.02545166015625, 0.4199485778808594, -0.4511604309082031, 1.382965087890625, -0.6890869140625, -0.5166702270507812, 0.08324813842773438, 1.3265304565429688, 0.14264678955078125, -0.09765625, 1.1221771240234375, 0.2504405975341797, 0.11862564086914062, 0.701690673828125, 0.31023406982421875, -0.9333381652832031, -0.5685882568359375, -0.5021820068359375, -0.032196044921875, -0.5893688201904297, 0.41236114501953125, -0.4979095458984375, -0.8909683227539062, 0.3459281921386719, 0.27123260498046875, 4.126007080078125, -0.45180511474609375, 0.133056640625, 1.9269256591796875, 1.3270034790039062, 0.4819984436035156, 0.1496734619140625, 0.05457496643066406, 0.19314193725585938, 0.12722015380859375, -1.0597763061523438, 2.2186279296875, -0.6126384735107422, -0.6636581420898438, 0.10338973999023438, 0.62750244140625, -0.1756439208984375, 0.959014892578125, -0.22914886474609375, -0.2712745666503906, -0.14002227783203125, 1.0415115356445312, 0.22423934936523438, 0.08740997314453125, -0.30370330810546875, 1.113555908203125, 0.36100006103515625, 0.14856719970703125, 0.4226722717285156, 0.41279029846191406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000058.npy"}
|
||||
{"epoch": 0.08767951625094482, "step": 59, "batch_size": 64, "mean": 0.2712229788303375, "std": 0.6968890428543091, "min": -0.917755126953125, "p10": -0.5000999450683593, "median": 0.16393661499023438, "p90": 1.1200553894042973, "max": 2.8074188232421875, "pos_frac": 0.625, "sample": [0.0024871826171875, 0.5112762451171875, 0.0074615478515625, -0.3857688903808594, -0.0408782958984375, -0.37401580810546875, 0.37702178955078125, -0.75958251953125, 0.6699867248535156, 0.70306396484375, 2.3375244140625, 0.5976295471191406, 0.83233642578125, 1.0037002563476562, 0.4942359924316406, -0.04153251647949219, 1.254730224609375, -0.8790664672851562, -0.082122802734375, 0.470977783203125, 0.43273162841796875, 0.163177490234375, -0.14179229736328125, -0.2748908996582031, -0.1121978759765625, 0.48038482666015625, -0.3186988830566406, 0.5986557006835938, 1.687713623046875, 0.5072097778320312, 0.0510711669921875, 0.34372711181640625, 0.37468528747558594, -0.05446624755859375, -0.917755126953125, -0.12221717834472656, -0.11321640014648438, 0.34891510009765625, -0.0767822265625, -0.1741943359375, 2.8074188232421875, 0.16469573974609375, 1.2148666381835938, 0.2746295928955078, -0.2057819366455078, 0.17081832885742188, -0.5490989685058594, 0.7595806121826172, -0.027034759521484375, 1.6586456298828125, -0.8500328063964844, 0.703216552734375, 0.007221221923828125, -0.656463623046875, -0.24550628662109375, 0.06467437744140625, -0.5982131958007812, 0.3854522705078125, 0.2353515625, 0.117462158203125, 0.4103660583496094, 1.169921875, 0.9122238159179688, 0.05233192443847656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000059.npy"}
|
||||
{"epoch": 0.08919123204837491, "step": 60, "batch_size": 64, "mean": 0.04354393482208252, "std": 0.7077206969261169, "min": -2.1901397705078125, "p10": -0.9232803344726561, "median": 0.15504741668701172, "p90": 0.8128330230712891, "max": 1.429718017578125, "pos_frac": 0.578125, "sample": [0.07152175903320312, -0.016448974609375, 0.4995002746582031, 0.17736053466796875, -0.26076507568359375, 0.6412429809570312, -1.4505157470703125, -0.680511474609375, 0.4336280822753906, -1.1775665283203125, 0.3187599182128906, 0.4610443115234375, -0.12583541870117188, -0.26350975036621094, -1.23040771484375, 0.8021278381347656, -0.3832817077636719, 0.6852989196777344, 1.429718017578125, 0.347381591796875, -0.9644927978515625, -0.06781768798828125, 0.27399444580078125, -0.827117919921875, 0.5924797058105469, 0.38181304931640625, 0.506134033203125, 0.06277084350585938, 0.620513916015625, -0.7537498474121094, 0.1327342987060547, -0.28141021728515625, 0.8345794677734375, 0.0408477783203125, -0.22925949096679688, 0.34755706787109375, 0.8174209594726562, -0.4913673400878906, -0.3602142333984375, -0.6187934875488281, -0.79888916015625, 0.9710483551025391, 0.7036819458007812, -1.2353248596191406, 0.47896575927734375, 0.8183746337890625, 0.24278640747070312, -1.190887451171875, 0.355682373046875, -0.11959266662597656, 0.6760177612304688, 0.08936309814453125, 0.3006744384765625, 0.5645065307617188, 1.353912353515625, -0.12189674377441406, 0.36260223388671875, 0.4232635498046875, 0.5021133422851562, -0.044841766357421875, -2.1901397705078125, -0.7833175659179688, 1.1489143371582031, -0.015569686889648438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000060.npy"}
|
||||
{"epoch": 0.09070294784580499, "step": 61, "batch_size": 64, "mean": 0.10737112164497375, "std": 0.7111698389053345, "min": -1.6122589111328125, "p10": -0.7391540527343747, "median": 0.04860115051269531, "p90": 1.1011940002441407, "max": 1.6931610107421875, "pos_frac": 0.53125, "sample": [-0.20111083984375, 0.1627655029296875, 1.6505470275878906, 1.1962966918945312, -0.053028106689453125, 0.08518791198730469, 0.37053680419921875, 1.3600654602050781, 0.47747802734375, 0.07874679565429688, -0.0986480712890625, -0.4502696990966797, -0.27173614501953125, 0.30963134765625, 0.5892410278320312, -0.2276172637939453, 0.7094879150390625, -0.2480621337890625, 0.059661865234375, 0.4569549560546875, -0.21161842346191406, 0.4523353576660156, 0.279998779296875, -0.2655200958251953, -0.11479759216308594, -0.42682838439941406, -0.090301513671875, -0.52099609375, -0.1053009033203125, -0.08573532104492188, 0.8188285827636719, -0.310333251953125, -0.8269577026367188, 1.073150634765625, -0.1787261962890625, 0.6225051879882812, 0.05976104736328125, -0.5195503234863281, 0.15676116943359375, 1.0400848388671875, 1.3213653564453125, -1.257568359375, -0.070404052734375, -1.2639579772949219, 0.21024322509765625, 1.1132125854492188, 0.17593002319335938, 0.16716957092285156, -0.4388427734375, -0.1623249053955078, 1.3835678100585938, -1.1873550415039062, 0.037540435791015625, 0.01967620849609375, 0.5735549926757812, -1.6122589111328125, 1.6931610107421875, 0.619598388671875, -0.04212188720703125, -0.5342788696289062, 0.8082351684570312, 0.6619720458984375, -1.3195228576660156, -0.827728271484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000061.npy"}
|
||||
{"epoch": 0.09221466364323508, "step": 62, "batch_size": 64, "mean": 0.2133486270904541, "std": 0.7109352946281433, "min": -2.3311614990234375, "p10": -0.5563556671142578, "median": 0.21869468688964844, "p90": 1.2715789794921877, "max": 2.0924072265625, "pos_frac": 0.625, "sample": [1.4171524047851562, -0.7863845825195312, -0.49846649169921875, 0.26242828369140625, 0.4898223876953125, -0.3302154541015625, 1.2406463623046875, -0.2172107696533203, -0.9626312255859375, -0.280853271484375, 1.3654098510742188, 0.12491035461425781, 1.292236328125, -0.9394073486328125, 0.5216751098632812, 0.3845977783203125, 0.5740737915039062, -0.0343017578125, 0.2306499481201172, 0.5711746215820312, -0.8372421264648438, 0.08296585083007812, 0.21307373046875, 0.3335380554199219, 0.19425582885742188, 0.0705108642578125, -0.19701766967773438, 0.632904052734375, 0.4450187683105469, 0.47341156005859375, -0.9341812133789062, -0.13826560974121094, -0.022031784057617188, -0.3720703125, 1.4716415405273438, -2.3311614990234375, -0.00255584716796875, 0.6545333862304688, -0.18149375915527344, -0.0923919677734375, 0.22431564331054688, 1.105133056640625, 0.4271049499511719, 0.2756195068359375, 0.7525444030761719, 0.27426910400390625, 0.388214111328125, -0.5811653137207031, 0.12678909301757812, 1.2848358154296875, -0.16421127319335938, 0.6273956298828125, 0.47423553466796875, 0.32022857666015625, -0.29375457763671875, -0.1822357177734375, 2.0924072265625, 1.587677001953125, 0.053577423095703125, -0.046356201171875, 0.6464195251464844, 0.056232452392578125, -0.274444580078125, 0.5907325744628906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000062.npy"}
|
||||
{"epoch": 0.09372637944066516, "step": 63, "batch_size": 64, "mean": 0.4742545485496521, "std": 0.6120426058769226, "min": -0.805389404296875, "p10": -0.19462242126464832, "median": 0.388031005859375, "p90": 1.2034255981445312, "max": 2.887420654296875, "pos_frac": 0.859375, "sample": [0.0652008056640625, 0.30266571044921875, 0.032470703125, -0.044384002685546875, 1.793487548828125, -0.07013893127441406, -0.2479724884033203, 2.887420654296875, 0.5656509399414062, 0.3069267272949219, -0.805389404296875, 0.1084747314453125, 0.09092521667480469, -0.7970123291015625, 1.2053070068359375, 0.32668304443359375, 0.829071044921875, -0.297576904296875, 0.39984893798828125, 0.7594318389892578, -0.27780914306640625, 0.11901473999023438, 0.424591064453125, 1.50897216796875, 1.0604171752929688, 0.5962753295898438, 0.5175018310546875, 0.28220558166503906, 0.48649024963378906, 0.5867233276367188, 0.5770645141601562, 0.6268157958984375, 1.000885009765625, 0.509552001953125, 0.2350311279296875, 0.046051025390625, 0.6242256164550781, -0.3953704833984375, 0.1526775360107422, 0.37047386169433594, 0.2856311798095703, 1.52557373046875, 0.04352855682373047, 1.44561767578125, 0.37621307373046875, 0.5866909027099609, 1.19903564453125, 0.9770622253417969, 0.7039566040039062, 0.14379119873046875, 0.2571601867675781, 0.7556381225585938, 0.31365966796875, -0.37896728515625, 0.8882064819335938, 0.08782958984375, 0.7581710815429688, 0.4700336456298828, 0.013578414916992188, 0.15334701538085938, 0.9154891967773438, 1.589691162109375, 0.60491943359375, 0.173553466796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000063.npy"}
|
||||
{"epoch": 0.09523809523809523, "step": 64, "batch_size": 64, "mean": 0.10503333806991577, "std": 0.6164445877075195, "min": -1.9534912109375, "p10": -0.37122001647949215, "median": 0.082763671875, "p90": 0.7647533416748049, "max": 1.947296142578125, "pos_frac": 0.578125, "sample": [0.9019546508789062, 1.947296142578125, -0.5089626312255859, -0.3607444763183594, -0.2406768798828125, 0.31780242919921875, 0.7906856536865234, 0.08208847045898438, 0.4375801086425781, 0.95538330078125, 0.08343887329101562, 0.1480560302734375, 0.6397762298583984, -0.2172698974609375, 0.37066650390625, 0.06879234313964844, 0.024505615234375, 0.00698089599609375, 0.359619140625, 0.43634796142578125, 1.1976165771484375, -0.27010345458984375, -0.068359375, -0.1637115478515625, -0.30564117431640625, -0.060283660888671875, 0.13990020751953125, -1.9534912109375, -0.6998329162597656, 0.5249443054199219, 0.11278724670410156, -0.37570953369140625, -0.22817230224609375, -0.5246734619140625, 0.23261642456054688, 0.48107147216796875, 0.2952308654785156, -1.9381866455078125, -0.39868927001953125, 0.5599784851074219, 0.402252197265625, 0.30577850341796875, 0.6466903686523438, 0.20627784729003906, 0.34821319580078125, 0.1275787353515625, -0.15964508056640625, -0.2014923095703125, -0.334747314453125, 1.483489990234375, -0.31229400634765625, -0.227294921875, 1.0020904541015625, 0.04274749755859375, 0.2209644317626953, 0.31993675231933594, 0.7042446136474609, -0.11957931518554688, -0.3277740478515625, -0.1372833251953125, -0.2925262451171875, -0.31679534912109375, -0.1546955108642578, 0.6953849792480469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000064.npy"}
|
||||
{"epoch": 0.09674981103552532, "step": 65, "batch_size": 64, "mean": 0.5030020475387573, "std": 0.850088357925415, "min": -2.0625343322753906, "p10": -0.35960845947265624, "median": 0.4668874740600586, "p90": 1.6274251937866213, "max": 3.188873291015625, "pos_frac": 0.734375, "sample": [0.4772777557373047, 0.5580711364746094, 1.1723785400390625, 0.17159271240234375, 0.05003547668457031, 0.7361259460449219, -0.3746185302734375, 1.46063232421875, -0.26801490783691406, 0.8888816833496094, -0.2109527587890625, 0.629241943359375, 1.1701335906982422, 0.521209716796875, 0.31885528564453125, 0.5088062286376953, 0.6989650726318359, 0.5080680847167969, -0.3632659912109375, 2.58343505859375, 0.2686309814453125, 0.651031494140625, -0.656097412109375, 1.6503677368164062, 1.573892593383789, 0.16042327880859375, -2.0625343322753906, -0.0076446533203125, -0.1217041015625, 0.402313232421875, 0.1739788055419922, 0.4564971923828125, -0.35107421875, -0.31429290771484375, 0.35544586181640625, 1.8632354736328125, 0.4214897155761719, 1.7764892578125, 1.2562026977539062, 0.9595451354980469, 1.4915122985839844, 0.10701751708984375, 1.7439804077148438, 0.802734375, -0.8183975219726562, -0.16179656982421875, 0.36930274963378906, -0.06386947631835938, -0.9054660797119141, 0.0834503173828125, 0.528289794921875, 0.6822967529296875, 0.103668212890625, 0.6381378173828125, -0.09575653076171875, 0.6322860717773438, 1.654144287109375, 0.07960891723632812, 1.0160980224609375, 1.1429061889648438, 1.1508560180664062, -0.781158447265625, 3.188873291015625, -0.08964157104492188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000065.npy"}
|
||||
{"epoch": 0.0982615268329554, "step": 66, "batch_size": 64, "mean": 0.2957572937011719, "std": 0.7512759566307068, "min": -1.6772537231445312, "p10": -0.5197067260742188, "median": 0.22879981994628906, "p90": 1.338530731201172, "max": 3.0325775146484375, "pos_frac": 0.703125, "sample": [0.37084197998046875, 1.7069091796875, 0.25537681579589844, 0.5175018310546875, 0.1032562255859375, 0.37096405029296875, 0.4027996063232422, 0.20844650268554688, 0.10684967041015625, 0.3993415832519531, -0.13453292846679688, 0.95916748046875, 1.4318084716796875, 0.32515716552734375, -0.059112548828125, -0.5131912231445312, -0.22592926025390625, 0.4803428649902344, 1.427459716796875, 1.2919235229492188, 0.37963104248046875, 1.6012077331542969, -0.5224990844726562, -0.22742462158203125, 0.1580352783203125, 0.5561923980712891, 0.42583656311035156, 0.7386932373046875, 0.6337966918945312, 0.337371826171875, 0.8294029235839844, 0.011430740356445312, -0.0561676025390625, 0.2516002655029297, 0.13886070251464844, -0.41098785400390625, 0.27808570861816406, 0.1861572265625, -0.6256904602050781, 0.10033798217773438, -1.100067138671875, -1.6772537231445312, 0.191802978515625, 0.06865692138671875, -0.39546966552734375, -0.5451126098632812, 0.9471473693847656, -0.452606201171875, 0.47137451171875, 0.6919517517089844, -0.07822227478027344, -0.1136474609375, 0.24915313720703125, 3.0325775146484375, 1.8839492797851562, -1.0164756774902344, 1.0952835083007812, -0.8073654174804688, 0.1938629150390625, 1.3585052490234375, 0.6344795227050781, -0.19568824768066406, 0.14739990234375, 0.134979248046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000066.npy"}
|
||||
{"epoch": 0.09977324263038549, "step": 67, "batch_size": 64, "mean": 0.23182040452957153, "std": 0.725062370300293, "min": -1.5284194946289062, "p10": -0.6204706192016601, "median": 0.23065662384033203, "p90": 0.982427978515625, "max": 1.9619140625, "pos_frac": 0.609375, "sample": [-0.46010589599609375, -0.6458740234375, -0.5854244232177734, -0.5805435180664062, -1.5284194946289062, 0.47339820861816406, 1.4986400604248047, 1.1102027893066406, 0.9620132446289062, -0.40770721435546875, 0.4093170166015625, -0.0169677734375, 0.0024261474609375, -0.717742919921875, 0.73760986328125, 0.6979808807373047, -0.3924579620361328, 0.8772735595703125, -0.36507415771484375, 0.260284423828125, 0.3750762939453125, 0.974395751953125, -1.308319091796875, 1.9619140625, 0.20102882385253906, -0.0521087646484375, 0.7177619934082031, -0.036041259765625, 0.9836273193359375, -0.9305000305175781, 0.472900390625, 0.4173126220703125, -0.4008750915527344, 0.9796295166015625, 0.6904830932617188, -0.013601303100585938, 0.15975570678710938, 0.6595897674560547, -0.6354904174804688, 0.7887306213378906, 1.8807373046875, -0.3580322265625, 0.545166015625, 0.8988189697265625, 0.4695777893066406, -0.30010223388671875, 0.1569061279296875, 0.32622337341308594, -0.18360137939453125, -0.24663925170898438, 0.4811286926269531, 1.8486328125, 0.1908588409423828, 0.16706275939941406, 0.5071945190429688, -0.5766448974609375, 0.0255889892578125, 0.36655426025390625, -0.6787815093994141, -0.46050262451171875, 1.4880828857421875, 0.37522125244140625, 0.7027587890625, -0.12380218505859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000067.npy"}
|
||||
{"epoch": 0.10128495842781557, "step": 68, "batch_size": 64, "mean": 0.2984488904476166, "std": 0.8238428831100464, "min": -2.082611083984375, "p10": -0.5989936828613281, "median": 0.2636222839355469, "p90": 0.9901998519897461, "max": 3.3032989501953125, "pos_frac": 0.625, "sample": [3.3032989501953125, 0.87066650390625, -0.05792236328125, 0.626922607421875, -0.611328125, 1.3238906860351562, 0.6486587524414062, -0.08257293701171875, 0.6741485595703125, -0.3743743896484375, 0.6765975952148438, -2.082611083984375, 0.9526824951171875, 1.3960418701171875, 0.003570556640625, 0.7820625305175781, -0.8423614501953125, -0.24617767333984375, -0.17961883544921875, 0.25408172607421875, 0.68829345703125, -0.170745849609375, 0.273162841796875, 0.9804840087890625, 0.18707275390625, 0.4207000732421875, 0.4995155334472656, -0.3409404754638672, 0.8862991333007812, -0.07244873046875, -0.1678180694580078, 0.30933380126953125, 0.24614715576171875, 0.4435577392578125, -0.9021072387695312, 0.9008102416992188, 2.826019287109375, 0.309539794921875, -0.12969207763671875, 0.16208267211914062, -0.41845703125, -0.8184127807617188, 0.8116912841796875, 0.6453208923339844, -0.7916793823242188, -0.5702133178710938, 0.529388427734375, -0.00531005859375, -0.56787109375, 0.21480560302734375, 0.785064697265625, 0.5625762939453125, -0.4869537353515625, 1.2519607543945312, 1.2113571166992188, 0.18041229248046875, 0.16998291015625, 0.6822967529296875, 0.9547576904296875, 0.9943637847900391, 0.8053703308105469, -0.1725311279296875, -0.36241912841796875, -0.8896942138671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000068.npy"}
|
||||
{"epoch": 0.10279667422524566, "step": 69, "batch_size": 64, "mean": 0.48882368206977844, "std": 0.8719438314437866, "min": -1.239105224609375, "p10": -0.4870872497558594, "median": 0.3258800506591797, "p90": 1.5120979309082032, "max": 3.150390625, "pos_frac": 0.75, "sample": [0.19541168212890625, 0.36539459228515625, 1.8847084045410156, 0.0658416748046875, 1.1494064331054688, -0.0829315185546875, -0.15797805786132812, 0.07183647155761719, 0.9794158935546875, 0.1776275634765625, 0.17941856384277344, 0.2585906982421875, 0.13932228088378906, 0.9742507934570312, 1.1035499572753906, 1.5051498413085938, 0.2891063690185547, 1.0024185180664062, 0.6335906982421875, -0.584014892578125, 2.8148193359375, -1.0700225830078125, 1.94921875, 0.24510574340820312, 1.51507568359375, -0.17070770263671875, 0.00311279296875, 1.1189193725585938, 1.02301025390625, 0.00478363037109375, 0.40694618225097656, 0.049957275390625, 0.5694427490234375, 0.15468597412109375, 0.8661842346191406, -0.49762725830078125, 0.86602783203125, 0.3091850280761719, 0.3425750732421875, 0.42328643798828125, -0.3229217529296875, 0.48418235778808594, 0.5245208740234375, -0.063201904296875, 1.220062255859375, -0.1815032958984375, 1.4893035888671875, 0.8685283660888672, -0.20459747314453125, -1.239105224609375, -0.8373336791992188, 0.7172508239746094, -1.0153388977050781, -0.6379852294921875, 1.3077354431152344, -0.462493896484375, 1.7478485107421875, 0.4286308288574219, 0.7889785766601562, -0.17618179321289062, 3.150390625, 0.09428977966308594, 2.4880828857421875, 0.04147911071777344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000069.npy"}
|
||||
{"epoch": 0.10430839002267574, "step": 70, "batch_size": 64, "mean": 0.3124830424785614, "std": 0.8413992524147034, "min": -2.414459228515625, "p10": -0.7582096099853515, "median": 0.35813426971435547, "p90": 1.2845710754394533, "max": 1.9713134765625, "pos_frac": 0.6875, "sample": [0.34967041015625, -0.6524982452392578, 1.2249755859375, -0.33367919921875, 0.7992267608642578, -0.7758369445800781, -0.4197998046875, 0.6256790161132812, 1.3101119995117188, -0.35727882385253906, 0.26320648193359375, 0.3641319274902344, -2.414459228515625, 1.04766845703125, 0.024600982666015625, -0.5192947387695312, 0.9241714477539062, -1.6258544921875, -0.6566925048828125, 0.24965286254882812, 1.8840255737304688, 0.8805828094482422, 0.8872814178466797, 0.7299995422363281, -0.8570175170898438, 0.7225875854492188, 0.7321739196777344, 1.3958663940429688, -0.7170791625976562, 1.9713134765625, 0.11004447937011719, 0.9886207580566406, -0.24376678466796875, -0.22686767578125, 0.80120849609375, 1.3357925415039062, 1.1958389282226562, 0.619598388671875, 0.5801429748535156, 1.0886287689208984, 0.5867137908935547, -0.12006950378417969, 0.0815277099609375, 0.8484954833984375, 0.7050590515136719, 0.8839569091796875, -1.2904205322265625, 1.66961669921875, 0.35213661193847656, 0.52545166015625, 0.9063339233398438, 0.3356781005859375, 0.2578392028808594, -0.9393768310546875, 0.33907318115234375, -0.20556640625, 0.45185089111328125, -1.1672000885009766, 0.7993316650390625, 0.26900672912597656, 0.1519622802734375, -0.048984527587890625, -0.1568431854248047, 1.4566650390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000070.npy"}
|
||||
{"epoch": 0.10582010582010581, "step": 71, "batch_size": 64, "mean": 0.15820787847042084, "std": 0.9189498424530029, "min": -2.422515869140625, "p10": -1.0364959716796873, "median": 0.18383312225341797, "p90": 1.1150405883789063, "max": 2.4024658203125, "pos_frac": 0.625, "sample": [-0.1503429412841797, 1.0145416259765625, 0.7533550262451172, 0.8170795440673828, 0.13602066040039062, 0.0600433349609375, 1.1711959838867188, -0.8055419921875, 0.352752685546875, 2.4024658203125, 0.08841896057128906, -1.5220413208007812, 0.0391387939453125, -0.24846649169921875, -1.4742279052734375, -0.059856414794921875, -0.20932769775390625, 1.4014129638671875, 0.3020744323730469, 1.017578125, -2.2626953125, 1.5121688842773438, 0.5386314392089844, 0.2788047790527344, 1.0668888092041016, 1.405303955078125, 0.6129913330078125, 0.2316455841064453, 0.78179931640625, -0.23329925537109375, -2.422515869140625, -0.1711883544921875, -1.4394149780273438, -0.9129791259765625, -0.20296096801757812, 0.08007431030273438, 0.2594642639160156, 1.1149444580078125, 0.6004352569580078, -0.4427757263183594, 0.06830596923828125, -2.2323455810546875, 0.8011856079101562, 1.2466278076171875, -0.4026298522949219, -0.22021770477294922, -1.0894317626953125, 0.7152786254882812, -0.34033203125, -0.0053157806396484375, 1.115081787109375, 1.0018501281738281, 0.019073486328125, 0.9679794311523438, -0.04009246826171875, 0.5076332092285156, 0.5023612976074219, 0.0650634765625, -0.06982421875, 0.3185272216796875, 0.8185615539550781, 0.26833152770996094, 1.0445709228515625, -0.416534423828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000071.npy"}
|
||||
{"epoch": 0.1073318216175359, "step": 72, "batch_size": 64, "mean": 0.5723739862442017, "std": 0.8651069402694702, "min": -1.4065570831298828, "p10": -0.4584213256835937, "median": 0.4196605682373047, "p90": 1.6732875823974613, "max": 2.991456985473633, "pos_frac": 0.78125, "sample": [0.7605476379394531, 1.3989334106445312, 0.18902587890625, 0.9676513671875, -1.2484588623046875, 0.9437522888183594, 0.36624908447265625, 2.991456985473633, -1.4065570831298828, 0.0792999267578125, 1.5736885070800781, 1.4499282836914062, 0.231414794921875, 1.7229804992675781, 0.18761825561523438, -0.39606475830078125, -0.5434398651123047, 0.7594413757324219, 0.34732818603515625, 0.06757354736328125, 1.715972900390625, 0.4464588165283203, 1.515584945678711, 0.2941436767578125, 0.603057861328125, 0.15952301025390625, -0.026401519775390625, -0.8251895904541016, -0.48514556884765625, 0.030307769775390625, 2.5729293823242188, 0.9551906585693359, 0.9748992919921875, 2.0733642578125, -0.09954833984375, 0.13957595825195312, 0.5184192657470703, 0.8314285278320312, 0.6103172302246094, -0.00777435302734375, 1.3684463500976562, 1.0477733612060547, 1.2263717651367188, -0.0259857177734375, 1.2320709228515625, 0.7708168029785156, 0.5020599365234375, 0.22748565673828125, -0.6089591979980469, 1.0569496154785156, 0.1036834716796875, 0.833343505859375, 2.2423477172851562, -0.6864452362060547, 1.9770889282226562, 0.052570343017578125, 1.4874076843261719, 0.202484130859375, -0.347320556640625, 0.39286231994628906, -0.26861572265625, 0.9008560180664062, 0.38970947265625, 0.115447998046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000072.npy"}
|
||||
{"epoch": 0.10884353741496598, "step": 73, "batch_size": 64, "mean": 0.16396701335906982, "std": 0.9949995875358582, "min": -1.5760955810546875, "p10": -0.903125, "median": 0.13074302673339844, "p90": 1.163149642944336, "max": 3.05322265625, "pos_frac": 0.546875, "sample": [0.6804351806640625, 0.07165908813476562, 0.255279541015625, -0.8443145751953125, -0.9505062103271484, -0.5837249755859375, -0.9283294677734375, -1.2551727294921875, -1.5760955810546875, 1.0398807525634766, -0.478302001953125, -0.7936477661132812, 0.019254684448242188, -0.43384552001953125, -0.6628761291503906, 0.2959918975830078, -0.788665771484375, 0.3330268859863281, -0.083892822265625, 0.1159210205078125, -1.202423095703125, -0.5316238403320312, -0.5949687957763672, 0.37268829345703125, 3.05322265625, 1.1160507202148438, -0.15462493896484375, -0.3323345184326172, -0.1409912109375, 0.5483131408691406, 0.5817470550537109, -0.38227272033691406, 0.41846275329589844, 0.5631065368652344, -0.663787841796875, 0.30797767639160156, 1.167093276977539, 1.71160888671875, -1.3880367279052734, -0.697601318359375, -0.23406982421875, 2.685760498046875, -0.775665283203125, -0.60540771484375, -0.5727462768554688, 1.2367172241210938, 0.5391845703125, 0.21935081481933594, 0.6865730285644531, 0.7632522583007812, -1.3331756591796875, 1.1520805358886719, 1.1539478302001953, 0.6677932739257812, 0.9710273742675781, 0.17214202880859375, 1.0620841979980469, 0.32660675048828125, 0.38645172119140625, -0.18345260620117188, 0.14556503295898438, 2.5712966918945312, 2.690032958984375, -0.4151420593261719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000073.npy"}
|
||||
{"epoch": 0.11035525321239607, "step": 74, "batch_size": 64, "mean": 0.5314477682113647, "std": 0.8849409222602844, "min": -1.2542266845703125, "p10": -0.6013395309448242, "median": 0.5254802703857422, "p90": 1.6520278930664067, "max": 2.916015625, "pos_frac": 0.765625, "sample": [-0.33467864990234375, 0.5110321044921875, -0.8557891845703125, 0.1595611572265625, 0.6941604614257812, 0.09081268310546875, 0.8723373413085938, 2.916015625, 1.3943862915039062, 2.0700111389160156, 1.1388702392578125, 1.1626739501953125, 0.975830078125, 0.37848663330078125, -0.2424468994140625, 0.9629707336425781, 1.101593017578125, 0.1024017333984375, -0.23894309997558594, -0.2701263427734375, 0.9104461669921875, 1.0303955078125, 0.47332763671875, 0.5579071044921875, 0.4769134521484375, 0.06298828125, -0.49683380126953125, 0.31729888916015625, 1.7975616455078125, -1.2484893798828125, 0.6684284210205078, 0.7269191741943359, 1.119668960571289, -0.681365966796875, -0.5924568176269531, 0.7299423217773438, -0.6051464080810547, 0.27040863037109375, -1.2542266845703125, -0.34113121032714844, 0.6709709167480469, -1.0039863586425781, 1.1344680786132812, 0.762054443359375, 0.6691017150878906, 2.415252685546875, 2.2553024291992188, -0.11843299865722656, 1.70947265625, 1.4714431762695312, -1.0272674560546875, 0.023324966430664062, 0.8781814575195312, 0.2110137939453125, 0.05957794189453125, 0.2908134460449219, 0.03470611572265625, 1.376129150390625, 1.5179901123046875, 1.7238082885742188, 1.439382553100586, 0.5399284362792969, 0.012401580810546875, 0.4553070068359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000074.npy"}
|
||||
{"epoch": 0.11186696900982615, "step": 75, "batch_size": 64, "mean": 0.5959901809692383, "std": 0.9613094329833984, "min": -2.123889923095703, "p10": -0.44356422424316405, "median": 0.4311256408691406, "p90": 1.9505571365356447, "max": 2.962371826171875, "pos_frac": 0.734375, "sample": [-0.4083213806152344, -0.47777557373046875, 0.4010639190673828, 0.9051094055175781, 2.4101028442382812, -0.32199668884277344, 0.23465728759765625, 0.6929092407226562, 1.5721969604492188, 1.9616241455078125, -0.0183868408203125, 0.07909393310546875, -0.9977951049804688, 0.08585357666015625, 0.241485595703125, 1.393911361694336, 0.38980865478515625, -0.38962554931640625, 0.1020803451538086, 0.43120574951171875, 2.962371826171875, 0.9177093505859375, 2.17498779296875, 1.541961669921875, 0.6870613098144531, 0.9075775146484375, 1.6790313720703125, -0.4605712890625, 0.9898529052734375, 0.5785064697265625, 0.48439788818359375, 1.924734115600586, 1.14324951171875, 1.558816909790039, 1.0302581787109375, 0.02092742919921875, 2.02020263671875, 0.4310455322265625, 2.284942626953125, 1.113739013671875, -0.45670318603515625, 0.5845508575439453, -2.123889923095703, 0.7879657745361328, 1.1159210205078125, 0.3231620788574219, -0.4129066467285156, 0.062225341796875, 0.09011650085449219, -0.1117095947265625, 0.37267494201660156, 1.8042259216308594, -0.397735595703125, -0.4022674560546875, 0.38128662109375, -0.7043724060058594, 1.89678955078125, -0.36858367919921875, -0.4961395263671875, 0.69281005859375, 0.6285495758056641, -0.1281108856201172, 2.4163360595703125, 0.3111724853515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000075.npy"}
|
||||
{"epoch": 0.11337868480725624, "step": 76, "batch_size": 64, "mean": 0.28558364510536194, "std": 0.9524408578872681, "min": -1.5632705688476562, "p10": -0.9472972869873045, "median": 0.20663738250732422, "p90": 1.4082357406616217, "max": 3.028472900390625, "pos_frac": 0.609375, "sample": [-1.4709930419921875, 1.0926933288574219, -1.099151611328125, -0.0814208984375, 0.6606674194335938, -0.9980220794677734, 0.2215290069580078, 0.19174575805664062, 0.41379356384277344, 2.62164306640625, 1.4714374542236328, 3.028472900390625, -0.19407272338867188, -1.3680419921875, 0.2816619873046875, -1.5632705688476562, 0.8350982666015625, 1.080678939819336, 0.47442626953125, 0.3572196960449219, 0.6886138916015625, 1.8062515258789062, -0.5292739868164062, 2.0616378784179688, 0.4379730224609375, 1.0663414001464844, 0.0495147705078125, 1.64471435546875, -0.5514602661132812, -0.2683601379394531, 0.010892868041992188, -0.5833911895751953, -1.1032257080078125, 0.06060028076171875, -0.31757354736328125, 0.7043228149414062, 0.7354583740234375, 0.1324615478515625, -0.8289394378662109, 0.26070404052734375, 0.06977081298828125, -0.2575836181640625, -0.1388092041015625, -0.6146354675292969, 0.9556121826171875, 0.6606121063232422, 0.057262420654296875, -1.2895584106445312, -0.05405426025390625, 0.521270751953125, -0.41097259521484375, 0.7315750122070312, -0.395721435546875, 0.8007965087890625, 2.149505615234375, 1.117746353149414, -0.22085952758789062, -0.21775054931640625, -0.47312164306640625, 1.2501068115234375, 1.2607650756835938, -0.08425331115722656, 1.1468048095703125, 0.27948760986328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000076.npy"}
|
||||
{"epoch": 0.11489040060468632, "step": 77, "batch_size": 64, "mean": 0.4200243055820465, "std": 0.8718430995941162, "min": -1.8440780639648438, "p10": -0.594865608215332, "median": 0.40883541107177734, "p90": 1.56405029296875, "max": 2.5227203369140625, "pos_frac": 0.6875, "sample": [-0.1563854217529297, 0.6542549133300781, -0.19488906860351562, -0.7422332763671875, 0.8594207763671875, 1.6924057006835938, 0.06880950927734375, 0.39318275451660156, -1.8440780639648438, 0.2592315673828125, 1.5596923828125, 1.2849845886230469, 0.25229835510253906, -0.6186790466308594, 0.4244880676269531, 2.1318626403808594, 0.429168701171875, 1.4097213745117188, 0.6142730712890625, 1.7059097290039062, -0.2104034423828125, 0.15289688110351562, 1.9774246215820312, 0.7606849670410156, -0.09561920166015625, -0.195587158203125, 0.33422279357910156, 1.0463104248046875, -0.021450042724609375, 0.07502174377441406, -0.5393009185791016, -1.1599006652832031, 0.3004188537597656, 0.23340988159179688, 0.6779327392578125, 0.959747314453125, 0.6351470947265625, 1.0293464660644531, 0.7353172302246094, 1.2053604125976562, 0.45133209228515625, -0.4016609191894531, 0.4848747253417969, 0.6689662933349609, 0.52532958984375, 2.3809661865234375, 0.066558837890625, -0.22986221313476562, 2.5227203369140625, 0.312591552734375, 1.56591796875, -1.0721282958984375, 0.2587013244628906, 1.4063892364501953, 0.9424514770507812, -0.10153579711914062, -1.1171035766601562, -1.1396331787109375, -0.201904296875, -0.1658935546875, 0.595458984375, 0.8664474487304688, 0.6911067962646484, -0.5129547119140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000077.npy"}
|
||||
{"epoch": 0.1164021164021164, "step": 78, "batch_size": 64, "mean": 0.38572707772254944, "std": 0.8758112788200378, "min": -1.9784469604492188, "p10": -0.8397895812988281, "median": 0.4608116149902344, "p90": 1.3195011138916015, "max": 2.2603530883789062, "pos_frac": 0.671875, "sample": [1.1834659576416016, -0.8773651123046875, 0.5895004272460938, 1.1601600646972656, 0.6007232666015625, 2.2603530883789062, 1.6948585510253906, 0.04650115966796875, 0.38321685791015625, -0.8875045776367188, 1.326263427734375, -0.8951416015625, -0.22899246215820312, 0.97003173828125, -0.20416259765625, 0.3847503662109375, -1.0467681884765625, 2.1383056640625, -0.7521133422851562, 0.327911376953125, -0.060028076171875, -0.6061630249023438, 1.29833984375, 0.000797271728515625, 1.0160064697265625, 0.682830810546875, 0.6862640380859375, -1.9784469604492188, 0.9103012084960938, -1.045013427734375, 0.9844379425048828, 1.5103302001953125, 0.424560546875, 2.2143478393554688, 0.6196136474609375, -0.3403167724609375, -1.0091705322265625, 0.6727523803710938, 1.0549297332763672, 1.4296188354492188, 0.84234619140625, 0.22593307495117188, 1.1017608642578125, -0.428131103515625, -0.666961669921875, 1.0003585815429688, -0.7313232421875, 0.15596771240234375, 0.30321502685546875, 1.2314682006835938, -0.6302871704101562, 0.765411376953125, -0.0703277587890625, -0.31060028076171875, 0.2421875, 0.08742523193359375, -0.04561805725097656, -0.2532196044921875, 1.3037223815917969, 0.49706268310546875, 0.6753997802734375, 0.6012916564941406, 1.0894622802734375, 1.0600032806396484], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000078.npy"}
|
||||
{"epoch": 0.11791383219954649, "step": 79, "batch_size": 64, "mean": 0.5819563865661621, "std": 1.0553133487701416, "min": -1.33575439453125, "p10": -0.5823089599609375, "median": 0.4553031921386719, "p90": 1.7005321502685558, "max": 4.420352935791016, "pos_frac": 0.6875, "sample": [0.09629631042480469, 0.38922882080078125, -0.7233428955078125, 1.202676773071289, -0.5714569091796875, 1.254638671875, -1.33575439453125, -0.2349681854248047, 1.0674514770507812, 0.03656768798828125, -0.5869598388671875, -0.2595062255859375, 1.3246498107910156, 4.420352935791016, -0.8019027709960938, 0.069488525390625, 1.3559494018554688, 0.4603233337402344, -0.5957088470458984, -0.253936767578125, 0.5973491668701172, 2.8508453369140625, 0.2483062744140625, 0.85211181640625, 0.4502830505371094, -0.8596324920654297, 2.677997589111328, 0.8602294921875, 1.0648040771484375, 1.0970077514648438, -0.0396270751953125, -0.7101707458496094, 0.8884124755859375, 1.4134674072265625, 1.4453887939453125, 0.1870746612548828, 0.8432807922363281, 0.7388763427734375, -0.20519256591796875, -0.51104736328125, -0.07687759399414062, 1.0062179565429688, 0.1385498046875, -0.17246627807617188, -0.40845298767089844, 0.5749454498291016, 0.9000167846679688, 0.35561180114746094, 0.5644207000732422, 0.6381912231445312, 0.5423202514648438, 0.2644805908203125, 2.1339111328125, -0.4276885986328125, -0.08057403564453125, 3.806854248046875, 0.8827743530273438, 0.37381744384765625, -0.0297393798828125, 1.8098793029785156, 0.7335662841796875, 1.2020912170410156, 2.1557235717773438, 0.15378570556640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000079.npy"}
|
||||
{"epoch": 0.11942554799697656, "step": 80, "batch_size": 64, "mean": 0.47282546758651733, "std": 1.0300968885421753, "min": -1.87677001953125, "p10": -0.7069835662841797, "median": 0.32538795471191406, "p90": 1.9537837982177735, "max": 2.5654296875, "pos_frac": 0.671875, "sample": [-0.340484619140625, 1.9650459289550781, 1.2959823608398438, 1.1043701171875, 1.2558670043945312, -1.87677001953125, 0.1091766357421875, 0.11756515502929688, -0.7019500732421875, -0.8249130249023438, 1.8230247497558594, 1.0322952270507812, -0.10564422607421875, -0.7091407775878906, -0.06503486633300781, 2.4647598266601562, 0.112518310546875, 0.8053970336914062, 0.5392265319824219, 0.36963653564453125, 0.3994255065917969, 0.24713134765625, -0.3344268798828125, 0.6996917724609375, 2.5654296875, 2.119565963745117, 1.48809814453125, 0.496612548828125, 2.4430084228515625, 2.1755218505859375, -0.097747802734375, 0.7721824645996094, 0.08061981201171875, 0.6021080017089844, 1.023590087890625, 1.0204315185546875, -0.5438461303710938, 0.0111236572265625, 1.417022705078125, -1.847076416015625, -0.3581085205078125, -0.7648849487304688, -1.04931640625, 1.7951240539550781, -0.31329917907714844, 0.47541046142578125, 1.9275054931640625, 0.2933921813964844, 0.35738372802734375, -1.2169303894042969, 1.7593841552734375, -0.17766761779785156, 0.27252197265625, 0.283172607421875, 0.07073402404785156, -0.1894512176513672, 0.6450653076171875, -0.151275634765625, 1.3793869018554688, -0.6176071166992188, 0.1416473388671875, -0.491546630859375, 2.350006103515625, 0.7307891845703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000080.npy"}
|
||||
{"epoch": 0.12093726379440665, "step": 81, "batch_size": 64, "mean": 0.5806014537811279, "std": 0.9155688285827637, "min": -0.7641830444335938, "p10": -0.33948001861572263, "median": 0.3623619079589844, "p90": 1.5963861465454103, "max": 4.176483154296875, "pos_frac": 0.765625, "sample": [0.7049884796142578, 0.6474704742431641, 1.4173851013183594, 0.03234100341796875, 0.8299179077148438, -0.7290573120117188, 0.5691318511962891, 0.06317138671875, -0.4450836181640625, 0.14180755615234375, 0.728912353515625, 0.1974029541015625, 0.0226287841796875, 1.4703445434570312, 0.11588096618652344, -0.00539398193359375, 0.1850128173828125, 0.1087799072265625, 0.22348785400390625, 1.5571346282958984, 0.366241455078125, 0.18367576599121094, 0.7787704467773438, -0.525360107421875, 1.571868896484375, 0.7869377136230469, 0.0058746337890625, 0.3726921081542969, -0.08664894104003906, -0.585418701171875, 0.9533348083496094, 0.4400177001953125, 1.0477981567382812, 0.053073883056640625, -0.21997833251953125, 1.7722625732421875, -0.5705718994140625, 4.176483154296875, 1.25616455078125, 2.2256622314453125, 0.08431243896484375, 0.6723251342773438, 0.8196334838867188, -0.15607452392578125, 1.4080276489257812, -0.34372520446777344, 1.606893539428711, 0.1304473876953125, 1.2608108520507812, 1.8485260009765625, 3.494112014770508, 0.44002532958984375, 1.3050365447998047, -0.3295745849609375, 0.167877197265625, -0.08315277099609375, -0.09658622741699219, 0.5518569946289062, 0.35848236083984375, 1.6185073852539062, 0.014463424682617188, 1.4695167541503906, -0.7641830444335938, -0.15820693969726562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000081.npy"}
|
||||
{"epoch": 0.12244897959183673, "step": 82, "batch_size": 64, "mean": 0.7579435110092163, "std": 0.7856361269950867, "min": -0.9953765869140625, "p10": -0.14317188262939454, "median": 0.7543134689331055, "p90": 1.7735176086425783, "max": 2.8256759643554688, "pos_frac": 0.828125, "sample": [0.8640670776367188, 1.3078937530517578, 0.6949653625488281, -0.05264091491699219, 1.381866455078125, 0.28290367126464844, 0.6463737487792969, 1.333648681640625, 1.936859130859375, 0.9825019836425781, 1.4397087097167969, 0.7143077850341797, 0.3359832763671875, 1.7101516723632812, 0.2662544250488281, 1.3973541259765625, 1.0574951171875, 0.9351081848144531, 1.3135261535644531, 0.5152130126953125, -0.08050537109375, 0.611724853515625, 0.5947799682617188, 0.2931480407714844, -0.25229644775390625, 1.1547927856445312, 0.9670047760009766, 1.033935546875, 1.285024642944336, 1.0062828063964844, 0.12726402282714844, -0.12281990051269531, 2.8256759643554688, 0.1837005615234375, 0.1455078125, 1.8006744384765625, 1.298309326171875, -0.34421539306640625, 1.2351913452148438, 2.7141475677490234, 0.3764190673828125, -0.9953765869140625, 1.5339813232421875, -0.42023468017578125, 1.0874481201171875, 0.24495315551757812, 1.4435768127441406, 0.21987152099609375, 2.16668701171875, 0.668792724609375, -0.90924072265625, 0.3045692443847656, 1.1312255859375, 1.8405685424804688, 0.075958251953125, 0.0075588226318359375, 0.5742683410644531, 0.7943191528320312, 1.8600196838378906, 0.8255596160888672, -0.1443328857421875, -0.4593658447265625, -0.14046287536621094, 0.8807525634765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000082.npy"}
|
||||
{"epoch": 0.12396069538926682, "step": 83, "batch_size": 64, "mean": 0.6912951469421387, "std": 1.0473483800888062, "min": -2.6539993286132812, "p10": -0.24832038879394527, "median": 0.6179084777832031, "p90": 1.5099807739257816, "max": 4.6141357421875, "pos_frac": 0.796875, "sample": [1.1826934814453125, 0.41595458984375, 0.6387405395507812, 0.478179931640625, 0.9540519714355469, -0.0335845947265625, 0.0457763671875, 0.43622589111328125, 1.210601806640625, 2.05328369140625, -0.5118942260742188, 0.25118255615234375, -0.9166336059570312, 0.7242774963378906, 1.2372760772705078, 0.35427093505859375, 0.1154632568359375, 1.1453170776367188, 0.8649253845214844, 0.8121376037597656, 1.3670578002929688, 0.5108680725097656, 0.989349365234375, -0.2660484313964844, -0.305328369140625, -0.0164794921875, 1.2674102783203125, 1.1533737182617188, 0.09946250915527344, -0.05167388916015625, 0.2485198974609375, 0.07516670227050781, 2.6407737731933594, -2.6539993286132812, -0.6219158172607422, 1.26800537109375, 1.1399574279785156, -0.8347015380859375, 0.5913028717041016, -0.2069549560546875, 0.2845458984375, 0.9617385864257812, 1.718353271484375, 0.9414272308349609, -0.028131484985351562, 0.49924278259277344, 0.2825775146484375, 0.6562347412109375, 0.597076416015625, 1.31414794921875, 4.335945129394531, 1.2753963470458984, 1.0104103088378906, 0.042682647705078125, 0.7168121337890625, 1.5483551025390625, 1.2326812744140625, 1.420440673828125, 1.2788009643554688, 4.6141357421875, 1.5546112060546875, 0.15961456298828125, 0.17467117309570312, -0.20127487182617188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000083.npy"}
|
||||
{"epoch": 0.1254724111866969, "step": 84, "batch_size": 64, "mean": 0.503605842590332, "std": 0.9145830273628235, "min": -2.217681884765625, "p10": -0.686154556274414, "median": 0.44801807403564453, "p90": 1.6524890899658204, "max": 2.328197479248047, "pos_frac": 0.71875, "sample": [-0.04274749755859375, 1.5203094482421875, 0.585296630859375, 1.5306358337402344, 1.4647674560546875, 1.2328033447265625, 2.0174789428710938, -0.5271949768066406, 0.21111679077148438, -0.57989501953125, -0.81561279296875, 1.8313789367675781, 0.7771492004394531, 0.9882125854492188, 0.8642120361328125, 0.11957168579101562, -0.13956451416015625, -0.3980255126953125, 1.3369216918945312, -1.1423454284667969, -0.3252105712890625, 0.81878662109375, 0.9289970397949219, 1.26788330078125, 0.3719005584716797, 0.09857177734375, -1.0458755493164062, 1.710113525390625, 0.28661537170410156, -0.02392578125, 1.3568038940429688, 0.2551116943359375, 0.2979850769042969, 1.59210205078125, 0.07335662841796875, 1.2716865539550781, 1.4693565368652344, 1.6720390319824219, 0.6280174255371094, -0.9351043701171875, -2.217681884765625, 1.60687255859375, 0.7685279846191406, 0.5241355895996094, 0.9094772338867188, -0.021532058715820312, 0.2626914978027344, 2.328197479248047, 0.05896759033203125, -0.70806884765625, 0.7563400268554688, -0.5219039916992188, 1.8207035064697266, 0.9169158935546875, 0.8481254577636719, 0.14026641845703125, 1.30218505859375, 0.3528861999511719, -0.7514801025390625, -0.0879058837890625, 1.7675247192382812, 0.13169097900390625, -0.6350212097167969, 0.0751800537109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000084.npy"}
|
||||
{"epoch": 0.12698412698412698, "step": 85, "batch_size": 64, "mean": 0.793880820274353, "std": 1.0594267845153809, "min": -2.047088623046875, "p10": -0.46495742797851547, "median": 0.8164949417114258, "p90": 2.0692153930664063, "max": 3.7000503540039062, "pos_frac": 0.8125, "sample": [2.0005970001220703, -0.212799072265625, 0.8063488006591797, 1.0877265930175781, 0.246795654296875, 1.5483779907226562, 0.40796661376953125, 0.5890007019042969, -0.2059192657470703, 1.8906669616699219, 2.4689407348632812, -2.047088623046875, 1.477090835571289, 2.581817626953125, 0.8554573059082031, 1.3840713500976562, 1.8633842468261719, 1.1825790405273438, 1.2222518920898438, 1.5862960815429688, 0.06554794311523438, 0.8113422393798828, -0.8107452392578125, 1.468048095703125, 0.23221206665039062, 0.99383544921875, 0.3173828125, 1.1502532958984375, 2.6853713989257812, 0.06407356262207031, -0.9017524719238281, 1.0734024047851562, -0.0941619873046875, 0.0744781494140625, 0.16691017150878906, -0.277679443359375, 2.5492477416992188, 2.098623275756836, 0.24032974243164062, 0.18788528442382812, -1.7381477355957031, 1.5049285888671875, 1.4287261962890625, 0.9562721252441406, 3.7000503540039062, 2.3142642974853516, 1.4254913330078125, 1.1163520812988281, 0.8265304565429688, 1.3496284484863281, 0.12915802001953125, -0.5452194213867188, 0.58453369140625, 0.34210205078125, 0.6650161743164062, 0.199066162109375, -0.7509765625, -0.5497283935546875, 1.6333999633789062, 0.1347198486328125, 1.953643798828125, -0.158050537109375, 0.8216476440429688, 0.6367969512939453], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000085.npy"}
|
||||
{"epoch": 0.12849584278155707, "step": 86, "batch_size": 64, "mean": 0.6412345170974731, "std": 1.0074729919433594, "min": -1.4253959655761719, "p10": -0.7495201110839843, "median": 0.6680335998535156, "p90": 1.864373779296875, "max": 3.2682266235351562, "pos_frac": 0.78125, "sample": [2.0333709716796875, 0.6628341674804688, -1.0209884643554688, 0.3591327667236328, 2.3546218872070312, 0.7111110687255859, 1.0485382080078125, 0.220184326171875, -1.100555419921875, 0.721343994140625, -0.5183925628662109, -1.1420097351074219, 1.3595733642578125, 3.2682266235351562, 0.243560791015625, 1.6538619995117188, 1.2827091217041016, -0.8531475067138672, -0.70330810546875, 0.17975616455078125, 1.239166259765625, 0.20206451416015625, 0.0892486572265625, 0.9056282043457031, 1.3712844848632812, 0.20561599731445312, 1.456787109375, 0.7564430236816406, 1.864288330078125, 0.32224082946777344, 1.8785667419433594, 1.454742431640625, 1.155670166015625, 0.16031646728515625, 0.6797332763671875, 1.1494369506835938, -1.4253959655761719, 0.4107837677001953, 0.8522224426269531, 0.6732330322265625, -0.8363418579101562, 0.8671283721923828, 0.9226722717285156, 0.1678447723388672, 0.07647705078125, 1.0769233703613281, 1.7024898529052734, -0.3253917694091797, 1.864410400390625, -0.44387054443359375, 2.58941650390625, 1.4519271850585938, 0.00048828125, -0.2197399139404297, 1.7752609252929688, 2.8947830200195312, -0.3286113739013672, 0.32260894775390625, 0.3339500427246094, -0.7693252563476562, 0.1535797119140625, -0.340423583984375, 1.4843559265136719, 0.45589447021484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000086.npy"}
|
||||
{"epoch": 0.13000755857898716, "step": 87, "batch_size": 64, "mean": 0.6819804906845093, "std": 1.2886463403701782, "min": -3.3105239868164062, "p10": -0.993199348449707, "median": 0.777888298034668, "p90": 2.0350959777832034, "max": 3.96844482421875, "pos_frac": 0.71875, "sample": [1.3984298706054688, 3.96844482421875, 2.041900634765625, 0.4688873291015625, 1.3526992797851562, 1.0883331298828125, -0.76934814453125, -1.4786758422851562, 1.6820068359375, 1.9116954803466797, -0.08936691284179688, -0.7274169921875, 0.6950759887695312, -0.9687461853027344, -1.5703125, 2.2022705078125, -0.1070556640625, 1.9150047302246094, 1.0217971801757812, 1.5868301391601562, -1.9373321533203125, 2.780914306640625, 1.4103832244873047, 0.06725311279296875, 1.7105255126953125, -3.3105239868164062, -0.6579704284667969, 0.12186813354492188, 1.9911346435546875, -1.0036792755126953, -0.3495025634765625, 0.20822906494140625, 2.7163448333740234, -1.0043792724609375, 0.792266845703125, -0.7552604675292969, 0.6107635498046875, 0.4245185852050781, 0.7682476043701172, 0.7875289916992188, 0.3338661193847656, 1.4209136962890625, 1.137176513671875, -1.34124755859375, 0.458984375, -0.4705352783203125, 1.1842899322509766, 1.9465408325195312, 0.6032886505126953, 1.4079132080078125, 1.0660400390625, 1.5823211669921875, 2.5229644775390625, 0.340972900390625, 0.3859443664550781, -0.03084564208984375, 0.8521957397460938, -0.21199798583984375, 2.082733154296875, 2.0192184448242188, 1.99810791015625, 0.967529296875, 0.7459869384765625, 1.650604248046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000087.npy"}
|
||||
{"epoch": 0.13151927437641722, "step": 88, "batch_size": 64, "mean": 0.6377411484718323, "std": 1.3135112524032593, "min": -2.39874267578125, "p10": -0.830219268798828, "median": 0.5875968933105469, "p90": 2.114603042602539, "max": 4.0198822021484375, "pos_frac": 0.671875, "sample": [1.5944747924804688, 0.5170497894287109, -0.6618804931640625, 0.7050323486328125, 4.0198822021484375, 1.3059310913085938, 3.58489990234375, 0.394073486328125, 1.3936843872070312, -0.015192031860351562, 2.054920196533203, 1.5979690551757812, 1.3694324493408203, 1.1654281616210938, -0.9903717041015625, 1.30389404296875, 0.7496185302734375, -0.504852294921875, -2.39874267578125, 0.5367355346679688, 2.121417999267578, 0.4128990173339844, 0.26496124267578125, -0.5752048492431641, -1.4799575805664062, 1.3787384033203125, 1.6393966674804688, 4.011260986328125, -0.0918731689453125, -0.5552196502685547, -2.347747802734375, -0.6949691772460938, 1.0908966064453125, 1.031158447265625, 1.1831321716308594, -0.42983245849609375, 0.638458251953125, 1.3252830505371094, 2.513416290283203, 0.09165382385253906, 0.4374580383300781, 1.5825424194335938, 1.9124259948730469, 0.96209716796875, -0.16827392578125, 0.240936279296875, 0.21539306640625, -0.2743968963623047, 2.0987014770507812, 1.1218318939208984, 1.2936687469482422, 0.6751937866210938, 2.6218414306640625, 0.37225341796875, 1.0593109130859375, -0.88818359375, -0.01410675048828125, -0.2553272247314453, 0.20349884033203125, 2.1483116149902344, -0.20095443725585938, -1.26177978515625, -0.6488971710205078, -1.6679668426513672], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000088.npy"}
|
||||
{"epoch": 0.1330309901738473, "step": 89, "batch_size": 64, "mean": 0.6511195302009583, "std": 1.2641977071762085, "min": -2.42645263671875, "p10": -0.8110183715820312, "median": 0.5228910446166992, "p90": 2.3951568603515625, "max": 4.226531982421875, "pos_frac": 0.71875, "sample": [0.5349960327148438, 0.5396232604980469, 0.39455223083496094, 0.7281455993652344, 2.3560638427734375, 3.0647754669189453, -0.07599639892578125, 0.19244956970214844, 0.6807785034179688, 2.5948104858398438, 1.3411483764648438, 1.5333251953125, 1.177459716796875, -0.7315826416015625, -0.09592437744140625, 0.4443397521972656, 0.8968582153320312, 0.182586669921875, -0.04749298095703125, -1.858999252319336, 1.4811553955078125, -0.5715141296386719, 0.584381103515625, 2.0409393310546875, 0.5107860565185547, 4.226531982421875, 0.9956016540527344, 0.14825820922851562, -0.4994850158691406, 1.1619873046875, 0.01706695556640625, 0.4997406005859375, 0.45937538146972656, 0.32538604736328125, 2.188873291015625, -0.16493988037109375, 2.4853363037109375, 0.7217750549316406, -2.42645263671875, -0.5687103271484375, 2.240121841430664, 1.5729751586914062, 2.8846435546875, 3.38018798828125, 0.153472900390625, -0.057605743408203125, -0.3672370910644531, 2.4119110107421875, 2.074138641357422, -0.921630859375, -0.845062255859375, 0.8949375152587891, 0.42356109619140625, 0.961517333984375, 1.0936260223388672, -1.0858917236328125, 0.7287673950195312, 0.4264869689941406, -1.2229938507080078, -1.6527786254882812, 0.620513916015625, -0.2903289794921875, 0.7509593963623047, 0.029348373413085938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000089.npy"}
|
||||
{"epoch": 0.1345427059712774, "step": 90, "batch_size": 64, "mean": 0.6701725721359253, "std": 1.182490587234497, "min": -3.449676513671875, "p10": -0.4634552001953125, "median": 0.6128654479980469, "p90": 2.0584453582763675, "max": 4.493927001953125, "pos_frac": 0.71875, "sample": [-0.8315353393554688, 1.7963619232177734, -0.013843536376953125, 0.8829078674316406, 1.9989433288574219, 0.5127391815185547, 1.49188232421875, 2.5860137939453125, 0.3306884765625, -3.449676513671875, 2.42388916015625, 1.3510246276855469, 1.3528099060058594, 1.0453128814697266, 1.60333251953125, -0.3279914855957031, -0.000629425048828125, 2.221893310546875, -0.7599830627441406, 0.5656776428222656, 1.6981048583984375, 0.5836410522460938, 1.5028076171875, 2.7183303833007812, 2.781494140625, 0.717132568359375, -0.4497833251953125, 4.493927001953125, 0.7878837585449219, 1.2559967041015625, 0.8301925659179688, 0.6573028564453125, 0.18969345092773438, 1.39617919921875, 0.5927047729492188, -0.02410888671875, -0.4693145751953125, -1.0170974731445312, 0.7652206420898438, 0.9011707305908203, 0.061656951904296875, 0.458953857421875, 1.5479278564453125, 0.633026123046875, 2.0839462280273438, 0.2129840850830078, 0.5866432189941406, 1.2815876007080078, -0.09348869323730469, -0.032741546630859375, -1.4272804260253906, 0.9511642456054688, 0.8114013671875, -0.11540985107421875, 0.3267841339111328, 0.4469413757324219, -0.31011962890625, -1.44952392578125, -0.167083740234375, 1.3434486389160156, -0.42084503173828125, 0.0771331787109375, 0.029033660888671875, 1.363607406616211], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000090.npy"}
|
||||
{"epoch": 0.1360544217687075, "step": 91, "batch_size": 64, "mean": 0.7297590970993042, "std": 1.2468427419662476, "min": -2.86297607421875, "p10": -0.6714874267578124, "median": 0.6973752975463867, "p90": 2.320510864257813, "max": 3.7198867797851562, "pos_frac": 0.71875, "sample": [1.105621337890625, 1.8921279907226562, 1.4326839447021484, -0.5879554748535156, 0.3914337158203125, 2.684558868408203, -0.020582199096679688, 0.5281143188476562, 2.4691085815429688, 3.7198867797851562, 1.668426513671875, -0.417083740234375, 1.5677070617675781, 0.020334243774414062, 0.5775585174560547, -0.9055843353271484, 0.3679046630859375, 0.8997955322265625, 1.0052108764648438, -1.13031005859375, 1.7670612335205078, 3.4131622314453125, 0.5096817016601562, 0.41560935974121094, 2.2645416259765625, 1.3457984924316406, 1.1557464599609375, -0.44249725341796875, 0.9683380126953125, 1.38177490234375, 0.09636688232421875, -0.7072868347167969, -0.3884735107421875, 0.3027362823486328, 2.134023666381836, 0.865142822265625, -0.095001220703125, 1.3773956298828125, -0.2852783203125, -2.86297607421875, 1.3633499145507812, -1.2069931030273438, 0.48447418212890625, -2.2177047729492188, 0.21305084228515625, 1.0198554992675781, -0.113006591796875, 2.6198043823242188, 1.1511077880859375, 0.9025688171386719, 0.18152618408203125, -1.0120773315429688, -0.28204345703125, -0.15507793426513672, 2.3444976806640625, 2.8248634338378906, 1.3722686767578125, -0.07931137084960938, 2.1810455322265625, 1.8397445678710938, 0.8171920776367188, 0.20895957946777344, 1.4887123107910156, 0.27295494079589844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000091.npy"}
|
||||
{"epoch": 0.13756613756613756, "step": 92, "batch_size": 64, "mean": 0.6674777865409851, "std": 1.3378243446350098, "min": -2.3909835815429688, "p10": -1.156598663330078, "median": 0.7622833251953125, "p90": 2.4901695251464853, "max": 3.5151519775390625, "pos_frac": 0.671875, "sample": [0.7803573608398438, -0.34194374084472656, -2.0269851684570312, -0.2842216491699219, -0.4416656494140625, 0.2747688293457031, -0.21337318420410156, 0.8850727081298828, -0.0592498779296875, 0.04409217834472656, -1.1683425903320312, 1.2491607666015625, 2.166341781616211, -1.5344619750976562, 2.6809616088867188, 2.1261463165283203, 0.9868507385253906, -0.0024204254150390625, 2.673643112182617, -0.9173660278320312, 0.9417724609375, -0.67626953125, 1.5758857727050781, 1.2459182739257812, 1.7865447998046875, 0.27397918701171875, 0.017383575439453125, 1.185211181640625, -0.5657424926757812, 0.7442092895507812, -1.3946819305419922, 1.7911224365234375, 1.366455078125, 0.21496200561523438, 1.1336517333984375, -0.6092910766601562, 0.4889678955078125, 1.4064102172851562, -1.1291961669921875, -0.34815216064453125, 0.6980361938476562, 0.3898735046386719, 2.2223968505859375, 2.136474609375, 1.105855941772461, -1.4273185729980469, 2.3046646118164062, 2.2759017944335938, 2.7088623046875, -1.3785400390625, -0.6487388610839844, 3.5151519775390625, 1.0257835388183594, -0.09052848815917969, 1.0696563720703125, -2.3909835815429688, 3.18450927734375, 0.9178695678710938, 0.4632415771484375, 2.569671630859375, 2.8341102600097656, 0.3160076141357422, 1.2141952514648438, 1.3759193420410156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000092.npy"}
|
||||
{"epoch": 0.13907785336356765, "step": 93, "batch_size": 64, "mean": 0.5978972911834717, "std": 1.3005242347717285, "min": -2.2006072998046875, "p10": -1.052385902404785, "median": 0.4894084930419922, "p90": 2.23900146484375, "max": 4.71240234375, "pos_frac": 0.703125, "sample": [0.5399589538574219, 1.4073257446289062, 3.1938323974609375, 0.8590164184570312, -2.2006072998046875, 0.21449661254882812, 0.8771514892578125, -0.2544097900390625, -0.07786369323730469, 2.0892257690429688, 2.4982261657714844, 2.1683349609375, 4.71240234375, -1.2394371032714844, 0.07043266296386719, 0.8104686737060547, 1.4141998291015625, 1.5517845153808594, -0.07167625427246094, -1.3408126831054688, 1.2961502075195312, 0.9881744384765625, -1.0932464599609375, 0.38373565673828125, 0.16823768615722656, -0.45501708984375, 1.7041397094726562, -0.4031047821044922, -0.9570446014404297, -0.10929107666015625, 0.8058319091796875, 0.05565643310546875, 0.4388580322265625, 2.9505691528320312, 0.04039764404296875, 0.6011276245117188, -0.8429641723632812, 0.66510009765625, -0.6587982177734375, 0.8210067749023438, 0.9208450317382812, 0.4056434631347656, 1.1628665924072266, 1.2830619812011719, 0.2860107421875, 0.430755615234375, 0.2880725860595703, 0.03432464599609375, 2.269287109375, 0.23783302307128906, 2.974275588989258, 0.6914997100830078, 2.02618408203125, 1.1147193908691406, -0.815093994140625, -0.9176197052001953, 2.8474693298339844, -1.1856536865234375, -1.50830078125, 0.677734375, 1.9200935363769531, -1.6154327392578125, -0.129913330078125, 1.2451972961425781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000093.npy"}
|
||||
{"epoch": 0.14058956916099774, "step": 94, "batch_size": 64, "mean": 0.836177408695221, "std": 1.502780556678772, "min": -5.124347686767578, "p10": -0.8445877075195312, "median": 1.0347099304199219, "p90": 2.474402236938477, "max": 4.11737060546875, "pos_frac": 0.78125, "sample": [2.0894241333007812, 1.1515655517578125, 1.9986343383789062, 2.5476856231689453, 0.8766555786132812, -2.194610595703125, -0.5071067810058594, 0.3770904541015625, 1.0615234375, 2.419872283935547, 0.49913978576660156, 3.526905059814453, 0.397064208984375, -5.124347686767578, 1.0078964233398438, 1.1671218872070312, -1.0515518188476562, -0.1792469024658203, 0.8061065673828125, 0.4798583984375, 1.8845691680908203, 1.5312881469726562, 1.1898689270019531, 1.3624763488769531, 1.6878776550292969, 0.40979957580566406, 0.37249755859375, -0.35341644287109375, -1.7599258422851562, 1.076507568359375, 1.6640605926513672, -0.04010581970214844, 0.46893882751464844, 0.0081787109375, 1.2905426025390625, 1.2174568176269531, 0.965423583984375, 2.3927001953125, 1.2117195129394531, 3.7597808837890625, 2.179300308227539, 1.9818267822265625, -0.21163558959960938, 1.36468505859375, 2.294208526611328, 2.497772216796875, -0.72979736328125, 2.822357177734375, 3.080322265625, 1.6015453338623047, 4.11737060546875, 0.5124931335449219, 0.09051513671875, -0.6959514617919922, 1.2403640747070312, 0.1006317138671875, -1.9195709228515625, 1.4047222137451172, 0.586151123046875, 1.2274856567382812, 0.2367839813232422, -0.8937835693359375, -1.1274166107177734, 0.06505584716796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000094.npy"}
|
||||
{"epoch": 0.1421012849584278, "step": 95, "batch_size": 64, "mean": 0.8631570339202881, "std": 1.2848219871520996, "min": -2.3701934814453125, "p10": -0.9112012863159179, "median": 0.8193206787109375, "p90": 2.383259963989258, "max": 3.635101318359375, "pos_frac": 0.796875, "sample": [3.477264404296875, -1.4639434814453125, 1.6798171997070312, 0.39105987548828125, 0.7013778686523438, 0.3573951721191406, 3.5608367919921875, 0.45473480224609375, 1.0745925903320312, 2.3844947814941406, 0.4098625183105469, 0.974090576171875, 1.774444580078125, 0.5379600524902344, -0.67059326171875, 0.07720947265625, 0.6054534912109375, 0.9902572631835938, 0.6150016784667969, 0.9840850830078125, 0.258941650390625, 2.2933197021484375, -1.0941410064697266, 1.8098640441894531, -1.223846435546875, -1.094207763671875, 3.53271484375, 1.6711273193359375, 0.39150238037109375, -2.3701934814453125, 0.728057861328125, 2.2595062255859375, 0.6630020141601562, 1.8491592407226562, -0.9178886413574219, 0.22247314453125, 2.124176025390625, 1.0510406494140625, -1.0027313232421875, 0.02472686767578125, 0.0657958984375, -0.042369842529296875, 0.91058349609375, 1.4784107208251953, 0.018402099609375, 2.0900344848632812, 2.75115966796875, -0.11596298217773438, 1.0382671356201172, 2.7046051025390625, 1.05487060546875, 2.3803787231445312, -0.8955974578857422, 1.3928375244140625, 1.757293701171875, 3.635101318359375, 0.0595550537109375, -0.20011138916015625, 1.7145404815673828, 1.3648910522460938, 0.28017234802246094, 1.30029296875, 1.10845947265625, -0.7015666961669922], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000095.npy"}
|
||||
{"epoch": 0.1436130007558579, "step": 96, "batch_size": 64, "mean": 0.892608106136322, "std": 1.4201372861862183, "min": -2.6982803344726562, "p10": -0.7774715423583983, "median": 0.8366279602050781, "p90": 2.5683759689331067, "max": 4.93621826171875, "pos_frac": 0.75, "sample": [1.6400794982910156, -2.6982803344726562, 1.4781951904296875, 0.9192428588867188, 2.132568359375, -0.372039794921875, 0.92034912109375, -0.8734359741210938, 0.6595077514648438, 0.33441925048828125, 0.9859695434570312, -0.035236358642578125, 1.0315322875976562, 3.8201904296875, 1.1813201904296875, 1.8853607177734375, -0.894775390625, 0.16623306274414062, 1.0283088684082031, -0.5070114135742188, 2.137706756591797, 2.0335845947265625, 2.0326309204101562, -2.54278564453125, 0.438262939453125, 0.5480403900146484, 2.0613555908203125, 0.447540283203125, 0.35120391845703125, -0.12575531005859375, 2.3025474548339844, 3.359222412109375, 4.93621826171875, 1.4021987915039062, -0.6320400238037109, 0.6928939819335938, -0.965484619140625, 0.2305736541748047, 0.7540130615234375, 1.9130363464355469, 2.10687255859375, -0.8352470397949219, 1.266897201538086, 0.2162017822265625, 2.6972808837890625, 0.369720458984375, 2.682302474975586, 0.086029052734375, 2.1495094299316406, -0.6426620483398438, 1.869781494140625, -0.098358154296875, 0.9877681732177734, 3.1139907836914062, 3.6646728515625, 1.0950393676757812, 0.4045848846435547, 0.08459281921386719, -0.19269371032714844, 1.9943161010742188, 1.302154541015625, 0.06797218322753906, -0.0364990234375, -1.4047698974609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000096.npy"}
|
||||
{"epoch": 0.14512471655328799, "step": 97, "batch_size": 64, "mean": 0.660297155380249, "std": 1.4580153226852417, "min": -2.1367263793945312, "p10": -1.076658058166504, "median": 0.6316947937011719, "p90": 2.3680145263671886, "max": 5.3492431640625, "pos_frac": 0.625, "sample": [-1.3125457763671875, 1.5409011840820312, -1.3132610321044922, 4.366880416870117, 1.0027751922607422, 3.4386062622070312, -0.348876953125, -2.0663604736328125, -0.13559722900390625, -1.316650390625, -1.6101741790771484, -0.21984100341796875, 0.5744094848632812, 1.4511547088623047, -1.0078964233398438, -2.1367263793945312, -0.18299293518066406, 1.9230995178222656, 1.8036384582519531, 0.2829246520996094, 0.8989944458007812, 1.4830551147460938, 0.3575439453125, -0.6046485900878906, 0.34870147705078125, -0.3423614501953125, 0.13390350341796875, 0.9522247314453125, 0.8254318237304688, 0.86895751953125, 1.6352005004882812, -1.096282958984375, 2.1148605346679688, 0.6889801025390625, 0.5106678009033203, 0.9799652099609375, 3.6310901641845703, 0.9439697265625, -0.5937538146972656, 2.4761810302734375, 1.212799072265625, -0.5376472473144531, 5.3492431640625, 1.1809654235839844, 1.2900619506835938, -1.0308666229248047, 0.8223648071289062, 0.403076171875, 2.550262451171875, -0.5790863037109375, -0.1338653564453125, 0.5139389038085938, 2.1546478271484375, -0.35066986083984375, 2.4594573974609375, 1.8286666870117188, 1.8550128936767578, -0.22866058349609375, 0.7881069183349609, 1.5388107299804688, 1.3651161193847656, -0.3886566162109375, -0.007884979248046875, -0.7423229217529297], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000097.npy"}
|
||||
{"epoch": 0.14663643235071808, "step": 98, "batch_size": 64, "mean": 0.5520361661911011, "std": 1.1844736337661743, "min": -2.0243568420410156, "p10": -1.0396896362304686, "median": 0.5621871948242188, "p90": 2.0129646301269535, "max": 3.1109657287597656, "pos_frac": 0.734375, "sample": [0.10882186889648438, 0.4388923645019531, 1.1392173767089844, 1.0518226623535156, 0.8785781860351562, 0.19615936279296875, 0.7770004272460938, 0.7816619873046875, 0.6540679931640625, 2.0729751586914062, -1.9285888671875, -0.6656341552734375, 1.8729400634765625, 0.021167755126953125, 1.6395225524902344, 0.06728363037109375, 0.3048095703125, 2.8247604370117188, 0.7504062652587891, -0.6648712158203125, 1.3651161193847656, 3.1109657287597656, -0.9730072021484375, 0.760467529296875, 0.3469104766845703, -0.4517192840576172, 0.26177215576171875, 0.2528724670410156, -0.4539356231689453, -0.8380775451660156, 1.1389999389648438, 1.384674072265625, 1.3507232666015625, 1.74920654296875, -1.0737571716308594, 0.5497817993164062, 1.049591064453125, 2.608001708984375, 0.4145336151123047, 0.3119659423828125, 1.66510009765625, -0.11794281005859375, -0.6219711303710938, 1.1695499420166016, -1.5855941772460938, -1.2680130004882812, 1.568857192993164, 0.3042144775390625, -0.4375762939453125, 3.04046630859375, 0.5745925903320312, 2.5218582153320312, -1.068267822265625, -1.4473552703857422, -0.4488525390625, 1.3105087280273438, 1.0266494750976562, 0.12057304382324219, 0.5902366638183594, 2.572509765625, 0.2961845397949219, 1.8060111999511719, 0.5968551635742188, -2.0243568420410156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000098.npy"}
|
||||
{"epoch": 0.14814814814814814, "step": 99, "batch_size": 64, "mean": 0.7876993417739868, "std": 1.5577397346496582, "min": -3.4546051025390625, "p10": -0.8775043487548828, "median": 0.7397632598876953, "p90": 2.4859668731689455, "max": 4.525646209716797, "pos_frac": 0.671875, "sample": [3.7636795043945312, 3.884614944458008, 2.5002365112304688, 3.6238021850585938, 1.22314453125, 0.77020263671875, -0.29306793212890625, -0.8899955749511719, -0.15838623046875, 4.525646209716797, 1.20758056640625, 2.0091629028320312, 2.0707130432128906, 2.3695602416992188, 0.4168415069580078, -0.28354644775390625, 0.35674285888671875, 0.1777782440185547, -0.08701705932617188, 2.3465423583984375, 1.560089111328125, -0.2540626525878906, -0.231903076171875, 0.09369277954101562, -0.848358154296875, -0.22408294677734375, 1.79730224609375, -1.614166259765625, -0.7262611389160156, 1.8367691040039062, 0.45885467529296875, 0.140228271484375, 0.13265609741210938, 0.7432022094726562, 0.7363243103027344, 1.6331329345703125, 2.0357933044433594, -1.6117134094238281, 0.9810428619384766, 2.4526710510253906, 0.7488880157470703, -2.479248046875, 2.526165008544922, 4.479156494140625, -0.3668861389160156, 1.8422317504882812, 2.0718116760253906, -0.07865715026855469, -3.4546051025390625, 1.3955879211425781, 1.1660194396972656, 1.4223308563232422, 1.7423973083496094, 1.4842338562011719, -1.1229705810546875, 0.16553497314453125, -0.27490997314453125, 1.6858978271484375, 0.8551616668701172, -1.1285781860351562, 0.3859405517578125, 0.08184432983398438, -0.6588211059570312, -0.7012138366699219], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000099.npy"}
|
||||
{"epoch": 0.14965986394557823, "step": 100, "batch_size": 64, "mean": 0.7481718063354492, "std": 1.8027263879776, "min": -3.8255538940429688, "p10": -1.16617431640625, "median": 0.5420894622802734, "p90": 3.064774322509768, "max": 6.49163818359375, "pos_frac": 0.71875, "sample": [0.44342041015625, -0.4556884765625, -1.2931900024414062, 1.4265365600585938, 1.61151123046875, -0.5309600830078125, 0.5397205352783203, 2.2202510833740234, 3.3208541870117188, 1.0449066162109375, 0.2464141845703125, 1.1303024291992188, 1.5766677856445312, 2.2961349487304688, 0.8520584106445312, 3.8345870971679688, -1.243316650390625, 0.11023330688476562, 0.38912010192871094, 0.872283935546875, 0.5444583892822266, 0.1504974365234375, -1.767791748046875, 0.44556236267089844, -0.28472900390625, 2.055084228515625, 0.1285858154296875, 1.595245361328125, 0.91845703125, -1.835540771484375, 0.129791259765625, -1.3611602783203125, -0.6525192260742188, -0.986175537109375, -0.4344139099121094, -0.6677780151367188, 0.6755218505859375, -0.8747749328613281, -3.8255538940429688, 3.7342567443847656, 0.5566558837890625, 4.9445343017578125, -0.4141082763671875, 0.5376319885253906, 0.05926513671875, 2.2500228881835938, 2.0950164794921875, 0.08217811584472656, 0.09894752502441406, -0.9010734558105469, 4.015289306640625, 2.2356414794921875, 1.3802032470703125, 0.9551830291748047, 0.184906005859375, 1.5545578002929688, -0.10424041748046875, 3.9993743896484375, 2.467254638671875, 0.809417724609375, 6.49163818359375, 0.71728515625, -3.7815093994140625, 1.5700531005859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000100.npy"}
|
||||
{"epoch": 0.15117157974300832, "step": 101, "batch_size": 64, "mean": 1.0367602109909058, "std": 1.399488091468811, "min": -1.62408447265625, "p10": -0.7841350555419921, "median": 0.9540624618530273, "p90": 2.920461463928223, "max": 5.467376708984375, "pos_frac": 0.78125, "sample": [0.16558074951171875, 0.906219482421875, 0.49105072021484375, -0.5263824462890625, 2.9389877319335938, 2.3855514526367188, 1.5587158203125, 0.46816253662109375, -1.2425079345703125, 0.9351100921630859, -0.2514019012451172, 2.0561904907226562, -0.789215087890625, 0.6592254638671875, 3.6575546264648438, -0.7722816467285156, 2.1878890991210938, 1.3927478790283203, -0.5194587707519531, -1.2458114624023438, 1.7338180541992188, 1.0301437377929688, 1.2672271728515625, 0.050334930419921875, 0.5642852783203125, 1.5143470764160156, 2.8272323608398438, 3.2124176025390625, 0.40970802307128906, 2.8772335052490234, 1.4609222412109375, 2.483154296875, 3.074106216430664, 0.50299072265625, -0.13921356201171875, -1.62408447265625, 1.0569534301757812, -1.0761337280273438, -0.3430919647216797, -0.1243438720703125, 0.07395172119140625, 2.21075439453125, 1.0123424530029297, 0.6848068237304688, 3.023345947265625, 1.1570587158203125, 0.6640777587890625, -0.8777923583984375, 0.1954669952392578, 0.8406734466552734, 3.2837295532226562, 0.18190383911132812, 2.845855712890625, 2.1001014709472656, 0.5158309936523438, 1.055267333984375, 2.2534332275390625, 1.3126983642578125, -1.3134193420410156, 0.9730148315429688, 1.202850341796875, 1.6187286376953125, 5.467376708984375, 0.6566658020019531], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000101.npy"}
|
||||
{"epoch": 0.15268329554043839, "step": 102, "batch_size": 64, "mean": 0.7505236864089966, "std": 1.6681915521621704, "min": -2.8406829833984375, "p10": -1.3520080566406247, "median": 0.5724029541015625, "p90": 2.7638244628906254, "max": 5.00067138671875, "pos_frac": 0.671875, "sample": [2.0141677856445312, -0.5848922729492188, -0.027166366577148438, 1.426025390625, -2.2057876586914062, 0.04909706115722656, -0.297882080078125, 1.7500839233398438, -2.5931320190429688, 0.07135391235351562, 1.7353744506835938, 1.0973968505859375, 0.03253936767578125, 0.28333091735839844, 2.79705810546875, -0.6411666870117188, -1.6083335876464844, 4.645782470703125, -0.3791656494140625, 2.686279296875, -1.1228218078613281, 0.35602760314941406, 1.5761642456054688, 0.2999267578125, 1.984853744506836, 1.458974838256836, 1.4646453857421875, 1.7886390686035156, -1.8162307739257812, 0.4135322570800781, 2.5143814086914062, 2.0636329650878906, -1.1808319091796875, 2.3244171142578125, 0.05536651611328125, 0.66644287109375, 1.6436538696289062, 3.7875518798828125, 3.7198638916015625, -0.45310020446777344, 0.14035797119140625, 3.355865478515625, -1.63543701171875, 0.5734710693359375, -0.8492507934570312, 1.143280029296875, 0.7284469604492188, 0.5713348388671875, 0.8403759002685547, 0.23575592041015625, 2.202106475830078, 2.2865447998046875, -0.28185272216796875, -1.4253692626953125, -0.3191032409667969, -0.7660446166992188, -0.39273834228515625, 5.00067138671875, -0.046237945556640625, -2.8406829833984375, 2.8326148986816406, 2.1325454711914062, 1.9892234802246094, 0.7615833282470703], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000102.npy"}
|
||||
{"epoch": 0.15419501133786848, "step": 103, "batch_size": 64, "mean": 0.8205587863922119, "std": 1.5562326908111572, "min": -2.3835830688476562, "p10": -0.8306385040283203, "median": 0.6966705322265625, "p90": 2.773204803466797, "max": 5.663444519042969, "pos_frac": 0.703125, "sample": [-0.885528564453125, 0.7259902954101562, 2.4210662841796875, -2.3835830688476562, 1.7113380432128906, 0.5985603332519531, 0.9234848022460938, 0.12281036376953125, 0.7315902709960938, 1.1520271301269531, -0.46183013916015625, 1.1868209838867188, -0.6897964477539062, 0.2913818359375, -0.4119415283203125, 2.1281204223632812, 2.0407161712646484, 2.6732826232910156, -2.25970458984375, 3.085186004638672, -0.1007537841796875, 0.4194908142089844, 0.3212261199951172, 1.2578544616699219, 1.2605743408203125, -0.6390380859375, -0.5231056213378906, 2.0150527954101562, -1.1050033569335938, 1.1555938720703125, 0.6673507690429688, 3.140960693359375, 3.446533203125, 0.1893444061279297, -0.7874679565429688, 0.4399566650390625, -1.2606582641601562, 1.543365478515625, 0.6058425903320312, 0.17155838012695312, 2.7552337646484375, 0.4347953796386719, 0.8392066955566406, 1.08929443359375, 2.0677566528320312, -0.5947265625, 2.7809066772460938, 0.22456741333007812, 3.5574607849121094, -0.8491401672363281, 4.9892120361328125, 2.2994918823242188, -1.8616142272949219, 0.7481307983398438, -0.7581329345703125, -0.067657470703125, 0.19720458984375, -0.210418701171875, -0.3585700988769531, 1.6270713806152344, 0.9256744384765625, 1.0404090881347656, 5.663444519042969, 1.0574951171875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000103.npy"}
|
||||
{"epoch": 0.15570672713529857, "step": 104, "batch_size": 64, "mean": 0.8415881395339966, "std": 1.7601736783981323, "min": -3.0569610595703125, "p10": -1.370681762695312, "median": 0.6440448760986328, "p90": 3.162010383605957, "max": 5.9444580078125, "pos_frac": 0.671875, "sample": [1.6190185546875, 1.441925048828125, 1.21929931640625, 0.22714996337890625, -1.5333480834960938, -0.313720703125, 0.6218185424804688, 0.99365234375, 1.88922119140625, -0.63946533203125, 0.2727928161621094, -2.0564193725585938, 5.9444580078125, 2.096832275390625, -1.6556549072265625, -0.3970794677734375, 3.01458740234375, 0.7992267608642578, -0.5075645446777344, 0.6078262329101562, 0.005340576171875, -3.0569610595703125, 2.75115966796875, 2.4096527099609375, -0.42236328125, -0.06037139892578125, 0.9077987670898438, -1.6452102661132812, 3.177724838256836, -0.40978431701660156, 0.5731201171875, -0.3288249969482422, -0.5109176635742188, 0.7923355102539062, 2.413135528564453, 1.1783084869384766, 0.5669174194335938, -0.8007698059082031, 3.2758560180664062, 0.6662712097167969, 0.9874343872070312, 0.06228446960449219, 4.5968017578125, -0.6756439208984375, 2.214855194091797, 2.9809494018554688, 2.2508544921875, 0.5389041900634766, -2.3124923706054688, -0.9911270141601562, 1.8357009887695312, 0.9143714904785156, 2.1934852600097656, 3.1253433227539062, 1.0346660614013672, 0.9038658142089844, 0.25231170654296875, -0.2014923095703125, 0.47782135009765625, -1.7693328857421875, -0.8923568725585938, 3.8954200744628906, 3.77447509765625, 3.537567138671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000104.npy"}
|
||||
{"epoch": 0.15721844293272866, "step": 105, "batch_size": 64, "mean": 0.7174830436706543, "std": 1.6079182624816895, "min": -2.9199981689453125, "p10": -0.7797454833984374, "median": 0.32891082763671875, "p90": 2.8153091430664063, "max": 4.716510772705078, "pos_frac": 0.640625, "sample": [1.2137603759765625, -0.02130889892578125, 0.803192138671875, 1.5336494445800781, 0.9934768676757812, 1.7151355743408203, -2.87493896484375, 4.089599609375, -2.0345001220703125, 0.8705615997314453, -0.5953292846679688, 2.4452171325683594, 1.571146011352539, 2.362060546875, -0.000865936279296875, 3.5477218627929688, 0.12772369384765625, -2.2764739990234375, -0.5643577575683594, 3.0196571350097656, 0.03258514404296875, 0.2980079650878906, 0.0763092041015625, -0.794769287109375, 0.33562469482421875, 0.2105712890625, 1.2894248962402344, 2.672454833984375, 1.4228973388671875, -0.6449737548828125, -0.4033966064453125, 1.1143321990966797, -0.74468994140625, -0.29636383056640625, -1.0126571655273438, 2.8968963623046875, 0.7610397338867188, -0.174041748046875, -0.72021484375, 0.7049293518066406, 0.20062637329101562, -0.4379920959472656, -0.659942626953125, 2.0174102783203125, 2.6909027099609375, 2.3543968200683594, -2.9199981689453125, -0.160919189453125, -0.1350555419921875, -0.19484710693359375, 0.19110107421875, 0.9983711242675781, 0.21364402770996094, 4.5556793212890625, 2.840728759765625, 0.32219696044921875, 4.716510772705078, 1.5944366455078125, -0.48583030700683594, 1.4555530548095703, -0.8177490234375, 2.7559967041015625, 0.90362548828125, 0.970977783203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000105.npy"}
|
||||
{"epoch": 0.15873015873015872, "step": 106, "batch_size": 64, "mean": 0.7614032030105591, "std": 1.6407133340835571, "min": -4.00054931640625, "p10": -1.1798942565917967, "median": 0.8685598373413086, "p90": 2.8323154449462895, "max": 4.222324371337891, "pos_frac": 0.6875, "sample": [1.050323486328125, 2.1741790771484375, 1.2917289733886719, 1.6557579040527344, -0.22714996337890625, -0.6379585266113281, 4.222324371337891, 1.9725761413574219, 0.03375244140625, 1.4753494262695312, 1.0754852294921875, 1.3125877380371094, 4.1745147705078125, 0.6335525512695312, -0.3870716094970703, -1.24005126953125, 0.7539710998535156, 0.5921897888183594, -0.19823455810546875, 0.2236785888671875, -0.2374420166015625, 0.318206787109375, -1.4395294189453125, 0.8432769775390625, -0.6547079086303711, -1.0395278930664062, -0.08688926696777344, -2.2115936279296875, 1.0716285705566406, 4.0321197509765625, -0.3980865478515625, 0.22363662719726562, -0.3809776306152344, 1.3146324157714844, 1.9808731079101562, 3.0989990234375, 2.023160934448242, -0.6846714019775391, 2.864604949951172, 1.95263671875, 1.1501693725585938, 0.6804275512695312, 3.839021682739258, 0.8073577880859375, -1.8443984985351562, -0.0702972412109375, 0.37238502502441406, 0.9867744445800781, -4.00054931640625, 0.8938426971435547, -2.09478759765625, 1.3328857421875, 2.026947021484375, 2.966390609741211, 1.1964492797851562, 1.523651123046875, 0.30875396728515625, 1.9569988250732422, -0.2174835205078125, 2.7569732666015625, -3.352142333984375, 1.7845687866210938, 1.4270477294921875, 1.7569618225097656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000106.npy"}
|
||||
{"epoch": 0.1602418745275888, "step": 107, "batch_size": 64, "mean": 0.5568374395370483, "std": 1.9092270135879517, "min": -3.869476318359375, "p10": -1.7906147003173827, "median": 0.5914201736450195, "p90": 3.166878890991212, "max": 5.4338226318359375, "pos_frac": 0.671875, "sample": [0.41867828369140625, -3.4701461791992188, 1.246856689453125, 4.334072113037109, 1.0539398193359375, 1.7590255737304688, -1.414154052734375, 2.1251983642578125, -2.236419677734375, 1.28765869140625, -3.869476318359375, 1.579254150390625, 1.4441204071044922, 1.1511383056640625, 0.6499900817871094, -1.8649635314941406, -0.12286376953125, 0.506500244140625, 0.2894325256347656, -1.3225555419921875, -0.5843124389648438, 5.4338226318359375, 4.34552001953125, 0.16649627685546875, 3.2906227111816406, -3.4450225830078125, 2.3346195220947266, -1.6171340942382812, -0.42882537841796875, 0.4181938171386719, 0.8837051391601562, -0.8637466430664062, 0.7504425048828125, 1.0139694213867188, 0.6383209228515625, 0.9227752685546875, 3.687957763671875, 0.23671722412109375, 0.7293014526367188, 4.4349365234375, 1.216796875, 2.878143310546875, -0.6785964965820312, -1.1692733764648438, 1.5243854522705078, 1.5221939086914062, 0.3007011413574219, 0.8951416015625, 0.5445194244384766, -1.95550537109375, 3.6787490844726562, 2.100940704345703, 0.15443801879882812, -1.4622516632080078, -0.4952716827392578, 0.4271392822265625, -0.2503509521484375, 1.4585723876953125, 0.7886829376220703, -0.2188873291015625, -0.8734054565429688, -2.9998321533203125, 0.47199249267578125, 1.8849258422851562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000107.npy"}
|
||||
{"epoch": 0.1617535903250189, "step": 108, "batch_size": 64, "mean": 0.9285261631011963, "std": 1.7221009731292725, "min": -3.4604415893554688, "p10": -1.0346939086914062, "median": 1.07598876953125, "p90": 3.2164863586425785, "max": 4.8125762939453125, "pos_frac": 0.59375, "sample": [1.5537338256835938, 1.7203903198242188, 3.1038589477539062, 2.4510345458984375, 0.08496284484863281, 3.2647552490234375, 1.9346389770507812, 2.1292591094970703, 4.1364898681640625, -0.9896621704101562, 0.14282989501953125, 4.380565643310547, 1.5148162841796875, -0.43445396423339844, 0.8709259033203125, 1.3019905090332031, -1.0539932250976562, -0.177093505859375, -0.06652069091796875, 1.462930679321289, 2.211669921875, 0.2701301574707031, -0.10752677917480469, -1.69146728515625, 1.1819915771484375, -1.8997116088867188, 2.2735977172851562, -1.7064208984375, 4.8125762939453125, 0.9699859619140625, -0.016937255859375, -0.6355857849121094, 1.9677505493164062, -0.23349380493164062, 1.7156143188476562, 3.820016860961914, -0.2944507598876953, -0.14426422119140625, -3.4604415893554688, 1.8077926635742188, -0.9786758422851562, 3.5047683715820312, 0.2176380157470703, -0.4762096405029297, -0.38549041748046875, 1.4360771179199219, -0.6173973083496094, 3.046163558959961, -0.01247406005859375, -1.4942398071289062, -1.3826675415039062, -0.42162322998046875, -0.4291839599609375, 2.1757736206054688, 1.762054443359375, 1.472625732421875, 1.8748054504394531, 3.866943359375, 1.4468612670898438, 2.1416397094726562, -0.08170127868652344, 2.2987098693847656, -0.32094573974609375, 2.6099395751953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000108.npy"}
|
||||
{"epoch": 0.16326530612244897, "step": 109, "batch_size": 64, "mean": 1.127976894378662, "std": 1.679814100265503, "min": -1.4302444458007812, "p10": -0.5709148406982422, "median": 0.7993402481079102, "p90": 2.804726600646973, "max": 6.737762451171875, "pos_frac": 0.734375, "sample": [0.9198684692382812, 2.2431411743164062, 5.645355224609375, -0.5492134094238281, 2.0904312133789062, 1.2151012420654297, 3.2955970764160156, 2.1880874633789062, 1.3204402923583984, 2.291759490966797, 0.7111396789550781, -0.4104156494140625, 2.73712158203125, 2.667510986328125, 0.0526885986328125, -0.3782958984375, 2.0382137298583984, 0.46722412109375, 0.570220947265625, 1.130910873413086, 1.3310813903808594, 2.833700180053711, -0.6607818603515625, 0.0033311843872070312, 0.8948211669921875, 6.2144622802734375, -1.0392990112304688, 0.12817955017089844, 0.68511962890625, 6.737762451171875, 0.6319599151611328, 0.6979751586914062, -0.183990478515625, -0.4893989562988281, 0.7573871612548828, 1.0024681091308594, -0.7448368072509766, -0.1998748779296875, 0.24600982666015625, 1.9965972900390625, -0.5802154541015625, 0.99249267578125, 0.38533973693847656, -0.0295257568359375, -0.2843799591064453, -0.7057113647460938, 0.6447372436523438, 1.097726821899414, -0.957672119140625, -1.4302444458007812, 0.48236846923828125, 0.663360595703125, 0.8412933349609375, 3.9384918212890625, 2.3201065063476562, 2.4779281616210938, 4.678394317626953, 2.2861175537109375, 1.0724639892578125, 1.4918746948242188, -0.5041351318359375, -0.5365867614746094, 1.065155029296875, 1.6915817260742188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000109.npy"}
|
||||
{"epoch": 0.16477702191987906, "step": 110, "batch_size": 64, "mean": 0.6457208395004272, "std": 1.6442521810531616, "min": -3.8150482177734375, "p10": -1.2723730087280274, "median": 0.6053342819213867, "p90": 2.5168918609619144, "max": 5.87738037109375, "pos_frac": 0.703125, "sample": [0.2808341979980469, 0.4732170104980469, -2.3949127197265625, 0.5779304504394531, 0.20479583740234375, 1.095001220703125, 1.6939697265625, 2.188079833984375, 0.439117431640625, 3.0252914428710938, 2.5720787048339844, 1.4568042755126953, 0.32044219970703125, 2.8444366455078125, -0.541259765625, -1.1632003784179688, 0.16530990600585938, 4.2250823974609375, -0.6219444274902344, -3.357858657836914, 0.5287189483642578, 0.7185745239257812, 0.8051395416259766, 1.6324996948242188, 0.22339630126953125, 0.916290283203125, 0.43656158447265625, -0.5587844848632812, 0.3961677551269531, -1.3928451538085938, 1.6658172607421875, 1.26715087890625, 1.541168212890625, -0.9457511901855469, -0.7901611328125, 0.94708251953125, -1.5308990478515625, 3.105682373046875, -0.3521232604980469, 1.4601688385009766, 2.38812255859375, 0.77886962890625, -0.0586700439453125, -0.20703125, 0.0486297607421875, 1.22808837890625, 1.732940673828125, -1.4195327758789062, -0.3251953125, -3.8150482177734375, 0.5115966796875, 1.7601547241210938, 0.7079696655273438, -0.6647567749023438, 0.9375076293945312, 5.87738037109375, 2.1310958862304688, 2.0365123748779297, 0.6327381134033203, 3.6803436279296875, -1.2944583892822266, 1.4744415283203125, 0.84820556640625, -1.2208404541015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000110.npy"}
|
||||
{"epoch": 0.16628873771730915, "step": 111, "batch_size": 64, "mean": 0.8562856912612915, "std": 1.8348157405853271, "min": -4.07537841796875, "p10": -1.4242721557617186, "median": 0.9805984497070312, "p90": 3.0052879333496096, "max": 5.7321014404296875, "pos_frac": 0.671875, "sample": [-0.2462329864501953, -2.3360252380371094, -0.7934036254882812, 5.7321014404296875, 1.2903518676757812, 1.0933074951171875, -1.4892196655273438, 1.8669204711914062, 1.9516639709472656, 1.3156089782714844, 2.0379714965820312, 1.793792724609375, -1.2727279663085938, 0.983642578125, -0.3983612060546875, 1.2114715576171875, 0.1336345672607422, 1.416738510131836, -0.8798904418945312, 3.1801929473876953, 0.63323974609375, 0.733306884765625, 1.3631515502929688, 3.6301345825195312, 0.9120330810546875, 1.4378509521484375, 0.9775543212890625, 3.018524169921875, 1.2352886199951172, 1.19561767578125, -0.1270313262939453, -1.2525272369384766, -4.07537841796875, -2.166290283203125, 1.3429107666015625, 2.049407958984375, 0.9518280029296875, 2.6205596923828125, -0.5259075164794922, 2.9744033813476562, 2.8669052124023438, 2.6409835815429688, 4.1121673583984375, -0.7753143310546875, 0.9147682189941406, 0.2573585510253906, 1.1238784790039062, 5.084747314453125, -1.8535842895507812, 1.5422801971435547, 2.9743728637695312, 3.2675514221191406, 2.0122337341308594, -0.4394989013671875, -2.8435897827148438, 0.5385398864746094, -1.61785888671875, -0.9700469970703125, -0.21696090698242188, 2.0013351440429688, 0.779296875, -0.16564178466796875, 0.3139228820800781, -0.265777587890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000111.npy"}
|
||||
{"epoch": 0.16780045351473924, "step": 112, "batch_size": 64, "mean": 1.0364547967910767, "std": 1.6341077089309692, "min": -3.7323226928710938, "p10": -1.125542449951172, "median": 1.0651979446411133, "p90": 3.0010066986083985, "max": 4.365142822265625, "pos_frac": 0.78125, "sample": [2.574066162109375, 4.0164031982421875, 4.365142822265625, 1.4036598205566406, -0.177703857421875, 1.2922821044921875, 3.7633514404296875, 1.4346466064453125, 1.9080429077148438, 3.008819580078125, 0.33789825439453125, 0.3638763427734375, -1.1066207885742188, 1.535848617553711, 0.24933624267578125, -1.5617218017578125, 0.46295166015625, -0.28850555419921875, -2.7722320556640625, 0.37273406982421875, 2.1600494384765625, -1.546234130859375, 2.8027801513671875, 2.6155242919921875, 0.5382003784179688, 1.9733657836914062, 0.479248046875, 0.7193603515625, 1.32171630859375, 1.389200210571289, 3.2667388916015625, 0.770050048828125, -1.1336517333984375, 0.8515338897705078, 2.919891357421875, 1.0990142822265625, 1.2186336517333984, 0.12714385986328125, 3.9376602172851562, 0.2809600830078125, -0.4991893768310547, 1.5404052734375, 2.1151466369628906, 1.031381607055664, -1.55419921875, 3.5283050537109375, 2.972381591796875, 2.770050048828125, -0.944000244140625, 0.14899444580078125, -3.7323226928710938, 2.982776641845703, 0.06482696533203125, 1.4600143432617188, 1.701568603515625, 0.6788330078125, -0.19415855407714844, -1.3563480377197266, 1.5495147705078125, 0.941070556640625, 1.97308349609375, -0.0245208740234375, 1.314239501953125, 0.8917922973632812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000112.npy"}
|
||||
{"epoch": 0.1693121693121693, "step": 113, "batch_size": 64, "mean": 0.846626341342926, "std": 1.8341944217681885, "min": -5.98651123046875, "p10": -1.0531822204589842, "median": 0.7589178085327148, "p90": 3.243308258056641, "max": 4.106426239013672, "pos_frac": 0.734375, "sample": [-0.827239990234375, -2.0167388916015625, -1.02935791015625, 0.16707229614257812, -0.6343002319335938, 4.106426239013672, 0.7518386840820312, -0.25940704345703125, 1.3454742431640625, 2.9390316009521484, 0.5404548645019531, 0.16080474853515625, -0.507232666015625, 2.9277877807617188, -1.5215911865234375, 0.9772186279296875, 0.8767471313476562, 4.085826873779297, -1.0633926391601562, -2.419374465942383, 0.3952789306640625, 0.3796882629394531, 2.5873870849609375, -2.3721160888671875, -0.8402481079101562, 1.1309967041015625, 3.265411376953125, 1.2013359069824219, 1.5088233947753906, 0.4396839141845703, 3.0075206756591797, 2.72393798828125, -0.09911727905273438, -0.7466659545898438, 2.2721786499023438, 3.620584487915039, -5.98651123046875, 1.3985958099365234, 2.20086669921875, 0.28551483154296875, 0.7659969329833984, 3.727445602416992, 0.0147705078125, 3.1917343139648438, -1.0116386413574219, 0.4532623291015625, 1.1019668579101562, 0.5501270294189453, -1.1402854919433594, 2.9400081634521484, 1.4868011474609375, 0.62188720703125, 1.041360855102539, 1.9640045166015625, 3.6177825927734375, -0.34668922424316406, 1.9994659423828125, 0.4021949768066406, 1.3758392333984375, 0.097503662109375, 1.4313583374023438, 0.9174118041992188, 0.024517059326171875, 3.9840660095214844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000113.npy"}
|
||||
{"epoch": 0.1708238851095994, "step": 114, "batch_size": 64, "mean": 1.4495927095413208, "std": 1.7445006370544434, "min": -1.588296890258789, "p10": -0.7290803909301757, "median": 1.1398305892944336, "p90": 3.75204029083252, "max": 5.844505310058594, "pos_frac": 0.796875, "sample": [-0.04084014892578125, 1.5401382446289062, 1.0449562072753906, 1.4721641540527344, 3.1372203826904297, 0.44057464599609375, 2.417388916015625, 0.4629058837890625, 2.65899658203125, -1.0468673706054688, -0.17706298828125, 2.76190185546875, 2.066234588623047, 0.262298583984375, 0.31922149658203125, -1.4590682983398438, 2.4754257202148438, 0.7431125640869141, 0.051082611083984375, 0.4102630615234375, 2.0796165466308594, 0.1076202392578125, 5.844505310058594, 0.19820213317871094, 0.8698768615722656, 3.1840896606445312, 1.5841312408447266, 2.9370956420898438, -0.3275909423828125, 3.0743274688720703, 0.2737274169921875, 1.2347049713134766, 3.9095230102539062, 1.7824249267578125, 2.5644912719726562, 0.6861495971679688, 2.8201522827148438, 2.7276382446289062, 3.6623706817626953, -1.5508918762207031, 5.349761962890625, 3.7904701232910156, 2.2144622802734375, -0.6623592376708984, -0.0032196044921875, 0.5257015228271484, 4.2259063720703125, 0.4218406677246094, -0.9257087707519531, 0.14687728881835938, -1.2047348022460938, 3.62005615234375, 1.9644298553466797, 3.253082275390625, 3.9266319274902344, 3.302154541015625, 0.8924198150634766, 0.05263328552246094, -0.7576751708984375, -1.588296890258789, 4.3094024658203125, -0.14154052734375, 1.8888015747070312, 0.9706230163574219], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000114.npy"}
|
||||
{"epoch": 0.17233560090702948, "step": 115, "batch_size": 64, "mean": 1.0813273191452026, "std": 1.7658095359802246, "min": -2.147899627685547, "p10": -0.9219696044921873, "median": 0.8744363784790039, "p90": 3.146670532226564, "max": 6.833517074584961, "pos_frac": 0.75, "sample": [-0.1861114501953125, 1.27337646484375, 2.0313949584960938, -1.4902801513671875, 4.6439971923828125, 2.152467727661133, 1.3228225708007812, 2.4897308349609375, 0.8639297485351562, 6.691497802734375, 1.0007286071777344, -1.0010757446289062, 6.833517074584961, -1.0769729614257812, 0.285858154296875, 1.7018280029296875, 0.02339935302734375, 1.1028289794921875, 0.3946075439453125, 1.162017822265625, 2.7779464721679688, 0.6330184936523438, -0.7373886108398438, -0.5863761901855469, -1.3359794616699219, 1.6172599792480469, 0.03560638427734375, 0.93206787109375, -1.852874755859375, 3.559185028076172, -0.1037139892578125, 0.8524894714355469, 0.5297107696533203, 1.6687164306640625, 1.4942054748535156, 3.3046951293945312, -0.099639892578125, 1.9384899139404297, 1.0781173706054688, 1.83380126953125, 2.4281272888183594, -2.147899627685547, 2.6365203857421875, 0.8316802978515625, 2.438812255859375, 1.2121715545654297, 2.1836700439453125, 1.943887710571289, 0.008228302001953125, 0.6148681640625, 0.3752632141113281, 0.25101470947265625, 4.533382415771484, 0.13287734985351562, 0.007781982421875, -0.38745689392089844, -0.28672027587890625, -0.644805908203125, -1.5691986083984375, 0.6514739990234375, 1.8612384796142578, -0.046077728271484375, 0.8849430084228516, 3.5322647094726562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000115.npy"}
|
||||
{"epoch": 0.17384731670445955, "step": 116, "batch_size": 64, "mean": 1.1488078832626343, "std": 1.525198221206665, "min": -2.4168243408203125, "p10": -0.4934967041015624, "median": 0.9058113098144531, "p90": 3.2893058776855475, "max": 4.439823150634766, "pos_frac": 0.75, "sample": [1.5003814697265625, 0.3162841796875, 0.9740524291992188, 4.3929290771484375, 1.2474899291992188, -0.5817489624023438, -0.3113555908203125, 0.6743736267089844, 2.000638961791992, 2.7076034545898438, 1.5817184448242188, -0.535125732421875, -0.03375244140625, 2.526092529296875, 0.7927951812744141, -0.32965087890625, 0.12136077880859375, 0.23191070556640625, 2.7968673706054688, 0.09553146362304688, -2.4168243408203125, 2.7046127319335938, 2.6716651916503906, 0.9960746765136719, -0.3530769348144531, 1.1876144409179688, 0.9890899658203125, 4.439823150634766, 2.35919189453125, 1.357940673828125, -1.0708465576171875, 0.5748462677001953, -0.2010955810546875, 0.7075080871582031, 3.7265777587890625, 0.8375701904296875, 2.8209457397460938, 3.5135879516601562, 0.49399566650390625, 0.12496185302734375, 0.5882034301757812, 0.7903480529785156, 3.4417953491210938, 1.6866302490234375, 3.3724517822265625, 2.6949501037597656, 2.90625, -0.3963623046875, -0.04668426513671875, 1.3954887390136719, 0.15267562866210938, 2.6353530883789062, -0.10820770263671875, -1.630767822265625, 0.7260246276855469, 1.2729034423828125, 0.3050498962402344, 1.8629684448242188, -0.046932220458984375, 2.2819976806640625, 3.0952987670898438, -1.0861949920654297, 3.5385894775390625, -1.5406837463378906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000116.npy"}
|
||||
{"epoch": 0.17535903250188964, "step": 117, "batch_size": 64, "mean": 0.9695931673049927, "std": 1.7702385187149048, "min": -4.527809143066406, "p10": -1.0288747787475585, "median": 0.8788986206054688, "p90": 3.2751224517822264, "max": 5.476661682128906, "pos_frac": 0.734375, "sample": [1.6070175170898438, 3.2110767364501953, 0.3184814453125, 0.33831024169921875, 0.3717803955078125, 1.4912490844726562, 1.5769977569580078, 1.2690582275390625, 0.5585861206054688, 1.4797077178955078, 0.1809844970703125, -1.1887588500976562, 1.692718505859375, -0.3769378662109375, 1.595306396484375, 2.7502365112304688, -0.4902191162109375, 1.8474349975585938, 2.4217453002929688, 3.2670021057128906, -0.8637657165527344, 1.3307762145996094, 1.992767333984375, 3.2786026000976562, -0.6988201141357422, -0.30350494384765625, -0.8657989501953125, 1.9588851928710938, 0.3480720520019531, 5.476661682128906, 0.10265350341796875, 4.394721984863281, 2.9621658325195312, -1.1123523712158203, 0.8881988525390625, 2.1420364379882812, 1.8754959106445312, 3.6658859252929688, 1.0317306518554688, 3.8799209594726562, 0.822998046875, 0.6032676696777344, -0.5898818969726562, 1.5787925720214844, -0.5179595947265625, -4.527809143066406, 0.869598388671875, 0.7543563842773438, 2.8574066162109375, 0.7872085571289062, 0.16520309448242188, 1.79718017578125, 1.6746597290039062, -2.0226669311523438, 3.3123016357421875, -2.6990966796875, -1.098764419555664, -0.3727226257324219, -2.30426025390625, 0.08147430419921875, 3.3520355224609375, 1.3314590454101562, -0.07222557067871094, 0.8652973175048828], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000117.npy"}
|
||||
{"epoch": 0.17687074829931973, "step": 118, "batch_size": 64, "mean": 1.3125793933868408, "std": 1.6938717365264893, "min": -2.8687267303466797, "p10": -0.6382362365722656, "median": 1.5083560943603516, "p90": 3.5519966125488285, "max": 4.417636871337891, "pos_frac": 0.765625, "sample": [2.2981719970703125, 1.006500244140625, 3.0862579345703125, -0.5386199951171875, -2.8687267303466797, 2.295001983642578, 2.4505615234375, 0.20680999755859375, 2.3857860565185547, -0.5028228759765625, 1.2046890258789062, 1.5460968017578125, 0.17877578735351562, 4.300777435302734, 0.5509529113769531, 0.014251708984375, -1.9785690307617188, 1.9730796813964844, 0.12124252319335938, 1.4706153869628906, 0.33751678466796875, 2.715301513671875, 1.899496078491211, 3.6541748046875, 1.86651611328125, -0.16068649291992188, 0.123809814453125, -0.6290664672851562, 2.214200973510742, -0.11421775817871094, 1.9635086059570312, 1.6011371612548828, 4.311592102050781, -0.2810935974121094, 0.733154296875, 2.315093994140625, 0.10494232177734375, 4.339912414550781, 2.7274932861328125, -2.42755126953125, -1.128438949584961, 2.8183135986328125, -0.6421661376953125, 2.42236328125, 0.8622398376464844, 1.1035079956054688, -0.6594467163085938, 2.0234375, 3.242572784423828, 2.4744415283203125, 1.8588371276855469, 2.1844215393066406, -0.4237060546875, 4.417636871337891, 4.028739929199219, 1.3188629150390625, -0.890625, 2.457071304321289, 3.598966598510742, 3.4423999786376953, 2.585399627685547, -0.26950645446777344, 0.4942512512207031, 0.1894378662109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000118.npy"}
|
||||
{"epoch": 0.17838246409674982, "step": 119, "batch_size": 64, "mean": 0.987857460975647, "std": 1.856561541557312, "min": -3.315582275390625, "p10": -1.441047668457031, "median": 0.8214588165283203, "p90": 3.241946029663086, "max": 4.354911804199219, "pos_frac": 0.703125, "sample": [0.9665031433105469, 3.4230270385742188, -0.6091518402099609, 1.6393699645996094, -1.2538604736328125, 1.5820159912109375, 2.9501514434814453, 0.8807830810546875, -1.0331840515136719, -0.011867523193359375, -0.9751396179199219, 0.48508453369140625, 1.1800537109375, 1.6407546997070312, 3.052448272705078, -2.1596908569335938, 0.13928985595703125, -2.0667686462402344, 0.6688823699951172, 0.195556640625, -1.523712158203125, 3.958040237426758, 0.4308929443359375, 2.127838134765625, 4.354911804199219, 1.0447158813476562, -1.521270751953125, 2.9531517028808594, 3.7136688232421875, 3.275970458984375, 2.8218994140625, -2.272674560546875, 1.7840442657470703, 1.8599739074707031, 0.777862548828125, 2.38916015625, 0.8650550842285156, 0.03536224365234375, 0.7061080932617188, 4.233345031738281, 3.0713729858398438, 2.7888641357421875, -0.9207782745361328, 0.8801116943359375, -1.2258758544921875, -1.5459842681884766, 2.5051116943359375, 2.7016372680664062, 3.0996017456054688, 0.6481552124023438, 3.0613250732421875, -3.315582275390625, -1.25079345703125, -0.9029560089111328, 0.6950225830078125, -0.4084434509277344, 2.4149951934814453, 4.299903869628906, 3.162555694580078, 0.361968994140625, 0.7230205535888672, 0.3534393310546875, -0.5199851989746094, -0.16241455078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000119.npy"}
|
||||
{"epoch": 0.17989417989417988, "step": 120, "batch_size": 64, "mean": 1.3723986148834229, "std": 1.699534296989441, "min": -2.0248947143554688, "p10": -0.6953826904296874, "median": 1.0546083450317383, "p90": 3.330525779724121, "max": 6.5825653076171875, "pos_frac": 0.828125, "sample": [-1.4434967041015625, 1.0322513580322266, -0.81549072265625, 3.308879852294922, 2.4970703125, 0.2827262878417969, 2.163745880126953, 0.20318603515625, 0.6986198425292969, -1.6054153442382812, 1.8472137451171875, 0.22266006469726562, 0.5341777801513672, 1.191925048828125, 0.3796539306640625, 4.885894775390625, 2.5049972534179688, 0.6258506774902344, 0.89434814453125, 1.3608245849609375, 3.0322418212890625, 0.33925628662109375, 1.72100830078125, 1.4052238464355469, 0.8742828369140625, 1.1613807678222656, 2.3289947509765625, 2.3851318359375, 1.7021713256835938, 2.025543212890625, 6.5825653076171875, -0.1253814697265625, 4.546134948730469, 0.3133811950683594, 3.099681854248047, 0.3505744934082031, 2.1667747497558594, -0.27234649658203125, 3.8274459838867188, 0.792755126953125, -0.7885723114013672, 1.3239173889160156, 0.11346435546875, 5.7828521728515625, -0.474334716796875, 0.4473876953125, -2.0248947143554688, 2.923126220703125, 2.6689987182617188, 0.7791366577148438, 0.8629856109619141, -0.7361373901367188, 3.718902587890625, 0.78289794921875, 2.4573516845703125, -0.6002883911132812, 1.4611053466796875, 3.3104171752929688, 0.6987380981445312, -0.7992172241210938, 2.0474166870117188, 0.43570709228515625, 3.339143753051758, 1.07696533203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000120.npy"}
|
||||
{"epoch": 0.18140589569160998, "step": 121, "batch_size": 64, "mean": 1.1161892414093018, "std": 1.647952675819397, "min": -2.7923126220703125, "p10": -0.7087459564208983, "median": 0.9623699188232422, "p90": 3.215848922729493, "max": 6.65557861328125, "pos_frac": 0.765625, "sample": [4.4926605224609375, 1.476104736328125, -0.6425819396972656, -0.8149223327636719, 3.330038070678711, 2.353851318359375, 3.7417335510253906, -0.45733070373535156, 1.7960700988769531, -0.8884658813476562, 1.2051773071289062, -0.6053009033203125, 5.6107635498046875, 0.6100406646728516, 0.5839385986328125, 0.27655982971191406, 2.9494075775146484, -2.7923126220703125, 1.4624557495117188, -1.0900802612304688, 1.1025848388671875, 0.8414878845214844, 2.069835662841797, 1.180511474609375, 0.9966583251953125, 1.8056449890136719, 0.9989166259765625, -1.2022266387939453, 0.9119033813476562, 1.8926925659179688, 0.15543365478515625, 6.65557861328125, 1.8668556213378906, 0.7821807861328125, -0.6466751098632812, -0.4036712646484375, 1.1844482421875, 1.8013916015625, 2.0981597900390625, 0.7482986450195312, 0.6316566467285156, 0.3723869323730469, 0.8313522338867188, 2.2844314575195312, 1.0714340209960938, 0.8939666748046875, -1.4053993225097656, 3.8900070190429688, 2.2106781005859375, 1.516702651977539, -0.4092388153076172, 0.7922325134277344, 1.7453536987304688, 2.048849105834961, 0.9280815124511719, 3.979736328125, 0.8164234161376953, -0.7353477478027344, 0.5817584991455078, -0.5769882202148438, 0.042270660400390625, 1.6773147583007812, 1.28997802734375, -0.47934532165527344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000121.npy"}
|
||||
{"epoch": 0.18291761148904007, "step": 122, "batch_size": 64, "mean": 1.4077012538909912, "std": 1.9878206253051758, "min": -3.925537109375, "p10": -0.378847312927246, "median": 1.3015737533569336, "p90": 3.3980844497680667, "max": 7.849578857421875, "pos_frac": 0.78125, "sample": [7.849578857421875, -1.0880470275878906, -0.28558349609375, 4.052276611328125, -3.925537109375, -0.41881752014160156, 1.603546142578125, 1.8805160522460938, 1.3857669830322266, 1.6614799499511719, 2.7241744995117188, 0.51416015625, 0.4243488311767578, 0.5142593383789062, -0.2303333282470703, 2.548126220703125, 0.67919921875, 1.6198272705078125, -1.2077293395996094, 3.0488147735595703, 0.5640792846679688, 7.198286056518555, 1.1601791381835938, 3.3677139282226562, 5.18353271484375, 0.690826416015625, 1.2444438934326172, -0.17702102661132812, 1.9793701171875, 2.1595077514648438, -0.189666748046875, 3.1686019897460938, 0.059906005859375, 1.938934326171875, -2.4691543579101562, 2.0719261169433594, 1.1699104309082031, -0.7506637573242188, 2.1124801635742188, 0.5809860229492188, -0.09067916870117188, 1.7002067565917969, -1.40472412109375, 3.3223724365234375, 1.6468963623046875, 2.9902706146240234, 1.3896121978759766, 1.0585365295410156, 0.9061813354492188, 0.8683090209960938, 1.391998291015625, 1.7931575775146484, 0.6933803558349609, 0.14250946044921875, -0.06642913818359375, 1.35870361328125, 1.4483299255371094, 0.08524322509765625, 0.21704864501953125, 3.2421875, 3.876789093017578, 3.411100387573242, -0.064788818359375, 5.7624664306640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000122.npy"}
|
||||
{"epoch": 0.18442932728647016, "step": 123, "batch_size": 64, "mean": 1.2015812397003174, "std": 1.9515923261642456, "min": -5.26123046875, "p10": -1.0987974166870114, "median": 1.2719736099243164, "p90": 3.636886596679688, "max": 6.5994415283203125, "pos_frac": 0.75, "sample": [3.683452606201172, 1.163726806640625, 0.166534423828125, 0.174407958984375, -1.8207435607910156, -0.16935157775878906, 0.9153327941894531, 0.4267730712890625, 6.5994415283203125, 3.5282325744628906, 0.27925872802734375, 2.578521728515625, 0.36379241943359375, -1.80145263671875, 2.7733230590820312, 2.1656761169433594, 0.8313922882080078, 2.1868629455566406, -1.5487480163574219, 2.0563430786132812, 1.3739776611328125, 4.0794830322265625, 1.6691360473632812, 2.970733642578125, 2.3665542602539062, 2.7814559936523438, 1.8095645904541016, -0.14742088317871094, 2.098602294921875, 1.4505195617675781, -0.019937515258789062, 4.5919647216796875, -3.115682601928711, 2.265623092651367, -1.2686614990234375, -1.6813850402832031, 1.7452545166015625, -0.6488323211669922, -0.012847900390625, 1.2963428497314453, 0.682525634765625, 1.4354133605957031, 2.513093948364258, 1.9352188110351562, 0.6881046295166016, 1.890655517578125, -0.5116004943847656, 0.13105010986328125, -0.07158088684082031, 3.7909412384033203, 1.2265548706054688, 1.5615196228027344, 0.3252410888671875, 2.7563323974609375, -5.26123046875, 3.753631591796875, -0.7024478912353516, 1.2476043701171875, -0.39131927490234375, 2.8494873046875, 0.8621902465820312, 1.6164741516113281, 5.2655181884765625, 1.150604248046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000123.npy"}
|
||||
{"epoch": 0.18594104308390022, "step": 124, "batch_size": 64, "mean": 1.5572537183761597, "std": 1.812817931175232, "min": -3.443004608154297, "p10": -0.7239086151123046, "median": 1.5905084609985352, "p90": 4.147715759277344, "max": 5.9503326416015625, "pos_frac": 0.828125, "sample": [-0.4797935485839844, 5.594329833984375, 2.1671371459960938, -3.443004608154297, 1.5885753631591797, 1.7344970703125, 5.9503326416015625, 2.689105987548828, 2.5417861938476562, 4.01141357421875, 2.1843032836914062, 2.1316261291503906, 0.8657646179199219, 4.2061309814453125, 0.7794761657714844, 0.17674636840820312, 0.911712646484375, 2.8717918395996094, 0.13498687744140625, -1.138427734375, 4.6165771484375, 1.9161376953125, 2.7513465881347656, -0.6860198974609375, 0.15719223022460938, 1.5548324584960938, 0.7234249114990234, 0.639739990234375, 2.933349609375, -0.9209632873535156, 2.275705337524414, 1.505035400390625, 2.5183258056640625, 3.357574462890625, 2.137134552001953, 2.3824615478515625, 4.333229064941406, -1.6908416748046875, 4.698516845703125, 1.62255859375, 0.24378204345703125, 4.403556823730469, -0.7529830932617188, 3.5992050170898438, 1.7133293151855469, 1.2324142456054688, 2.5415115356445312, 0.7133026123046875, -1.0105838775634766, 2.976133346557617, 2.3488922119140625, 0.10013961791992188, 0.2679767608642578, 1.5924415588378906, 0.97784423828125, 0.0249481201171875, 0.85186767578125, -0.24554443359375, -0.7401466369628906, 1.1668853759765625, -0.4215888977050781, 2.0727386474609375, 0.6183891296386719, 3.0859222412109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000124.npy"}
|
||||
{"epoch": 0.1874527588813303, "step": 125, "batch_size": 64, "mean": 1.2849963903427124, "std": 1.9667152166366577, "min": -2.3039932250976562, "p10": -1.0053583145141602, "median": 1.1805877685546875, "p90": 3.62908172607422, "max": 7.3394775390625, "pos_frac": 0.703125, "sample": [3.251983642578125, 2.9025192260742188, 2.031768798828125, 1.1441650390625, 1.1538162231445312, -1.003824234008789, 7.3394775390625, 0.1768207550048828, 1.695892333984375, 1.974233627319336, 5.24090576171875, 0.614288330078125, -1.0060157775878906, 2.2942047119140625, 0.36153602600097656, 3.9266738891601562, 2.4251937866210938, 2.4857635498046875, 1.2451934814453125, -0.0731353759765625, 5.322723388671875, -0.137725830078125, 1.0884017944335938, 2.324737548828125, -0.054088592529296875, 1.7716541290283203, 3.7906951904296875, -0.9610328674316406, -1.8384475708007812, 0.6839218139648438, 3.23504638671875, -0.46435546875, -0.9453048706054688, 2.910247802734375, 2.729785919189453, -0.8623580932617188, 2.6535511016845703, 1.2073593139648438, 3.195587158203125, 2.758209228515625, 5.1093292236328125, -0.5168304443359375, 0.3496551513671875, 1.926788330078125, 1.5999794006347656, -2.3039932250976562, 0.701019287109375, 4.5225372314453125, -0.10836410522460938, 0.0248565673828125, -0.414794921875, 1.7042655944824219, 1.9722518920898438, 0.7789840698242188, -1.8187713623046875, 0.6733932495117188, 0.6905479431152344, -1.5536575317382812, 2.0134544372558594, -0.4167919158935547, 1.4409236907958984, -1.2718734741210938, -1.8927230834960938, 2.43951416015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000125.npy"}
|
||||
{"epoch": 0.1889644746787604, "step": 126, "batch_size": 64, "mean": 1.0903599262237549, "std": 2.377479314804077, "min": -4.0238494873046875, "p10": -1.1825531005859373, "median": 1.0214519500732422, "p90": 3.5551956176757815, "max": 11.56427001953125, "pos_frac": 0.65625, "sample": [1.8152885437011719, 0.2048187255859375, 3.5807971954345703, 1.0650444030761719, -0.31505584716796875, 3.9351844787597656, 4.002677917480469, 0.03836822509765625, 0.8324127197265625, -1.1090507507324219, 2.958261489868164, 3.043914794921875, -4.0238494873046875, 1.1266326904296875, 3.158111572265625, 0.9627704620361328, -0.5253715515136719, -1.2140541076660156, -0.20231056213378906, -0.5970306396484375, -0.120819091796875, 2.6900177001953125, 0.8894214630126953, 1.2223215103149414, -0.07214546203613281, -0.3885078430175781, 0.43090057373046875, -1.3042831420898438, 1.5850753784179688, -1.0650062561035156, 0.3760986328125, 0.9778594970703125, -2.148183822631836, 1.9110774993896484, -0.12023544311523438, -0.87158203125, -0.6872158050537109, 11.56427001953125, 2.0511531829833984, -3.9302139282226562, 1.136871337890625, 2.1912612915039062, 1.6807384490966797, 1.3065605163574219, 1.3185043334960938, 2.5752792358398438, 2.5994186401367188, 5.866058349609375, 1.1001739501953125, 3.0499649047851562, 0.9612083435058594, 3.8481597900390625, 1.9859695434570312, 1.690826416015625, 0.650482177734375, -0.4293060302734375, -1.329498291015625, 3.4954586029052734, 1.0987472534179688, 1.65020751953125, -0.9704132080078125, 6.0023040771484375, -3.2455387115478516, -0.1779632568359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000126.npy"}
|
||||
{"epoch": 0.19047619047619047, "step": 127, "batch_size": 64, "mean": 1.5069829225540161, "std": 1.7780834436416626, "min": -3.4476318359375, "p10": -0.3646175384521484, "median": 1.310089111328125, "p90": 4.163696670532227, "max": 5.693977355957031, "pos_frac": 0.8125, "sample": [-0.949005126953125, 3.4367218017578125, 1.6870956420898438, 3.0241851806640625, 0.46096038818359375, 2.0186614990234375, 1.945465087890625, 4.206295013427734, 0.129119873046875, 4.064300537109375, 3.0395984649658203, 0.9063644409179688, 1.7615509033203125, 3.7553768157958984, 2.8719863891601562, 0.797454833984375, 2.5204315185546875, 1.6343612670898438, -0.5903244018554688, 1.277801513671875, 2.4385910034179688, -3.4476318359375, 1.9401626586914062, 4.507362365722656, 1.8120460510253906, -1.459014892578125, 1.7195053100585938, 0.7595748901367188, -0.8731231689453125, 1.342376708984375, -0.10619735717773438, 0.9725799560546875, 0.299835205078125, 3.2149124145507812, -0.07046890258789062, 3.5446815490722656, 0.10387992858886719, 4.619110107421875, 3.713672637939453, 5.693977355957031, 0.5665359497070312, 2.8528480529785156, 0.536346435546875, -1.337026596069336, 2.1692657470703125, 0.6645278930664062, 0.30194091796875, -0.15004348754882812, 4.5290069580078125, -0.3170623779296875, 0.6160736083984375, 4.237701416015625, 1.2462654113769531, 0.08603668212890625, 1.0616798400878906, 0.6522293090820312, 1.8256072998046875, 0.17174530029296875, 0.337493896484375, 2.0291271209716797, 1.7441730499267578, -0.17930221557617188, 4.462501525878906, -0.3849983215332031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000127.npy"}
|
||||
{"epoch": 0.19198790627362056, "step": 128, "batch_size": 64, "mean": 1.274397850036621, "std": 2.364651679992676, "min": -5.11279296875, "p10": -1.5812381744384765, "median": 1.2953605651855469, "p90": 5.0804510116577175, "max": 5.846099853515625, "pos_frac": 0.6875, "sample": [1.020172119140625, 3.7335205078125, 0.7689857482910156, 2.2906360626220703, 1.7310447692871094, 1.2782058715820312, 2.0517578125, 1.534454345703125, 1.2776298522949219, 5.6467742919921875, 1.6109733581542969, -3.1127471923828125, 3.6829681396484375, -1.034952163696289, 1.840463638305664, -1.5174369812011719, 0.5829315185546875, -1.738037109375, -1.9334869384765625, 3.8057632446289062, 4.4640350341796875, -2.530477523803711, -1.4577102661132812, 5.846099853515625, 5.756683349609375, 1.99285888671875, 0.23392486572265625, 5.467876434326172, -0.6130046844482422, 3.342538833618164, -0.026702880859375, 2.5239181518554688, 1.2309646606445312, 2.261383056640625, 2.1656875610351562, 1.4100189208984375, -0.08466339111328125, 5.344629287719727, -2.3313846588134766, -0.9457550048828125, -1.60858154296875, 1.4836902618408203, 1.5093231201171875, 0.07089042663574219, -0.8067550659179688, -0.8746337890625, 1.8429737091064453, 2.6058006286621094, 3.08184814453125, -1.1639862060546875, 1.3125152587890625, 0.9539337158203125, 5.550750732421875, 0.298553466796875, 2.833761215209961, 0.6568450927734375, -0.1201019287109375, 1.6555099487304688, 5.6600799560546875, -0.4383354187011719, -5.11279296875, -0.10076904296875, 3.4796371459960938, 1.22076416015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000128.npy"}
|
||||
{"epoch": 0.19349962207105065, "step": 129, "batch_size": 64, "mean": 1.137814998626709, "std": 2.0925745964050293, "min": -3.9213409423828125, "p10": -1.0645946502685546, "median": 0.9173078536987305, "p90": 3.6567045211791998, "max": 7.897693634033203, "pos_frac": 0.65625, "sample": [2.1095199584960938, -0.3651447296142578, 2.196687698364258, -0.6854248046875, -0.5300884246826172, 4.509571075439453, -2.181610107421875, -1.0786170959472656, -0.26520538330078125, 3.6904754638671875, 2.7605209350585938, 1.060638427734375, 2.379150390625, -0.23963165283203125, -1.4533462524414062, -1.0318756103515625, 3.3077392578125, 0.32166481018066406, 4.79693603515625, 4.054744720458984, 0.5321807861328125, -0.141357421875, 0.9534225463867188, 1.1910171508789062, -0.9640254974365234, 5.07666015625, 2.5444698333740234, -0.6394844055175781, 1.6414127349853516, 3.0464019775390625, 2.6667404174804688, 0.796630859375, -2.123565673828125, 3.5779056549072266, 0.37960243225097656, 0.9391098022460938, -2.0468521118164062, 0.22426605224609375, 0.8955059051513672, 5.040561676025391, 1.8309326171875, 0.4759254455566406, -0.4865074157714844, 1.129190444946289, 1.5400123596191406, 2.12725830078125, 0.1055450439453125, 7.897693634033203, 2.4934158325195312, 3.5488052368164062, 1.3949165344238281, 2.6941452026367188, 3.2944717407226562, -0.49074554443359375, -0.038906097412109375, -1.387399673461914, 2.5507240295410156, -3.9213409423828125, -0.47784423828125, -0.2222919464111328, 0.4030628204345703, -0.214080810546875, 0.09304237365722656, 1.5328292846679688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000129.npy"}
|
||||
{"epoch": 0.19501133786848074, "step": 130, "batch_size": 64, "mean": 1.3026278018951416, "std": 1.850061297416687, "min": -2.084442138671875, "p10": -1.0244537353515621, "median": 1.3249130249023438, "p90": 3.590840530395508, "max": 6.692935943603516, "pos_frac": 0.703125, "sample": [2.415538787841797, 0.5093460083007812, 5.4495697021484375, 2.4058837890625, 4.181419372558594, 2.0460357666015625, -0.6228790283203125, 3.491525650024414, 1.0045642852783203, -0.05933380126953125, -1.420501708984375, 2.3661956787109375, 1.5608673095703125, 0.6992340087890625, -1.1461410522460938, 1.4779510498046875, 1.894195556640625, 0.6728134155273438, 1.8009567260742188, 3.1688003540039062, 2.5759105682373047, 3.1009292602539062, 4.4735870361328125, 3.2058486938476562, -0.2005615234375, -1.7346000671386719, 0.06824493408203125, -0.09683990478515625, -1.5045795440673828, 0.42156219482421875, 2.14422607421875, 3.5075302124023438, 2.5283966064453125, 1.5737266540527344, 0.01024627685546875, 6.692935943603516, 2.0216598510742188, 1.1330146789550781, 4.779083251953125, -0.15555953979492188, -0.4071083068847656, 3.626544952392578, -0.7405166625976562, -2.084442138671875, 1.7088050842285156, 1.7393054962158203, 1.17510986328125, 0.17364501953125, 2.0272674560546875, -0.00457763671875, -1.7058067321777344, 1.5707359313964844, -0.525177001953125, 1.7603187561035156, -1.3106842041015625, 1.3245658874511719, -0.052947998046875, 1.3252601623535156, -0.0037994384765625, 0.271240234375, 4.653934478759766, 1.4860668182373047, 0.9958381652832031, -0.076202392578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000130.npy"}
|
||||
{"epoch": 0.1965230536659108, "step": 131, "batch_size": 64, "mean": 1.5919833183288574, "std": 2.3745267391204834, "min": -4.154693603515625, "p10": -1.2419084548950194, "median": 1.5387611389160156, "p90": 4.186201477050782, "max": 6.316986083984375, "pos_frac": 0.75, "sample": [0.46950531005859375, 1.9403629302978516, 0.35753440856933594, -0.12862205505371094, -1.243316650390625, 6.29559326171875, 1.6919746398925781, 2.1745872497558594, -1.2386226654052734, 3.8395538330078125, 1.3737869262695312, 0.5194587707519531, -1.1138496398925781, -1.6181869506835938, 1.2576866149902344, 2.6520557403564453, -3.0724639892578125, 1.5129776000976562, 3.8958892822265625, 0.3555755615234375, -1.2346038818359375, -1.5923843383789062, 3.4299774169921875, 3.188995361328125, 1.896881103515625, 0.7175750732421875, 1.8621845245361328, -3.2249832153320312, 0.5564537048339844, -0.0538330078125, 2.3409996032714844, 3.6071853637695312, 5.481689453125, 3.564085006713867, 3.8498382568359375, 3.811309814453125, 0.8478889465332031, 6.316986083984375, 1.4807777404785156, 4.252105712890625, 1.564544677734375, 6.163734436035156, -1.1188774108886719, 4.0324249267578125, 1.6078262329101562, 0.19188690185546875, 0.8969821929931641, 3.9248046875, -1.6371307373046875, 2.53118896484375, -0.2605247497558594, -0.4356536865234375, 2.7846412658691406, 0.2974433898925781, -4.154693603515625, 3.2611007690429688, 0.6315765380859375, 1.0576248168945312, 3.7075958251953125, -0.6829605102539062, 1.718719482421875, 4.730655670166016, 6.24629020690918, 3.8071250915527344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000131.npy"}
|
||||
{"epoch": 0.1980347694633409, "step": 132, "batch_size": 64, "mean": 1.4087748527526855, "std": 2.2905361652374268, "min": -5.104745864868164, "p10": -1.1350505828857422, "median": 1.3412647247314453, "p90": 4.159173774719238, "max": 7.3024139404296875, "pos_frac": 0.78125, "sample": [4.7308807373046875, 0.786468505859375, 2.000396728515625, 2.71356201171875, -0.29471588134765625, -1.0752601623535156, 2.70068359375, 0.20165443420410156, -1.36383056640625, 3.535308837890625, 4.122928619384766, 3.878713607788086, 1.9688606262207031, 1.9394607543945312, -2.4359970092773438, 1.171844482421875, -0.890167236328125, 3.9956817626953125, 0.6274948120117188, -1.160675048828125, 0.00334930419921875, -5.0576324462890625, 2.7199459075927734, 0.617828369140625, -0.9842605590820312, -1.7537994384765625, 2.341766357421875, 3.7929611206054688, 4.174707412719727, -0.5671463012695312, -0.0620574951171875, 0.42407989501953125, -5.104745864868164, 1.2187042236328125, 1.6332855224609375, 1.5112152099609375, 5.699897766113281, 2.788990020751953, 3.4211997985839844, 2.4056320190429688, 0.019931793212890625, 7.3024139404296875, 0.523468017578125, 3.5026321411132812, 3.6767635345458984, 1.3210639953613281, 0.2859649658203125, 0.17644119262695312, 1.1766719818115234, 1.3958683013916016, 2.7075424194335938, 0.5374755859375, 4.95648193359375, 2.468130111694336, 4.256986618041992, 1.3614654541015625, 2.000335693359375, -0.8956661224365234, 0.5799102783203125, 2.3581771850585938, -1.444549560546875, 1.1557769775390625, 4.190887451171875, 0.170196533203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000132.npy"}
|
||||
{"epoch": 0.19954648526077098, "step": 133, "batch_size": 64, "mean": 1.2651795148849487, "std": 2.1677074432373047, "min": -4.8975982666015625, "p10": -0.7081878662109374, "median": 1.1478538513183594, "p90": 3.7333395004272467, "max": 7.72064208984375, "pos_frac": 0.75, "sample": [3.23333740234375, -0.6528472900390625, 1.03753662109375, 3.8930587768554688, 0.3613109588623047, 0.5369110107421875, 0.3483695983886719, 4.2965545654296875, 0.3028125762939453, -0.4190940856933594, 2.3793869018554688, 4.5732574462890625, 3.0771865844726562, 1.1871147155761719, -2.0432968139648438, 2.2477645874023438, 2.8361587524414062, -4.8975982666015625, 2.3357009887695312, -0.313568115234375, -0.6782913208007812, 1.3065643310546875, -0.08236312866210938, 3.2954883575439453, 2.6116676330566406, -0.04412841796875, 2.842132568359375, 0.139862060546875, 0.35697174072265625, -0.56964111328125, 1.9483680725097656, 1.0000553131103516, -3.5364227294921875, 3.5046005249023438, -0.7210006713867188, 1.7826004028320312, -0.5089607238769531, 0.66552734375, 3.5414352416992188, 1.8518524169921875, 1.1435089111328125, 1.7087173461914062, 1.9031524658203125, -3.470071792602539, 0.685455322265625, 5.267547607421875, 2.3751144409179688, 3.07012939453125, 1.1521987915039062, 7.72064208984375, 0.49925994873046875, 0.0019092559814453125, -0.6513767242431641, 4.7306976318359375, 1.8752365112304688, 0.5784950256347656, 2.5627975463867188, 3.815584182739258, 1.9264602661132812, -1.1824588775634766, 3.4855880737304688, 0.09984588623046875, -2.062732696533203, 0.7094154357910156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000133.npy"}
|
||||
{"epoch": 0.20105820105820105, "step": 134, "batch_size": 64, "mean": 1.8496698141098022, "std": 2.353973150253296, "min": -2.8619155883789062, "p10": -0.7533248901367187, "median": 1.800398826599121, "p90": 4.382669067382814, "max": 8.364395141601562, "pos_frac": 0.78125, "sample": [0.9453964233398438, -0.0349273681640625, 1.9678878784179688, 1.9243240356445312, 1.1247634887695312, 1.7977428436279297, 0.5507965087890625, 3.043792724609375, 4.5501556396484375, 2.62646484375, 2.8604278564453125, 2.987060546875, 2.9737014770507812, 2.4530982971191406, 0.7863922119140625, -0.10682106018066406, 2.9958457946777344, 1.8030548095703125, -0.32175445556640625, 1.7891654968261719, 3.3011245727539062, 1.5281982421875, 3.823862075805664, 1.08013916015625, 0.5780563354492188, 5.077728271484375, 0.16822052001953125, 0.8745498657226562, -1.4845466613769531, 2.943723678588867, -1.7435970306396484, -0.1972980499267578, 3.8299102783203125, 0.5018157958984375, 0.28783416748046875, -2.5492172241210938, 2.1056289672851562, 1.7287425994873047, 2.6454010009765625, 2.4646358489990234, 8.198104858398438, 3.0371227264404297, -2.8189239501953125, -2.8619155883789062, 2.876361846923828, -0.7733612060546875, 4.876708984375, -0.706573486328125, 6.41680908203125, 1.2622032165527344, -0.24021148681640625, 3.4138259887695312, 3.2307662963867188, 2.5036163330078125, 7.8022613525390625, 0.046695709228515625, -1.3241958618164062, 3.0389480590820312, 1.5184593200683594, 2.453968048095703, 3.9918670654296875, 8.364395141601562, 0.63116455078125, -0.24071311950683594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000134.npy"}
|
||||
{"epoch": 0.20256991685563114, "step": 135, "batch_size": 64, "mean": 1.280935287475586, "std": 2.349777936935425, "min": -4.31121826171875, "p10": -1.2437076568603513, "median": 0.9672956466674805, "p90": 3.9173751831054697, "max": 7.4559478759765625, "pos_frac": 0.703125, "sample": [2.628387451171875, -1.1166152954101562, 3.999237060546875, 0.4171276092529297, 1.1225528717041016, -1.2981758117675781, 0.7075271606445312, -2.0132675170898438, -0.4265575408935547, 7.4559478759765625, 2.746856689453125, 0.24016571044921875, 3.2399826049804688, -0.29073524475097656, 0.0409088134765625, -1.7316741943359375, 3.7263641357421875, 2.6539306640625, -0.6894817352294922, 1.1308574676513672, 1.320688247680664, -0.1261444091796875, -4.31121826171875, 0.7447738647460938, 1.0537071228027344, 4.261104583740234, 1.0641326904296875, 0.2715301513671875, 0.6756515502929688, 0.8138580322265625, 0.249481201171875, 1.3579216003417969, 3.2247314453125, 0.8808841705322266, 2.819671630859375, 1.3276004791259766, 0.6911392211914062, 1.8365859985351562, 3.4892959594726562, -3.587736129760742, -0.034496307373046875, 6.4465789794921875, -0.058612823486328125, 1.5620880126953125, 2.080770492553711, 0.7882766723632812, -0.24317169189453125, -2.8627986907958984, 3.1104297637939453, 2.4784622192382812, 3.1901283264160156, -2.8662872314453125, -0.01378631591796875, 2.826568603515625, 6.012229919433594, 6.852634429931641, -0.8597278594970703, 2.782459259033203, -0.4756641387939453, 2.301738739013672, 0.4974212646484375, 2.962268829345703, 5.2057342529296875, -0.30438995361328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000135.npy"}
|
||||
{"epoch": 0.20408163265306123, "step": 136, "batch_size": 64, "mean": 1.5897669792175293, "std": 3.2204182147979736, "min": -5.156352996826172, "p10": -2.1817604064941403, "median": 1.6326560974121094, "p90": 6.306516075134279, "max": 10.261978149414062, "pos_frac": 0.671875, "sample": [9.46630859375, 4.3237762451171875, -3.828836441040039, -1.4013633728027344, -1.515106201171875, 2.2429580688476562, -0.29010009765625, 1.0514869689941406, -0.0711212158203125, 2.015239715576172, 1.6806869506835938, -3.361236572265625, -0.41317176818847656, 2.7263031005859375, -0.6519241333007812, 2.1203155517578125, 5.971460342407227, 0.8879203796386719, 0.9122295379638672, -1.811065673828125, 3.09442138671875, 0.9633979797363281, 3.0479888916015625, 6.450111389160156, -1.019378662109375, 0.15206146240234375, 6.5771026611328125, 1.8807334899902344, 1.303253173828125, 9.073944091796875, 4.3547821044921875, 0.296173095703125, 2.47503662109375, 4.10546875, -0.90008544921875, 1.9637451171875, -0.4768829345703125, 1.9192676544189453, 2.1675872802734375, 1.5847396850585938, -5.156352996826172, 2.9746322631835938, 1.8738555908203125, 3.81927490234375, 0.5551643371582031, 4.0702667236328125, -0.5097732543945312, 1.1977386474609375, -2.7700042724609375, 10.261978149414062, 2.0586929321289062, -3.237701416015625, -2.3406295776367188, 1.680572509765625, -0.11077880859375, 2.4379425048828125, 1.8466949462890625, -0.15285491943359375, 7.218969345092773, 8.098104476928711, 3.533233642578125, -0.4122772216796875, 0.3953437805175781, -4.655235290527344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000136.npy"}
|
||||
{"epoch": 0.20559334845049132, "step": 137, "batch_size": 64, "mean": 1.667122721672058, "std": 2.598249673843384, "min": -4.374359130859375, "p10": -1.3177921295166015, "median": 1.6989173889160156, "p90": 4.683762359619141, "max": 9.734733581542969, "pos_frac": 0.71875, "sample": [-0.34436798095703125, -4.374359130859375, 2.8244285583496094, 3.8088226318359375, 3.208343505859375, 5.2240753173828125, 3.616870880126953, 5.1625518798828125, -0.724395751953125, 4.7303466796875, 3.443113327026367, 2.3254241943359375, 5.188518524169922, 2.5312271118164062, 4.575065612792969, 1.0676345825195312, 2.070526123046875, 4.3031768798828125, -0.2856597900390625, 0.6490516662597656, -1.1644916534423828, 2.9988021850585938, 2.7674713134765625, -2.5200424194335938, 0.9133014678955078, -0.1211090087890625, 3.5236663818359375, 9.734733581542969, -1.6250190734863281, -0.03499603271484375, -2.7249374389648438, 0.16170501708984375, 0.5903472900390625, 2.8286361694335938, 8.299026489257812, 0.5084762573242188, 0.1534290313720703, -1.9673385620117188, 1.7803421020507812, -0.01953887939453125, -0.8981666564941406, 0.94732666015625, 2.272890090942383, 2.353668212890625, 3.18408203125, -0.6624317169189453, 1.1400718688964844, 4.253541946411133, 1.61749267578125, 5.0636138916015625, 0.3477592468261719, -1.0466041564941406, 0.04286766052246094, -2.6517086029052734, 3.6144046783447266, 2.120656967163086, 0.8889808654785156, -1.2305030822753906, -1.3552017211914062, 2.754241943359375, 0.5931549072265625, 3.7974929809570312, 3.98504638671875, 2.480316162109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000137.npy"}
|
||||
{"epoch": 0.20710506424792138, "step": 138, "batch_size": 64, "mean": 1.3133950233459473, "std": 2.468076229095459, "min": -5.841941833496094, "p10": -1.815510559082031, "median": 1.414240837097168, "p90": 3.989173412322998, "max": 7.684814453125, "pos_frac": 0.703125, "sample": [-0.2015533447265625, 3.6788330078125, 2.672189712524414, 4.7480621337890625, 1.5758819580078125, 1.6805648803710938, 1.8854217529296875, 2.6700286865234375, -1.8913116455078125, 1.2248497009277344, 0.7771873474121094, -3.5723876953125, 3.704437255859375, 0.6035614013671875, 7.684814453125, -1.638641357421875, -2.00787353515625, 3.7362022399902344, 2.7073516845703125, 3.0161170959472656, 1.0428886413574219, 1.8027420043945312, 4.7656402587890625, 3.3928871154785156, -0.3261260986328125, 2.6062374114990234, 3.4405155181884766, 2.0165252685546875, 1.0862579345703125, 0.5667591094970703, 3.9961671829223633, 0.036540985107421875, 4.340959548950195, -1.1815872192382812, -1.4498748779296875, -0.0006618499755859375, -0.012073516845703125, -3.526947021484375, -0.43024444580078125, 3.1826019287109375, 1.0720138549804688, 3.9728546142578125, -0.22179412841796875, 1.6032485961914062, -0.3389015197753906, -5.841941833496094, 6.651226043701172, 0.9502544403076172, 1.7545242309570312, 0.9237823486328125, 1.304351806640625, 1.554901123046875, 3.2406005859375, -1.4068565368652344, -2.7519073486328125, -0.23334121704101562, -1.9131908416748047, 1.524129867553711, 0.17678451538085938, 1.7052001953125, 5.771095275878906, 1.8804359436035156, 3.923583984375, 0.3532867431640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000138.npy"}
|
||||
{"epoch": 0.20861678004535147, "step": 139, "batch_size": 64, "mean": 1.660610318183899, "std": 2.267900228500366, "min": -3.4349822998046875, "p10": -1.0887405395507812, "median": 1.4186944961547852, "p90": 4.846666717529297, "max": 7.917362213134766, "pos_frac": 0.765625, "sample": [1.6427345275878906, 4.618705749511719, -0.31917572021484375, 0.1953125, -2.059345245361328, 2.7437095642089844, 1.1809310913085938, 4.432403564453125, 2.1678123474121094, 0.5361194610595703, 0.18828201293945312, 7.917362213134766, 3.4662952423095703, -0.6970024108886719, 2.082275390625, 5.383628845214844, 6.3276824951171875, 1.0361881256103516, -1.066192626953125, 3.6096839904785156, -1.0984039306640625, -3.4349822998046875, -0.7135238647460938, 1.1861248016357422, 2.475372314453125, 1.3853988647460938, 2.0402374267578125, 1.3319950103759766, 4.333347320556641, 1.9278717041015625, 2.1939849853515625, 3.4630889892578125, 0.8407745361328125, 0.15765380859375, -0.7464828491210938, 3.25665283203125, 2.1391448974609375, 1.8677978515625, 0.6757659912109375, -0.24471664428710938, 4.223178863525391, 1.1620750427246094, 5.719394683837891, -0.06967926025390625, -1.3580360412597656, -0.3753013610839844, 1.1949234008789062, 1.851776123046875, 4.884521484375, 0.4615669250488281, 4.974822998046875, 0.7025032043457031, 2.6844863891601562, -2.0028305053710938, -1.3267822265625, 1.1321029663085938, 0.18968963623046875, 2.5469093322753906, -2.0330066680908203, 2.6948394775390625, 4.898033142089844, 4.758338928222656, 1.4519901275634766, 1.4890289306640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000139.npy"}
|
||||
{"epoch": 0.21012849584278157, "step": 140, "batch_size": 64, "mean": 2.132258415222168, "std": 3.295691728591919, "min": -9.440078735351562, "p10": -1.9424478530883786, "median": 2.2515125274658203, "p90": 5.923083114624023, "max": 8.535467147827148, "pos_frac": 0.75, "sample": [6.39111328125, 2.1513671875, 0.2957191467285156, 0.09160614013671875, 1.15753173828125, 2.8096160888671875, -3.464935302734375, -2.8914718627929688, 4.5115203857421875, 3.9815750122070312, -9.440078735351562, 2.6455307006835938, 3.129364013671875, 1.402008056640625, 5.926242828369141, 2.2742156982421875, -2.1377201080322266, 0.962738037109375, -0.710174560546875, 5.238990783691406, 2.127176284790039, 5.46588134765625, 2.751056671142578, 2.7824935913085938, 0.2533226013183594, 2.387664794921875, 1.96368408203125, 8.535467147827148, 1.2679595947265625, -2.074005126953125, -0.82098388671875, -1.2500076293945312, 3.77960205078125, -0.4946746826171875, -0.5363845825195312, -0.8603744506835938, 5.91571044921875, 6.578399658203125, 5.9135894775390625, 5.548248291015625, 2.476043701171875, 5.281425476074219, 7.489654541015625, 5.504283905029297, 2.228809356689453, -2.5635452270507812, 5.807891845703125, 1.233245849609375, 6.817741394042969, -1.228738784790039, -3.8403358459472656, 3.974872589111328, 1.1254653930664062, 2.154024124145508, 7.246910095214844, 1.2609004974365234, 5.732242584228516, 0.1890106201171875, 3.25299072265625, 2.520843505859375, -0.13513565063476562, 5.194980621337891, -1.6354808807373047, 2.8178634643554688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000140.npy"}
|
||||
{"epoch": 0.21164021164021163, "step": 141, "batch_size": 64, "mean": 2.0809690952301025, "std": 3.152566432952881, "min": -5.137542724609375, "p10": -1.156987953186035, "median": 1.457611083984375, "p90": 6.244688796997072, "max": 9.734756469726562, "pos_frac": 0.75, "sample": [-5.137542724609375, 0.16070556640625, 5.17340087890625, 0.13635635375976562, 3.4754867553710938, 0.6926498413085938, 0.8580169677734375, 3.5275650024414062, 0.8159084320068359, -3.5533294677734375, 2.4217147827148438, 1.95098876953125, 4.654451370239258, -1.5460453033447266, 6.386631011962891, 2.6198463439941406, 3.5817222595214844, 0.2889976501464844, -0.37165069580078125, -3.2509231567382812, 1.7304534912109375, 0.5345592498779297, 0.5577392578125, 3.0602569580078125, 3.9552536010742188, 0.5965728759765625, 0.9966888427734375, 3.9149627685546875, -0.5892333984375, -1.2227783203125, -0.46968841552734375, -2.2256393432617188, 2.0143356323242188, 2.4087600708007812, 4.247562408447266, -0.5755596160888672, 4.908649444580078, 0.39644622802734375, 9.734756469726562, 6.5001983642578125, 5.861259460449219, 4.523565292358398, 3.490184783935547, 3.114990234375, 5.913490295410156, -0.820220947265625, -0.3579368591308594, 2.4617538452148438, 7.942623138427734, 5.28485107421875, -2.4844894409179688, 0.58111572265625, 1.0268440246582031, 9.617523193359375, 3.0886383056640625, 9.592681884765625, -1.0034770965576172, 7.347208023071289, -0.0442047119140625, 1.1847686767578125, 2.340179443359375, 1.0710258483886719, -0.0154266357421875, 0.10582733154296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000141.npy"}
|
||||
{"epoch": 0.21315192743764172, "step": 142, "batch_size": 64, "mean": 1.5259199142456055, "std": 2.5054047107696533, "min": -3.917621612548828, "p10": -1.3781299591064453, "median": 1.3596153259277344, "p90": 5.084188270568849, "max": 7.42950439453125, "pos_frac": 0.734375, "sample": [0.9991302490234375, -3.200489044189453, 0.8340187072753906, 3.0761489868164062, 4.7741851806640625, 7.42950439453125, 2.2259750366210938, -1.3529624938964844, 2.013294219970703, 3.5559844970703125, 1.72076416015625, -0.18915176391601562, 3.582761764526367, -3.917621612548828, 0.38642120361328125, 2.6284866333007812, 2.448314666748047, -3.537649154663086, 1.170785903930664, 1.240793228149414, 1.7781448364257812, 3.354339599609375, 1.8986339569091797, 2.5037498474121094, 1.8295135498046875, 3.2677459716796875, -0.31931304931640625, 0.4149627685546875, 7.208198547363281, -1.388916015625, 1.3443832397460938, -0.0628509521484375, 5.371124267578125, 6.733734130859375, 1.374847412109375, -2.4651336669921875, 0.7386894226074219, -2.0449256896972656, -0.086700439453125, 0.9757308959960938, 6.808868408203125, 5.35809326171875, 0.5638275146484375, 0.7571945190429688, 0.15488433837890625, 3.616191864013672, -2.2136173248291016, 1.1385345458984375, -0.24725723266601562, 2.8908538818359375, -0.7036399841308594, -1.1175956726074219, 1.5027313232421875, 3.263275146484375, 0.3953819274902344, 1.5824050903320312, 3.7818145751953125, 5.217046737670898, 2.920654296875, -0.5153064727783203, 0.25147247314453125, -1.100198745727539, 3.1582183837890625, 1.8803901672363281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000142.npy"}
|
||||
{"epoch": 0.2146636432350718, "step": 143, "batch_size": 64, "mean": 1.4359562397003174, "std": 3.072786569595337, "min": -8.278488159179688, "p10": -1.4724666595458982, "median": 1.1064672470092773, "p90": 4.976819610595704, "max": 8.38107681274414, "pos_frac": 0.765625, "sample": [1.1269760131835938, 2.77252197265625, 2.7163925170898438, 2.6909141540527344, 2.140167236328125, -0.857696533203125, 1.6248016357421875, -0.5235080718994141, 5.902130126953125, -3.0090103149414062, 4.090017318725586, 0.68255615234375, 6.37493896484375, 1.0358772277832031, 3.0700454711914062, 2.453784942626953, -5.970916748046875, 2.4270477294921875, 4.5230865478515625, 0.8242149353027344, 5.3577880859375, -1.1187095642089844, 8.38107681274414, 4.5004730224609375, 2.926492691040039, 1.2074413299560547, 2.2172088623046875, -4.80865478515625, 0.6958198547363281, 0.3760948181152344, 0.6756248474121094, 1.085958480834961, -1.5830841064453125, 3.8286590576171875, 6.909263610839844, -1.2143592834472656, -0.9838943481445312, 0.230865478515625, 0.33185577392578125, 1.6236648559570312, 0.9500541687011719, -8.278488159179688, 5.007904052734375, 0.7746047973632812, -0.6269035339355469, 0.456695556640625, 2.6197738647460938, 4.2454833984375, 0.4356422424316406, 0.021945953369140625, -1.5998992919921875, 1.551849365234375, 4.311305999755859, 7.941104888916016, -5.3216400146484375, 4.904289245605469, 2.0106582641601562, 1.009490966796875, 4.2062225341796875, 2.724294662475586, -0.9390087127685547, -0.7016716003417969, 1.0480804443359375, 0.4154815673828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000143.npy"}
|
||||
{"epoch": 0.2161753590325019, "step": 144, "batch_size": 64, "mean": 1.7819199562072754, "std": 3.1078243255615234, "min": -7.823204040527344, "p10": -1.9535797119140623, "median": 1.5062274932861328, "p90": 6.215809631347659, "max": 8.462692260742188, "pos_frac": 0.703125, "sample": [6.461091995239258, 3.9133644104003906, -2.0771026611328125, 0.3196735382080078, -1.1152000427246094, 1.0944747924804688, -1.1681690216064453, 3.6669960021972656, 0.7800674438476562, 0.4123382568359375, -1.6505889892578125, 1.1992607116699219, 1.4589958190917969, 2.523672103881836, 4.0863037109375, 2.0123291015625, 5.517402648925781, 0.29627227783203125, -1.6653594970703125, 2.7203445434570312, 3.855743408203125, 2.9870681762695312, 1.75823974609375, -0.20107269287109375, -0.8297691345214844, 1.3075714111328125, 3.3245620727539062, 5.239078521728516, -1.4107704162597656, -0.183258056640625, 4.0696868896484375, 1.2756729125976562, -2.7471466064453125, -0.23775482177734375, 3.4935150146484375, -0.125152587890625, -1.0367412567138672, 5.643484115600586, 6.7676544189453125, 7.129753112792969, 3.829925537109375, 2.415679931640625, -2.1093711853027344, 4.3940277099609375, -2.1196632385253906, 1.5534591674804688, 2.4022369384765625, -2.129396438598633, 5.003673553466797, 0.08196258544921875, 0.5425243377685547, -1.1210098266601562, 1.7349853515625, 7.76673698425293, -2.7636260986328125, 7.16630744934082, 8.462692260742188, 7.5658416748046875, 0.6461944580078125, -7.823204040527344, 2.1581459045410156, 0.5161762237548828, 3.5992088317871094, 3.4028377532958984], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000144.npy"}
|
||||
{"epoch": 0.21768707482993196, "step": 145, "batch_size": 64, "mean": 1.7780449390411377, "std": 3.2411301136016846, "min": -7.9468536376953125, "p10": -1.4815284729003906, "median": 1.9156560897827148, "p90": 6.343585205078129, "max": 9.548049926757812, "pos_frac": 0.71875, "sample": [2.585865020751953, 3.101276397705078, -1.5298023223876953, 0.0372772216796875, -0.1625518798828125, -7.9468536376953125, 2.7735137939453125, -0.6192092895507812, 9.018749237060547, 2.665069580078125, 2.243185043334961, 3.4321823120117188, 0.73504638671875, 2.9446945190429688, 0.4153118133544922, 2.0617904663085938, -1.041839599609375, 5.412605285644531, -1.2952117919921875, 2.293376922607422, 0.33989715576171875, 1.1436080932617188, -1.4723320007324219, 3.309722900390625, 6.97016716003418, -0.12325286865234375, -1.158721923828125, 3.057708740234375, 4.8143310546875, 0.047916412353515625, 9.548049926757812, 3.7925262451171875, 2.16082763671875, 8.378280639648438, 1.791473388671875, 6.742576599121094, 2.000425338745117, 4.745882034301758, 2.1870346069335938, 4.330890655517578, -3.83001708984375, 2.817249298095703, 7.0277557373046875, 4.260538101196289, 0.03334236145019531, -2.9158935546875, 1.2168102264404297, 1.7244606018066406, -1.1744308471679688, 0.9029922485351562, 3.250621795654297, 2.536182403564453, 4.665555953979492, -0.7716445922851562, 1.4144821166992188, 7.0250244140625, 1.8308868408203125, -1.4854698181152344, -5.144317626953125, 1.0511627197265625, 4.01312255859375, -2.0832748413085938, -1.41473388671875, -0.8870162963867188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000145.npy"}
|
||||
{"epoch": 0.21919879062736206, "step": 146, "batch_size": 64, "mean": 2.0234241485595703, "std": 2.919635534286499, "min": -3.1171646118164062, "p10": -1.9721508026123045, "median": 1.8979463577270508, "p90": 5.445403289794923, "max": 12.089244842529297, "pos_frac": 0.75, "sample": [4.2682342529296875, 4.457866668701172, -1.378997802734375, 1.9684371948242188, 1.3214340209960938, -1.7005691528320312, -1.0812911987304688, 3.191661834716797, 3.383819580078125, 2.8523178100585938, -2.8149185180664062, -2.5459365844726562, 4.5879058837890625, 3.4891014099121094, 0.9796981811523438, -2.3692779541015625, 4.044145584106445, -0.11237335205078125, 4.769651412963867, -3.1171646118164062, 2.5003128051757812, 6.0679168701171875, 1.708099365234375, 0.7337284088134766, -2.948528289794922, -0.3573417663574219, 1.7590694427490234, 5.694183349609375, -1.691070556640625, 1.8629989624023438, 2.102235794067383, -2.9470596313476562, 8.599472045898438, 1.9994049072265625, 0.9034080505371094, -0.4454193115234375, 1.5781974792480469, 0.5204086303710938, 2.544921875, -0.8243045806884766, 5.985603332519531, 1.9328937530517578, 0.8822250366210938, 3.6006546020507812, 5.06890869140625, 6.470668792724609, 3.886219024658203, 2.3640823364257812, 4.660430908203125, 1.638936996459961, 3.0553150177001953, 2.43328857421875, 5.090604782104492, 1.7548294067382812, -2.088542938232422, -0.8735809326171875, 12.089244842529297, 0.431427001953125, 1.6968708038330078, 1.6658439636230469, 4.0919189453125, 2.6700592041015625, 1.83941650390625, 5.59745979309082], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000146.npy"}
|
||||
{"epoch": 0.22071050642479215, "step": 147, "batch_size": 64, "mean": 1.463640570640564, "std": 3.2404589653015137, "min": -10.02978515625, "p10": -1.5384628295898437, "median": 1.2258644104003906, "p90": 4.743067932128906, "max": 8.375919342041016, "pos_frac": 0.6875, "sample": [1.988504409790039, 1.6911506652832031, 4.105142593383789, -0.4842185974121094, 2.1132125854492188, 3.33447265625, -2.9101028442382812, 1.13262939453125, -1.0794219970703125, -0.8023815155029297, 3.959320068359375, 2.6041030883789062, 4.2649078369140625, 4.161346435546875, -0.5402145385742188, 4.738037109375, 7.793342590332031, -10.02978515625, 3.6935348510742188, -0.7675209045410156, 1.9857521057128906, 1.4157791137695312, 0.32149314880371094, -0.11065673828125, 0.0555267333984375, 6.9667205810546875, -1.591156005859375, 4.032146453857422, 2.0720367431640625, 1.2537612915039062, -0.3873767852783203, 1.9940414428710938, 1.197967529296875, -2.751293182373047, -0.784759521484375, 2.9659194946289062, 4.130434036254883, 7.714080810546875, 6.758171081542969, -0.8282604217529297, 0.40362548828125, 0.5122833251953125, 0.5841751098632812, 3.2325191497802734, 3.5508804321289062, -1.7079505920410156, 8.375919342041016, -0.5504684448242188, 3.8217391967773438, 0.24539566040039062, 0.8681564331054688, 2.44842529296875, 2.380950927734375, -0.9874668121337891, -1.4155120849609375, 0.429656982421875, 1.1790828704833984, 0.7349052429199219, 4.7452239990234375, -5.694488525390625, -4.904605865478516, 2.1926422119140625, 7.993391036987305, -0.14187240600585938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000147.npy"}
|
||||
{"epoch": 0.2222222222222222, "step": 148, "batch_size": 64, "mean": 2.1262576580047607, "std": 3.2517881393432617, "min": -4.792106628417969, "p10": -1.6049835205078122, "median": 2.0420799255371094, "p90": 6.140214729309082, "max": 9.139167785644531, "pos_frac": 0.71875, "sample": [7.1100006103515625, -3.5539684295654297, 2.4137630462646484, -2.5695858001708984, 5.245414733886719, 9.139167785644531, 3.4152565002441406, -4.5660552978515625, 4.2275390625, 5.903656005859375, -0.9664592742919922, 5.717552185058594, 1.6425285339355469, 1.3381500244140625, 4.521404266357422, -1.3668899536132812, -1.144500732421875, 5.4460906982421875, 3.001373291015625, -0.7160720825195312, 3.0519371032714844, -1.7070236206054688, 6.37286376953125, 0.9173107147216797, 5.668073654174805, 8.804916381835938, 3.9869766235351562, -0.1216888427734375, 8.954376220703125, 1.9320411682128906, 2.015657424926758, -0.9905319213867188, 6.141763687133789, -2.23712158203125, 2.4637298583984375, 4.595848083496094, 0.6248245239257812, 4.65608024597168, -4.792106628417969, 0.09161758422851562, 1.94677734375, -0.15216445922851562, 6.136600494384766, 8.753555297851562, -3.341550827026367, 3.0634326934814453, 2.386880874633789, 2.068502426147461, 3.4970932006835938, 1.2140426635742188, 3.3110084533691406, 0.4386749267578125, 1.0680294036865234, 0.09665679931640625, -0.04265403747558594, 3.5429763793945312, 2.0934906005859375, 2.6259613037109375, 1.5943183898925781, -0.7476425170898438, 0.6036415100097656, 2.6695632934570312, -0.8819732666015625, -0.5326461791992188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000148.npy"}
|
||||
{"epoch": 0.2237339380196523, "step": 149, "batch_size": 64, "mean": 2.4743001461029053, "std": 3.205138683319092, "min": -5.682365417480469, "p10": -1.4808235168457031, "median": 1.9354724884033203, "p90": 6.365123558044435, "max": 10.59068489074707, "pos_frac": 0.796875, "sample": [3.23406982421875, 5.543853759765625, 6.4947662353515625, 0.5316238403320312, 1.886993408203125, 0.08777618408203125, -0.2645263671875, 4.134336471557617, -0.4932880401611328, 5.239850997924805, -2.5451507568359375, 2.309194564819336, -0.0950775146484375, 2.1940441131591797, 4.0865478515625, 1.0785102844238281, 1.9078330993652344, 1.4495391845703125, 0.7021522521972656, 1.7429046630859375, 1.891427993774414, 8.445625305175781, 6.062623977661133, -1.5018348693847656, 10.59068489074707, 2.8484420776367188, 1.2037372589111328, -1.048421859741211, 2.0918025970458984, -5.682365417480469, -0.42093467712402344, 5.287651062011719, -1.4317970275878906, 9.600465774536133, 0.6874637603759766, 7.732637405395508, 5.678647994995117, -1.6518440246582031, 5.8931121826171875, -1.514688491821289, -2.4992218017578125, 3.4273223876953125, 0.36902618408203125, 4.5890045166015625, 8.447620391845703, 5.605417251586914, 2.5316543579101562, 4.301185607910156, 1.2716560363769531, 1.54962158203125, 2.657672882080078, 1.9631118774414062, 0.6497039794921875, -2.326263427734375, 0.35889244079589844, 4.308746337890625, 1.869873046875, 8.6695556640625, 4.921272277832031, 2.0194320678710938, 1.858428955078125, 4.546110153198242, 0.4954109191894531, 2.781585693359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000149.npy"}
|
||||
{"epoch": 0.2252456538170824, "step": 150, "batch_size": 64, "mean": 2.9409842491149902, "std": 3.1994614601135254, "min": -3.5655593872070312, "p10": -0.8165679931640621, "median": 2.6146812438964844, "p90": 6.975942993164063, "max": 11.145339965820312, "pos_frac": 0.84375, "sample": [2.7372207641601562, 0.6098060607910156, 8.3892822265625, 1.5550003051757812, -3.4357147216796875, -0.3784942626953125, 1.7732715606689453, 5.368377685546875, 3.6975555419921875, 5.35296630859375, 0.14760589599609375, 3.9212799072265625, 1.4612884521484375, 4.013214111328125, 0.01422882080078125, 2.4921417236328125, 2.279937744140625, 8.165008544921875, 2.318206787109375, 4.2032470703125, 3.5678558349609375, 0.980743408203125, 5.9274444580078125, -2.534820556640625, 0.524688720703125, 5.334583282470703, 1.1858673095703125, 4.9100189208984375, 0.8060226440429688, 8.053417205810547, 3.557811737060547, 3.6150360107421875, 6.348163604736328, 1.8402748107910156, 9.722328186035156, -3.5655593872070312, 1.419281005859375, 3.173412322998047, 3.8638687133789062, 7.996185302734375, 2.353485107421875, 6.864463806152344, -0.5131378173828125, 2.3448028564453125, 7.023719787597656, 1.68621826171875, 5.28729248046875, 5.802528381347656, -1.86712646484375, 11.145339965820312, 0.33661651611328125, 0.0316162109375, 4.594329833984375, -0.9466094970703125, 1.3209476470947266, 5.366355895996094, -2.6281585693359375, -1.9341278076171875, 5.867773056030273, -0.49602508544921875, 1.9553146362304688, 4.519462585449219, 2.853912353515625, 5.8419342041015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000150.npy"}
|
||||
{"epoch": 0.22675736961451248, "step": 151, "batch_size": 64, "mean": 3.1568784713745117, "std": 3.4620301723480225, "min": -4.422512054443359, "p10": -0.9526142120361327, "median": 3.0531740188598633, "p90": 6.938533020019532, "max": 13.42706298828125, "pos_frac": 0.796875, "sample": [4.5008697509765625, 3.245594024658203, 6.628139495849609, -0.8458747863769531, 0.6407623291015625, 0.9070835113525391, 13.42706298828125, 0.5739555358886719, 8.488372802734375, 1.1751708984375, 2.5282363891601562, 5.167760848999023, 2.50775146484375, -0.8012008666992188, 3.020509719848633, 4.10991096496582, 6.0377044677734375, -0.3432598114013672, 2.8421077728271484, 0.21738052368164062, -0.9983596801757812, -1.1650409698486328, 4.766426086425781, -1.6797447204589844, 11.624710083007812, -2.0625534057617188, 4.118215560913086, 2.9432296752929688, -1.0194282531738281, -0.4557952880859375, 3.3418540954589844, -1.3815689086914062, 5.5760040283203125, 4.3913726806640625, 1.633026123046875, -0.7199859619140625, 3.5405502319335938, 3.1634445190429688, 1.1925201416015625, 10.78839111328125, 0.7140140533447266, 3.0858383178710938, 6.2966156005859375, 1.7648124694824219, 8.563011169433594, 6.210472106933594, 6.688262939453125, 6.0566253662109375, 7.0457916259765625, 2.672710418701172, 3.27972412109375, 4.169136047363281, 1.7983760833740234, 8.874885559082031, -4.422512054443359, 5.263462066650391, 4.470027923583984, 2.3531055450439453, -0.5681095123291016, 0.82379150390625, 3.8027420043945312, 6.092292785644531, 0.5179691314697266, 4.86187744140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000151.npy"}
|
||||
{"epoch": 0.22826908541194255, "step": 152, "batch_size": 64, "mean": 2.44417667388916, "std": 3.9550161361694336, "min": -6.569496154785156, "p10": -1.9076499938964842, "median": 1.786860466003418, "p90": 8.212180137634277, "max": 11.52386474609375, "pos_frac": 0.71875, "sample": [-0.8525047302246094, 1.2310104370117188, 5.237630844116211, 1.2624053955078125, -6.569496154785156, 0.8168411254882812, 3.74169921875, 4.3343963623046875, 1.7409820556640625, -0.6208343505859375, 1.825235366821289, -2.0660018920898438, 3.014636993408203, 0.23474884033203125, 6.831451416015625, 8.49859619140625, 0.8870372772216797, 1.9608688354492188, 0.7384624481201172, -0.7004470825195312, -5.991386413574219, 8.004497528076172, 4.0890960693359375, 3.5004043579101562, -0.6601829528808594, 0.6863613128662109, 4.700649261474609, 2.1263809204101562, 0.8251628875732422, 2.0798797607421875, -0.02153778076171875, 10.29619026184082, 0.7677326202392578, 1.2212448120117188, -0.2947540283203125, -0.6817626953125, 1.7484855651855469, -0.7216949462890625, 4.687507629394531, 2.977783203125, -2.347808837890625, 8.212503433227539, 4.941719055175781, -1.7359237670898438, 6.304874420166016, 8.380279541015625, 5.2343902587890625, -0.6715164184570312, 3.6165618896484375, 2.986480712890625, -6.075225830078125, 8.21142578125, 0.03246307373046875, 9.791252136230469, 5.812244415283203, 3.0254592895507812, 9.219337463378906, 7.868598937988281, -1.9812469482421875, 11.52386474609375, -0.5710792541503906, -2.8492469787597656, 1.1843643188476562, 5.4267730712890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000152.npy"}
|
||||
{"epoch": 0.22978080120937264, "step": 153, "batch_size": 64, "mean": 2.6156349182128906, "std": 3.232847213745117, "min": -4.034576416015625, "p10": -1.608658218383789, "median": 2.1321449279785156, "p90": 6.952326965332031, "max": 10.109600067138672, "pos_frac": 0.8125, "sample": [3.645395278930664, 1.6458816528320312, 2.1402511596679688, 6.910980224609375, 4.9608917236328125, 7.259014129638672, 4.040596008300781, 2.1240386962890625, 1.7342300415039062, 1.3228302001953125, 1.8506107330322266, 8.122922897338867, 1.43328857421875, 6.309120178222656, 2.213165283203125, -1.6941604614257812, 4.320953369140625, 0.5316390991210938, 0.2728424072265625, 8.136322021484375, 10.109600067138672, 6.291027069091797, -2.0498619079589844, 2.4084243774414062, 9.433910369873047, -2.2740917205810547, 8.119499206542969, 0.9880924224853516, 0.2885589599609375, 6.230037689208984, -3.2527999877929688, 1.8159828186035156, -4.034576416015625, 0.699188232421875, 3.407623291015625, 0.7984199523925781, 0.35363006591796875, 6.646781921386719, 2.74822998046875, 2.186237335205078, 1.2283782958984375, -0.5888671875, 6.9700469970703125, 3.354522705078125, 3.1070823669433594, -0.18238258361816406, 4.517433166503906, 2.0082168579101562, 3.705280303955078, -0.8614406585693359, 0.3040771484375, 4.06793212890625, 1.0259513854980469, 0.9792327880859375, 4.7867889404296875, -2.2769927978515625, -2.09698486328125, 6.573526382446289, -0.6584320068359375, 6.040557861328125, 4.909976959228516, 0.2006206512451172, -1.4091529846191406, 3.5005340576171875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000153.npy"}
|
||||
{"epoch": 0.23129251700680273, "step": 154, "batch_size": 64, "mean": 1.9371455907821655, "std": 3.9494028091430664, "min": -7.96885871887207, "p10": -3.434952545166015, "median": 2.2813472747802734, "p90": 6.875114631652834, "max": 11.072135925292969, "pos_frac": 0.6875, "sample": [-4.328327178955078, 2.346343994140625, -7.117218017578125, -7.96885871887207, 7.2869415283203125, 4.539583206176758, -4.395475387573242, 1.9107818603515625, 1.4531707763671875, -3.6161422729492188, 2.5674667358398438, 0.00302886962890625, 4.179224014282227, -0.279388427734375, -2.4494476318359375, -3.012176513671875, -1.0982170104980469, 2.216350555419922, 0.22041893005371094, 0.7968215942382812, 5.523750305175781, 9.993051528930664, 8.760971069335938, 2.7678375244140625, -4.5484161376953125, 2.978006362915039, 7.775947570800781, -0.216644287109375, 8.81353759765625, -6.7090301513671875, 3.761934280395508, 2.1420249938964844, 5.880943298339844, 4.683963775634766, 2.7069969177246094, -0.3222694396972656, 1.5870132446289062, 3.942831039428711, 4.409820556640625, 2.7660694122314453, -0.9634513854980469, -0.47900390625, 3.647249221801758, -0.8703460693359375, 1.2433395385742188, 11.072135925292969, 3.2764549255371094, 3.5737533569335938, 3.0055389404296875, -2.1158103942871094, 1.4725494384765625, -0.11641693115234375, 1.8826713562011719, -0.4940185546875, 5.257415771484375, 2.8527069091796875, 7.066867828369141, -0.0117645263671875, 6.427690505981445, 5.296546936035156, 5.174644470214844, 2.791534423828125, 1.4997272491455078, 3.5340805053710938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000154.npy"}
|
||||
{"epoch": 0.2328042328042328, "step": 155, "batch_size": 64, "mean": 2.608389377593994, "std": 4.530350685119629, "min": -7.291984558105469, "p10": -3.2774868011474605, "median": 2.042236328125, "p90": 9.16835403442383, "max": 11.778215408325195, "pos_frac": 0.734375, "sample": [1.6548080444335938, 0.3865203857421875, 8.922821044921875, 3.1923065185546875, 0.8151092529296875, 1.8500480651855469, 0.268707275390625, 5.711626052856445, -1.5614585876464844, 8.75741958618164, 6.33514404296875, 6.62797737121582, -0.9838027954101562, -4.061622619628906, -0.19020462036132812, -2.2784576416015625, 3.3946533203125, 11.021255493164062, 3.614471435546875, 9.697860717773438, 9.359596252441406, -0.397003173828125, 5.282676696777344, -4.39253044128418, 5.082000732421875, 11.466964721679688, 0.1199493408203125, 6.2957611083984375, -1.4901351928710938, 6.785346984863281, -7.066558837890625, 1.8871574401855469, 11.778215408325195, 6.067684173583984, 9.382400512695312, 4.3048095703125, -0.24202728271484375, -1.8293533325195312, -5.2116851806640625, 4.134803771972656, -4.724456787109375, 1.5509223937988281, 4.8990478515625, 1.1375083923339844, 7.374855041503906, 3.113903045654297, 2.01983642578125, 7.418830871582031, -2.7281417846679688, 2.542797088623047, 4.055442810058594, 2.947662353515625, 0.0861358642578125, 0.7142791748046875, 1.7027873992919922, 0.5582504272460938, 4.036266326904297, -3.512920379638672, -7.291984558105469, -1.2101058959960938, 2.06463623046875, 9.273582458496094, 6.365682601928711, 0.0488433837890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000155.npy"}
|
||||
{"epoch": 0.23431594860166288, "step": 156, "batch_size": 64, "mean": 3.0588912963867188, "std": 3.8323512077331543, "min": -5.147216796875, "p10": -1.2733671188354492, "median": 2.476541519165039, "p90": 8.556719970703126, "max": 13.896766662597656, "pos_frac": 0.78125, "sample": [2.0600128173828125, -1.1350860595703125, -1.265939712524414, -3.567646026611328, 6.322296142578125, 9.028244018554688, 13.896766662597656, 2.7458877563476562, 4.2250213623046875, 8.748176574707031, 1.730194091796875, 6.200595855712891, -2.46624755859375, 4.2117767333984375, 6.273651123046875, 6.037162780761719, 7.089874267578125, 4.93438720703125, -0.9128379821777344, 4.699562072753906, 1.2712879180908203, 1.1136970520019531, 3.5221824645996094, 4.5113372802734375, 0.89764404296875, 1.8944854736328125, -0.7086334228515625, 5.530876159667969, 4.362518310546875, 6.212196350097656, 1.7260589599609375, 2.2912445068359375, -1.3890151977539062, 4.47406005859375, 3.5075912475585938, 13.545936584472656, -5.147216796875, 3.4844970703125, -2.4952392578125, 10.038314819335938, 5.263832092285156, 9.504302978515625, 1.1225662231445312, 1.5653076171875, 0.06792068481445312, 0.6554641723632812, -0.14589691162109375, -1.8261184692382812, 2.7022171020507812, 1.2037734985351562, 1.7905025482177734, -0.332427978515625, 4.367162704467773, 1.2352371215820312, 4.651824951171875, 1.3773422241210938, -1.1569290161132812, 4.702430725097656, 8.328399658203125, 1.1613426208496094, 2.6618385314941406, 8.654571533203125, 1.9912528991699219, -1.27655029296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000156.npy"}
|
||||
{"epoch": 0.23582766439909297, "step": 157, "batch_size": 64, "mean": 2.6608567237854004, "std": 3.9161317348480225, "min": -6.286895751953125, "p10": -1.6793937683105467, "median": 2.373394012451172, "p90": 7.814600372314454, "max": 14.8505859375, "pos_frac": 0.75, "sample": [7.927173614501953, 9.89564323425293, -0.9224777221679688, 4.381996154785156, 4.853179931640625, -5.104087829589844, 2.8175201416015625, -0.52032470703125, 7.551929473876953, -0.5451278686523438, -0.27739715576171875, 2.8766250610351562, 8.999069213867188, 1.7421607971191406, -6.286895751953125, 0.11556625366210938, 0.8036956787109375, 1.0857772827148438, 5.878358840942383, 8.426351547241211, 4.2390899658203125, 5.887840270996094, 14.8505859375, 4.150087356567383, 7.493024826049805, 2.4935760498046875, -1.8122024536132812, -0.5528469085693359, 2.8752174377441406, -0.92919921875, 5.675180435180664, 0.5482330322265625, 3.562175750732422, 2.2660293579101562, 3.1677322387695312, 8.426101684570312, -2.3285446166992188, 8.884489059448242, 7.363121032714844, 1.4462966918945312, -1.3695068359375, 2.3845596313476562, -0.2848625183105469, 6.481414794921875, 6.0646514892578125, 1.2081890106201172, 0.015380859375, 3.7941970825195312, 2.3622283935546875, 0.5180721282958984, 2.111724853515625, -2.4053611755371094, 1.7139167785644531, 1.6483383178710938, 3.247049331665039, 5.460973739624023, 1.13555908203125, 3.520519256591797, -4.753852844238281, 4.73760986328125, 2.1171875, -1.2073822021484375, -3.85064697265625, 4.2401580810546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000157.npy"}
|
||||
{"epoch": 0.23733938019652306, "step": 158, "batch_size": 64, "mean": 2.5913643836975098, "std": 3.814117193222046, "min": -5.032371520996094, "p10": -1.9729476928710936, "median": 2.3485374450683594, "p90": 7.940828514099121, "max": 10.836090087890625, "pos_frac": 0.703125, "sample": [1.8539352416992188, 5.175880432128906, 2.59344482421875, 2.674325942993164, -5.032371520996094, 5.695035934448242, 10.59661865234375, 5.6632843017578125, -0.8423309326171875, -0.0748291015625, 8.1826171875, 3.3466720581054688, 3.6337890625, 4.809732437133789, -5.02055549621582, 3.564403533935547, 9.93756103515625, 1.9727535247802734, 1.833282470703125, 4.298896789550781, -0.7470779418945312, 0.73040771484375, 5.297397613525391, 1.0514297485351562, 8.169763565063477, 3.436737060546875, -0.8939361572265625, 9.022575378417969, 4.0399322509765625, 4.2565460205078125, -1.039703369140625, 3.930816650390625, 6.878917694091797, 1.6881179809570312, 7.964776992797852, 6.64491081237793, 5.487297058105469, -1.8242340087890625, 2.0332908630371094, -3.5579833984375, 7.5663909912109375, 2.1036300659179688, 3.0626144409179688, 7.88494873046875, 3.7537841796875, 4.55925178527832, -0.42069435119628906, -1.5289382934570312, 10.836090087890625, -1.1783943176269531, 1.8382339477539062, -1.3001022338867188, -2.03668212890625, 1.406341552734375, 0.7385730743408203, -0.5239772796630859, -2.3636722564697266, 5.142345428466797, 4.367424011230469, 1.8577556610107422, -3.380809783935547, 0.6868820190429688, -0.29683494567871094, -4.358978271484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000158.npy"}
|
||||
{"epoch": 0.23885109599395313, "step": 159, "batch_size": 64, "mean": 3.0190436840057373, "std": 4.640224933624268, "min": -13.97845458984375, "p10": -2.156194496154785, "median": 2.808847427368164, "p90": 8.794644927978515, "max": 13.516571044921875, "pos_frac": 0.796875, "sample": [6.229999542236328, 5.287899017333984, 1.9859848022460938, 7.7526092529296875, 3.687835693359375, -3.2936172485351562, -5.1851806640625, 10.129674911499023, 0.2166595458984375, 13.421340942382812, 8.795013427734375, 2.677001953125, 9.379999160766602, 2.328369140625, -0.8342704772949219, 2.2410011291503906, 4.20513916015625, 13.516571044921875, 4.436317443847656, -0.019235610961914062, 1.0942573547363281, -5.324058532714844, 2.1370506286621094, 3.8953323364257812, 2.6259078979492188, 1.6166706085205078, 1.7064132690429688, -3.0201644897460938, 6.495849609375, 11.595855712890625, -1.8061904907226562, 5.0570526123046875, 3.2252368927001953, -0.22151947021484375, 1.0740737915039062, 3.6699752807617188, 8.793785095214844, 4.44512939453125, 1.07562255859375, -0.5282669067382812, 2.940692901611328, -3.0284576416015625, 10.0269775390625, 0.9164676666259766, 3.3781890869140625, 8.652589797973633, -2.3061962127685547, 4.216958999633789, -1.1398773193359375, 2.5249862670898438, 0.05388641357421875, 7.679643630981445, 3.7209548950195312, 0.0720062255859375, 0.14169883728027344, -13.97845458984375, 6.657442092895508, 3.795482635498047, 0.3572235107421875, 2.467813491821289, 5.1132354736328125, 3.3066253662109375, 7.132171630859375, 5.949615478515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000159.npy"}
|
||||
{"epoch": 0.24036281179138322, "step": 160, "batch_size": 64, "mean": 2.879239082336426, "std": 4.894455909729004, "min": -7.256095886230469, "p10": -2.73146800994873, "median": 2.6856813430786133, "p90": 9.337822151184083, "max": 16.552322387695312, "pos_frac": 0.71875, "sample": [1.130157470703125, -1.3515892028808594, 3.9044189453125, 12.409706115722656, 1.5028057098388672, 8.022300720214844, 0.7112579345703125, -0.4771728515625, 3.2481327056884766, 6.5595550537109375, 14.488204956054688, -1.399505615234375, 9.410871505737305, 6.8669281005859375, 4.64311408996582, 9.167373657226562, 0.3936347961425781, -4.840950012207031, 4.741672515869141, 6.480676651000977, -1.2659225463867188, -3.6351356506347656, 1.4235343933105469, -1.55059814453125, 3.664915084838867, 3.4006690979003906, 5.409637451171875, 0.8517284393310547, 3.5351104736328125, 0.6512794494628906, 2.6231937408447266, 3.333913803100586, 16.552322387695312, 12.678272247314453, 0.9221782684326172, -2.0194129943847656, 6.2495574951171875, 1.1105995178222656, -0.15995216369628906, 8.016082763671875, 0.8232002258300781, 5.2569427490234375, 0.7437095642089844, -3.406574249267578, -1.2647991180419922, 4.0874786376953125, -6.193695068359375, -3.0366344451904297, 5.094306945800781, 4.083282470703125, -4.3925323486328125, -1.2843856811523438, -1.7088546752929688, 5.989358901977539, 5.616050720214844, 0.8858489990234375, 0.5291671752929688, 9.664398193359375, -7.256095886230469, 4.3720550537109375, 6.7662811279296875, 10.766979217529297, 2.7481689453125, -2.01593017578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000160.npy"}
|
||||
{"epoch": 0.2418745275888133, "step": 161, "batch_size": 64, "mean": 2.9373526573181152, "std": 4.611465930938721, "min": -10.555900573730469, "p10": -1.7564201354980469, "median": 2.4375104904174805, "p90": 9.257945251464847, "max": 14.494354248046875, "pos_frac": 0.75, "sample": [2.5868453979492188, -6.612621307373047, 0.956573486328125, 3.37884521484375, 1.718994140625, -0.6856231689453125, 6.035919189453125, 11.54754638671875, -2.0544471740722656, -1.6035890579223633, -3.4739761352539062, 4.292266845703125, 3.558504104614258, 5.245857238769531, 9.525890350341797, 2.381227493286133, 3.9639129638671875, -10.555900573730469, 4.9259185791015625, -1.7670745849609375, -1.59710693359375, 2.032012939453125, 1.5568695068359375, 0.164398193359375, 4.668357849121094, -2.3554954528808594, 0.7965240478515625, 4.549089431762695, 3.7943687438964844, -0.8184051513671875, 8.569936752319336, 8.632740020751953, 5.025285720825195, 10.495002746582031, 9.972175598144531, -1.7315597534179688, -2.207914352416992, 14.477859497070312, 8.012069702148438, 7.030025482177734, 2.493793487548828, 8.459999084472656, -0.002777099609375, 2.3407135009765625, 2.5479507446289062, 0.6217842102050781, 1.1787109375, 4.317653656005859, 2.1353302001953125, 5.4579010009765625, -1.1031341552734375, 2.9857864379882812, 10.223686218261719, 1.41119384765625, 0.765411376953125, 0.14963340759277344, -0.651641845703125, 1.7624435424804688, 6.544746398925781, 3.7769622802734375, 14.494354248046875, -1.6134796142578125, 0.07073020935058594, 5.191493988037109], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000161.npy"}
|
||||
{"epoch": 0.24338624338624337, "step": 162, "batch_size": 64, "mean": 4.251701354980469, "std": 4.527994155883789, "min": -4.832160949707031, "p10": -2.384879302978516, "median": 4.491995811462402, "p90": 9.76094856262207, "max": 15.814346313476562, "pos_frac": 0.828125, "sample": [7.510528564453125, 7.238792419433594, -3.6667327880859375, 1.6388473510742188, 1.7515335083007812, 6.896297454833984, 4.930364608764648, 2.5472564697265625, 8.948074340820312, 10.664567947387695, 3.44873046875, 6.817893981933594, 5.091024398803711, 15.814346313476562, 7.438926696777344, 4.308454513549805, 3.83392333984375, 12.174606323242188, 4.675537109375, 11.634147644042969, 6.992912292480469, 6.262012481689453, 2.172161102294922, -2.3911895751953125, 4.900203704833984, -2.2840423583984375, 5.640655517578125, 2.2728233337402344, -3.5241775512695312, 8.6614990234375, 2.0450592041015625, 3.8100738525390625, 0.7259368896484375, 9.583152770996094, 8.892669677734375, 3.4180068969726562, 8.574575424194336, 6.447999954223633, 1.0399856567382812, -2.879606246948242, -3.2835159301757812, -1.34075927734375, 10.977134704589844, 9.837146759033203, 5.790506362915039, 3.759368896484375, -3.91168212890625, 5.117950439453125, -4.832160949707031, -0.21416473388671875, 2.044208526611328, 0.5324554443359375, 5.080596923828125, 6.38615608215332, 11.265251159667969, 6.278953552246094, 1.005746841430664, -2.3701553344726562, 1.5743408203125, 2.070068359375, 2.1550159454345703, 8.853790283203125, 8.971776962280273, 2.3030242919921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000162.npy"}
|
||||
{"epoch": 0.24489795918367346, "step": 163, "batch_size": 64, "mean": 4.13087272644043, "std": 5.2572503089904785, "min": -7.11357307434082, "p10": -3.3953857421874996, "median": 3.521757125854492, "p90": 10.48502311706543, "max": 14.364959716796875, "pos_frac": 0.796875, "sample": [-0.9119052886962891, 6.184379577636719, -6.056449890136719, 9.89544677734375, 4.680809020996094, 1.3797340393066406, -7.11357307434082, 2.953887939453125, 12.105682373046875, -3.4935760498046875, 5.476280212402344, 7.305091857910156, 13.933143615722656, 2.240997314453125, 3.1091156005859375, 5.664543151855469, 6.022163391113281, -0.18920135498046875, 14.364959716796875, 4.603401184082031, 0.43199920654296875, 6.585929870605469, 8.243541717529297, -2.9314193725585938, -3.508859634399414, 10.94668960571289, 3.644794464111328, 8.24481201171875, 4.56407356262207, 2.1249618530273438, 6.267129898071289, 2.6089935302734375, 7.1765289306640625, 0.3203239440917969, 7.7099456787109375, 9.8046875, -3.9638290405273438, 10.432907104492188, -3.5216217041015625, 1.7384624481201172, 7.167423248291016, 13.834659576416016, 3.272064208984375, 8.767169952392578, 1.16375732421875, -1.570220947265625, -0.44484710693359375, 2.0628738403320312, 1.8814697265625, 0.7603302001953125, -6.4741363525390625, 10.394290924072266, 8.405632019042969, 0.5729618072509766, 1.9768905639648438, -3.1662750244140625, 10.08526611328125, 13.611663818359375, 10.50735855102539, 8.69583511352539, 5.586963653564453, 2.0602664947509766, 2.750743865966797, 3.3987197875976562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000163.npy"}
|
||||
{"epoch": 0.24640967498110355, "step": 164, "batch_size": 64, "mean": 4.023058891296387, "std": 5.4777445793151855, "min": -10.621387481689453, "p10": -2.203849411010742, "median": 3.2090330123901367, "p90": 10.697266578674316, "max": 17.929595947265625, "pos_frac": 0.796875, "sample": [-2.3246383666992188, 0.8869819641113281, 0.15210723876953125, 3.082721710205078, 2.866668701171875, 2.6636276245117188, 16.0701904296875, 10.765968322753906, -0.35137176513671875, 1.88604736328125, 6.480316162109375, -1.9220085144042969, 8.243457794189453, -4.8516387939453125, 5.678249359130859, 7.75897216796875, 9.576560974121094, 3.3353443145751953, 13.09259033203125, -1.5783061981201172, 2.7683773040771484, -5.291358947753906, 4.1324005126953125, 3.9918060302734375, 0.2871417999267578, 3.0487442016601562, 10.380062103271484, 1.46710205078125, 3.8848724365234375, 0.3233833312988281, 8.336015701293945, 11.53200912475586, -1.3243579864501953, 4.803256988525391, 4.868961334228516, -4.6262664794921875, 10.31890869140625, 17.929595947265625, 13.516031265258789, -5.170204162597656, 2.7063465118408203, 3.5453109741210938, 1.5162925720214844, 8.13041877746582, 3.03961181640625, 4.0627899169921875, 9.104766845703125, 0.8305225372314453, 8.976593017578125, 1.1427574157714844, 1.6478118896484375, 3.0593948364257812, 11.034008026123047, -3.8114070892333984, -0.08229255676269531, -10.621387481689453, 2.5710220336914062, 10.536962509155273, 10.192508697509766, -0.45334625244140625, 10.0936279296875, 3.3643417358398438, 4.651037216186523, 5.549751281738281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000164.npy"}
|
||||
{"epoch": 0.24792139077853365, "step": 165, "batch_size": 64, "mean": 3.687171459197998, "std": 5.078067302703857, "min": -8.247184753417969, "p10": -2.5972993850708, "median": 3.996257781982422, "p90": 10.707990264892581, "max": 13.369377136230469, "pos_frac": 0.78125, "sample": [13.369377136230469, 4.80107307434082, 5.081214904785156, 8.545047760009766, 2.1780548095703125, 0.03296661376953125, 2.534454345703125, -1.001739501953125, 5.157024383544922, 2.796762466430664, 1.4757080078125, 5.850383758544922, 9.926658630371094, 0.8322086334228516, -0.63177490234375, 2.739288330078125, 4.1822509765625, 4.26934814453125, -6.2178192138671875, -1.4087142944335938, -6.6459197998046875, -0.5597000122070312, 11.07037353515625, -1.443817138671875, 11.0428466796875, 11.367748260498047, 3.180471420288086, 4.603330612182617, 12.424398422241211, 11.750408172607422, 3.977020263671875, 0.6104068756103516, 12.545867919921875, 9.147762298583984, 6.533489227294922, -8.247184753417969, 1.2088088989257812, 7.410579681396484, 6.754245758056641, 7.49700927734375, -2.182981491088867, 0.14963912963867188, 1.5013885498046875, 1.5792560577392578, 9.81393051147461, -2.7748641967773438, 2.1819496154785156, 1.8319320678710938, 5.849720001220703, -0.49645233154296875, -3.3710174560546875, 5.482307434082031, -7.347190856933594, 8.04007339477539, 3.467021942138672, -4.789875030517578, 4.933006286621094, 1.8217830657958984, 5.7954559326171875, 4.015495300292969, 8.149940490722656, 8.782852172851562, 6.550346374511719, 8.255348205566406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000165.npy"}
|
||||
{"epoch": 0.2494331065759637, "step": 166, "batch_size": 64, "mean": 2.8450818061828613, "std": 4.825867652893066, "min": -4.825420379638672, "p10": -2.7153861999511717, "median": 1.674464225769043, "p90": 9.225240707397463, "max": 16.038692474365234, "pos_frac": 0.671875, "sample": [6.7945556640625, -1.1553936004638672, -2.7531585693359375, 8.643051147460938, 8.240943908691406, 1.2078475952148438, -1.98980712890625, 10.52264404296875, 1.20147705078125, 2.946624755859375, -2.6272506713867188, 3.6799583435058594, 10.40523910522461, -2.311431884765625, 6.4294586181640625, 1.38922119140625, -0.74530029296875, 7.836017608642578, 0.3726806640625, 0.4271507263183594, -1.5473690032958984, 8.719554901123047, -4.120460510253906, 3.4891738891601562, 9.441963195800781, 3.20562744140625, -0.6038360595703125, 6.705535888671875, 6.197349548339844, 3.7007980346679688, -2.9217071533203125, -4.825420379638672, 1.828634262084961, 11.565605163574219, 6.516807556152344, -0.6317481994628906, 8.141265869140625, 1.0153427124023438, 4.964691162109375, 4.44288444519043, 1.25567626953125, 13.687637329101562, -1.8293609619140625, -4.702629089355469, 4.54925537109375, -4.172515869140625, 0.212310791015625, -1.2302513122558594, 2.827871322631836, 16.038692474365234, 3.836872100830078, -3.051410675048828, 1.520294189453125, 6.3639678955078125, -2.518199920654297, -0.9501800537109375, 5.687065124511719, -1.9677982330322266, 11.514190673828125, -0.9249343872070312, 4.3450775146484375, 5.739173889160156, 1.2356815338134766, 0.8195266723632812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000166.npy"}
|
||||
{"epoch": 0.2509448223733938, "step": 167, "batch_size": 64, "mean": 4.420334339141846, "std": 5.028050899505615, "min": -7.143535614013672, "p10": -1.8369014739990233, "median": 3.46121883392334, "p90": 10.798622131347658, "max": 15.264617919921875, "pos_frac": 0.8125, "sample": [1.9413986206054688, 3.2741127014160156, 2.2273006439208984, 5.244632720947266, -4.7122650146484375, 7.9577484130859375, 9.79364013671875, 2.0448074340820312, 3.5129146575927734, 6.5625, 8.394067764282227, 3.4095230102539062, 2.165264129638672, 0.10773468017578125, -0.07869720458984375, 0.5911178588867188, 10.043930053710938, 8.406631469726562, 10.575332641601562, -1.5608978271484375, -7.143535614013672, 3.2334938049316406, 2.8222179412841797, 1.1219406127929688, 3.363140106201172, 0.6698760986328125, 3.7615432739257812, -0.5726528167724609, 3.1461639404296875, 8.355316162109375, 2.33734130859375, 7.771688461303711, 0.3933582305908203, 6.618144989013672, 6.19793701171875, 6.17340087890625, 13.219459533691406, 15.264617919921875, 9.266891479492188, 8.205070495605469, 0.9963035583496094, 7.241119384765625, -2.0925827026367188, 3.900379180908203, 9.025480270385742, -2.2503204345703125, -4.816963195800781, 12.909111022949219, -0.8142013549804688, 8.085655212402344, 8.150798797607422, -0.061992645263671875, 0.77886962890625, 14.658252716064453, 7.665557861328125, 11.488449096679688, -2.5260696411132812, 13.822769165039062, 7.035831451416016, 10.894317626953125, 4.9600830078125, 0.7766075134277344, 0.9229202270507812, -1.9551887512207031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000167.npy"}
|
||||
{"epoch": 0.25245653817082386, "step": 168, "batch_size": 64, "mean": 3.0770163536071777, "std": 4.787522315979004, "min": -5.145957946777344, "p10": -2.258342742919922, "median": 2.4858341217041016, "p90": 9.832202911376953, "max": 12.746025085449219, "pos_frac": 0.671875, "sample": [5.082572937011719, -3.4603042602539062, 9.646965026855469, -2.3942489624023438, 2.132190704345703, 9.106525421142578, 5.8609161376953125, -1.9962387084960938, -4.739776611328125, -3.94110107421875, 3.502838134765625, -1.4405460357666016, 3.0073394775390625, 11.911727905273438, 4.91278076171875, 1.658681869506836, 5.877613067626953, 2.8394775390625, 0.344207763671875, 4.783454895019531, 6.3493804931640625, -1.2190608978271484, 1.1910896301269531, 3.2946300506591797, -0.5582294464111328, 5.768451690673828, 12.155433654785156, 1.4183006286621094, -5.145957946777344, 3.5905990600585938, -0.27837371826171875, 2.9064254760742188, -1.5291748046875, 7.576591491699219, 9.05807876586914, -0.681732177734375, 9.911590576171875, 1.4295578002929688, -1.4461746215820312, 1.484375, 0.9561977386474609, 12.746025085449219, -2.2779464721679688, 11.02865219116211, 8.873878479003906, -0.8567123413085938, 7.028656005859375, -1.268646240234375, 2.9815292358398438, -2.0819625854492188, 5.023101806640625, 0.9225883483886719, 8.40414810180664, 1.7864837646484375, -1.3500595092773438, 11.321783065795898, -2.2126007080078125, -4.309841156005859, -0.6466445922851562, 0.40717506408691406, 8.226707458496094, 11.334709167480469, 9.199920654296875, 3.7210159301757812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000168.npy"}
|
||||
{"epoch": 0.25396825396825395, "step": 169, "batch_size": 64, "mean": 4.214674472808838, "std": 6.365307807922363, "min": -8.592765808105469, "p10": -2.440925598144531, "median": 3.3080673217773438, "p90": 12.495620727539062, "max": 25.425140380859375, "pos_frac": 0.78125, "sample": [4.2015228271484375, 9.48321533203125, 8.457122802734375, -3.4855880737304688, 4.98577880859375, 1.824483871459961, -2.598888397216797, 0.36295318603515625, 6.672981262207031, 5.801483154296875, 1.7363662719726562, 0.6809921264648438, 8.05462646484375, 12.176559448242188, 0.5449295043945312, -1.9368858337402344, 3.7618370056152344, 2.81353759765625, 6.726600646972656, -0.6695327758789062, -1.2564239501953125, 12.292999267578125, 13.8505859375, 4.870330810546875, -1.4714546203613281, 22.76213836669922, 2.2983627319335938, 3.8433837890625, -0.0108489990234375, 6.223461151123047, 2.7776317596435547, 1.845916748046875, 8.495216369628906, -2.072345733642578, 7.646739959716797, 2.141448974609375, 8.421810150146484, -8.592765808105469, 4.156534194946289, 2.8910064697265625, 0.9763164520263672, 3.725128173828125, 1.804403305053711, 0.4189300537109375, 5.214801788330078, 2.1156692504882812, 5.667411804199219, 0.8627471923828125, 13.754241943359375, 6.9247589111328125, -1.0704727172851562, 6.354892730712891, 5.18548583984375, -7.3863525390625, -3.3282470703125, 25.425140380859375, 12.58245849609375, 14.046890258789062, -6.420999526977539, 16.480342864990234, 2.1846771240234375, 8.26719856262207, -5.146728515625, 0.39263916015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000169.npy"}
|
||||
{"epoch": 0.25547996976568405, "step": 170, "batch_size": 64, "mean": 3.88338041305542, "std": 6.32066535949707, "min": -14.838342666625977, "p10": -5.003650665283202, "median": 4.313511848449707, "p90": 11.5781852722168, "max": 16.947280883789062, "pos_frac": 0.75, "sample": [8.089950561523438, 0.7280044555664062, 3.945253372192383, 4.2513580322265625, 3.251453399658203, 16.58515167236328, 3.721040725708008, 4.676904678344727, 3.850128173828125, 14.284154891967773, -5.343963623046875, 1.545989990234375, 6.748298645019531, 16.947280883789062, 12.445537567138672, -1.5414257049560547, 2.6495494842529297, 6.26318359375, 7.696708679199219, -0.882781982421875, -0.4280662536621094, 10.608963012695312, 4.375665664672852, 0.2908191680908203, -4.209587097167969, 5.291656494140625, -6.701210021972656, -2.1191253662109375, -14.838342666625977, 6.5262451171875, 8.38955307006836, 5.363739013671875, 10.140083312988281, 4.585958480834961, -8.307310104370117, 3.445831298828125, -6.0562591552734375, 7.202262878417969, 11.868698120117188, -0.45925140380859375, 5.66009521484375, -0.43896484375, 0.18314361572265625, 6.55194091796875, 5.2441558837890625, 16.69671630859375, 5.9742279052734375, 10.900321960449219, 3.928607940673828, 0.20947265625, -1.8795928955078125, -2.563974380493164, 1.503875732421875, 12.716331481933594, -5.767204284667969, 1.4749870300292969, 8.146621704101562, 9.922401428222656, 1.2288856506347656, 5.315219879150391, 9.06475830078125, 9.824234008789062, 6.854255676269531, -7.096260070800781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000170.npy"}
|
||||
{"epoch": 0.25699168556311414, "step": 171, "batch_size": 64, "mean": 4.575246810913086, "std": 6.941427707672119, "min": -15.422599792480469, "p10": -1.5753601074218748, "median": 3.337301254272461, "p90": 14.861283111572266, "max": 16.927993774414062, "pos_frac": 0.734375, "sample": [4.3619232177734375, 5.172657012939453, 13.999526977539062, 8.077728271484375, 7.577125549316406, 16.386905670166016, 6.854503631591797, 11.010780334472656, 0.31478309631347656, 2.70989990234375, -0.12241744995117188, 1.6993865966796875, 15.508247375488281, 6.984306335449219, 16.67215347290039, 0.061107635498046875, 9.515052795410156, 2.4544219970703125, 10.608903884887695, -1.0299224853515625, -0.20517730712890625, 2.564929962158203, 4.589366912841797, -3.2906494140625, 11.539783477783203, 0.08809089660644531, 2.8466949462890625, 0.6524276733398438, 14.697898864746094, -11.60983657836914, 5.7331390380859375, 7.0492706298828125, 15.813652038574219, -1.6233062744140625, 5.426525115966797, 11.262432098388672, -2.2024917602539062, -5.072471618652344, 16.927993774414062, 14.931304931640625, -15.422599792480469, 1.0053443908691406, 8.876155853271484, 3.4194869995117188, 2.1895065307617188, 11.099662780761719, 9.392654418945312, -0.139739990234375, -1.2926559448242188, 7.0938873291015625, 3.255115509033203, 15.897926330566406, 9.453638076782227, 2.2551727294921875, -1.4634857177734375, -0.1798248291015625, 7.0645751953125, -0.07106971740722656, -0.7672691345214844, -11.040374755859375, 11.6038818359375, 0.38382720947265625, 2.405029296875, -1.13970947265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000171.npy"}
|
||||
{"epoch": 0.2585034013605442, "step": 172, "batch_size": 64, "mean": 4.478647232055664, "std": 6.199909210205078, "min": -16.1076602935791, "p10": -1.8438606262207025, "median": 4.635497093200684, "p90": 11.984880638122558, "max": 18.076566696166992, "pos_frac": 0.78125, "sample": [-4.679718017578125, 5.5973968505859375, 1.6197128295898438, 3.2990188598632812, 2.1997451782226562, 1.807943344116211, 0.20444488525390625, 6.485141754150391, 9.138664245605469, 11.94981575012207, 16.880340576171875, 4.819732666015625, 3.810089111328125, -0.479766845703125, 8.145706176757812, 18.076566696166992, 8.155609130859375, -6.0428009033203125, -0.6335105895996094, 5.568260192871094, 7.11090087890625, 2.3041954040527344, -6.8280487060546875, -0.07221221923828125, 4.47100830078125, 4.799985885620117, 2.8739242553710938, 8.514877319335938, 7.446296691894531, 0.19603729248046875, 5.039455413818359, 7.910430908203125, 6.95062255859375, 8.744491577148438, 0.6345672607421875, -1.1944198608398438, 13.18801498413086, 7.7397613525390625, 11.999908447265625, 1.4813652038574219, -0.2134838104248047, 5.440376281738281, 5.559696197509766, 11.221006393432617, 10.595230102539062, -6.363407135009766, 1.2557754516601562, -0.01123809814453125, 3.316650390625, 6.842994689941406, -4.210792541503906, 10.638671875, 13.326148986816406, -16.1076602935791, 2.1630725860595703, 10.141143798828125, 0.7322540283203125, 2.9382705688476562, 1.4222068786621094, 14.527853012084961, -2.1221923828125, -0.2909526824951172, 16.384605407714844, 10.213638305664062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000172.npy"}
|
||||
{"epoch": 0.2600151171579743, "step": 173, "batch_size": 64, "mean": 4.859795570373535, "std": 8.364689826965332, "min": -11.772096633911133, "p10": -5.096933555603027, "median": 4.52475643157959, "p90": 17.235759735107422, "max": 21.853897094726562, "pos_frac": 0.6875, "sample": [-7.928068161010742, 2.0324478149414062, -1.6380081176757812, 5.859882354736328, 4.621635437011719, 13.53619384765625, 6.622079849243164, -1.4056777954101562, -1.096151351928711, 8.70379638671875, 1.3194808959960938, 5.091278076171875, 14.599403381347656, -9.869157791137695, -4.506635665893555, 17.42180633544922, 16.801651000976562, 1.7303390502929688, 4.427877426147461, 3.1643714904785156, 8.815444946289062, 18.237152099609375, -3.109546661376953, 18.072601318359375, -11.772096633911133, -8.894866943359375, 8.226264953613281, 10.4281005859375, 2.5686988830566406, 1.2691574096679688, -2.6883010864257812, 17.953811645507812, -3.3677978515625, 16.723846435546875, 7.1838531494140625, 2.3824691772460938, 12.774696350097656, -3.6731109619140625, 11.985420227050781, 12.92376708984375, 6.192169189453125, 2.2222824096679688, 2.751527786254883, 13.511337280273438, 5.7861328125, -11.126392364501953, -1.7152938842773438, -2.9428482055664062, 21.853897094726562, 12.514495849609375, 17.729398727416992, -7.012540817260742, 5.9584197998046875, 7.123283386230469, 16.123870849609375, -3.2493209838867188, -0.5021820068359375, -5.349918365478516, 5.139490127563477, 19.29156494140625, 0.8250503540039062, 8.760269165039062, -0.6142196655273438, 2.22833251953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000173.npy"}
|
||||
{"epoch": 0.2615268329554044, "step": 174, "batch_size": 64, "mean": 4.9458723068237305, "std": 7.315618515014648, "min": -12.700225830078125, "p10": -3.4917129516601557, "median": 3.8157644271850586, "p90": 15.108361053466801, "max": 22.279006958007812, "pos_frac": 0.765625, "sample": [3.8788223266601562, 7.1747283935546875, 12.165153503417969, 3.752706527709961, 6.399188995361328, -2.3572998046875, 1.8667984008789062, 9.339508056640625, 0.2396087646484375, 5.113491058349609, 14.101913452148438, 12.116317749023438, 1.3798599243164062, -0.9081573486328125, 10.0655517578125, -5.035999298095703, 11.498106002807617, 8.806453704833984, 7.164848327636719, -10.044490814208984, 15.539695739746094, 7.014411926269531, -2.068279266357422, 8.759529113769531, 21.528892517089844, -1.6830253601074219, 4.55609130859375, -1.4352989196777344, 18.67754364013672, 1.9093170166015625, 0.896270751953125, 22.279006958007812, 19.623010635375977, -4.039478302001953, 5.971065521240234, 9.554351806640625, 2.6018295288085938, 9.009506225585938, 0.5295009613037109, 15.704254150390625, -2.8836517333984375, 13.9354248046875, 4.4724884033203125, -0.4393043518066406, 0.7009048461914062, 9.695487976074219, 3.5989532470703125, -3.9384231567382812, 16.012306213378906, 3.0384292602539062, -12.700225830078125, 10.900115966796875, 1.6726341247558594, 1.19293212890625, 8.817672729492188, -3.0198516845703125, 6.130592346191406, -3.693939208984375, 8.921680450439453, -6.788291931152344, 1.1251354217529297, 3.6803741455078125, 3.61376953125, 0.8453292846679688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000174.npy"}
|
||||
{"epoch": 0.26303854875283444, "step": 175, "batch_size": 64, "mean": 5.492076873779297, "std": 7.92834997177124, "min": -13.759490966796875, "p10": -3.468008422851562, "median": 4.538858413696289, "p90": 17.264179801940926, "max": 21.852294921875, "pos_frac": 0.71875, "sample": [-5.617204666137695, 9.138925552368164, -2.457305908203125, 1.9430599212646484, -2.010833740234375, 12.855533599853516, 0.19033241271972656, 11.206024169921875, 6.7545013427734375, -0.8010749816894531, 1.2283401489257812, 13.912437438964844, 5.4309539794921875, 7.111553192138672, -3.0776710510253906, 18.095094680786133, 5.8222808837890625, 13.182601928710938, 10.797622680664062, 1.6326217651367188, 3.9222030639648438, 3.9301910400390625, 4.2454833984375, 9.811279296875, 12.73486328125, 5.5322113037109375, -0.6367568969726562, -2.8739852905273438, 19.775596618652344, -13.759490966796875, 14.640281677246094, 0.6839447021484375, 5.005348205566406, 4.3116607666015625, -1.38714599609375, -0.027477264404296875, 4.691566467285156, 19.156814575195312, -1.2912654876708984, -8.075675964355469, 6.261756896972656, -2.8308868408203125, 2.1318817138671875, 4.386150360107422, -4.8837127685546875, -2.9160919189453125, 12.783538818359375, -5.385768890380859, 12.60394287109375, 15.32537841796875, 7.657489776611328, 14.770402908325195, 0.31664276123046875, -4.48583984375, 2.0674591064453125, 13.116340637207031, -3.635295867919922, 19.42583465576172, 9.851325988769531, 21.852294921875, 1.2528533935546875, 18.151565551757812, 8.704994201660156, 19.243202209472656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000175.npy"}
|
||||
{"epoch": 0.26455026455026454, "step": 176, "batch_size": 64, "mean": 3.8402822017669678, "std": 8.007859230041504, "min": -14.678970336914062, "p10": -5.1958469390869135, "median": 3.5961380004882812, "p90": 12.81745395660401, "max": 27.47125244140625, "pos_frac": 0.65625, "sample": [19.849655151367188, -10.677558898925781, 8.022270202636719, 10.628240585327148, 0.6745986938476562, 3.7028274536132812, -0.9762496948242188, 7.040336608886719, 3.7219181060791016, 2.1940460205078125, -10.525894165039062, -5.424629211425781, -4.662021636962891, 8.162609100341797, -6.5778350830078125, 13.525373458862305, -0.15002822875976562, 10.475353240966797, -2.8192901611328125, -0.4555015563964844, -1.4110183715820312, -2.1526870727539062, 3.8275184631347656, 22.522075653076172, 6.4007568359375, 3.4894485473632812, -4.451881408691406, 6.216133117675781, -0.23370361328125, 10.01712417602539, 10.306976318359375, -1.4902172088623047, -1.1668014526367188, 11.165641784667969, 6.352151870727539, 20.022777557373047, 1.0679702758789062, 7.64288330078125, 1.3463554382324219, -5.8378448486328125, 1.5058059692382812, 2.093841552734375, 5.925500869750977, 13.564411163330078, 4.743328094482422, 9.650249481201172, -2.776031494140625, -3.41888427734375, 8.4932861328125, 16.761749267578125, -0.213714599609375, 10.686203002929688, 1.5498886108398438, 8.60687255859375, 1.5282211303710938, 5.315223693847656, 27.47125244140625, 0.356536865234375, -2.1973342895507812, 10.4903564453125, 6.116600036621094, -14.678970336914062, 5.2874755859375, -10.4456787109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000176.npy"}
|
||||
{"epoch": 0.2660619803476946, "step": 177, "batch_size": 64, "mean": 7.444478988647461, "std": 7.631320476531982, "min": -12.557968139648438, "p10": -0.8183425903320312, "median": 6.415336608886719, "p90": 18.553357124328617, "max": 24.86231231689453, "pos_frac": 0.859375, "sample": [3.063262939453125, 0.5364189147949219, 10.716054916381836, 24.044174194335938, 12.65570068359375, -0.6714019775390625, 3.4970855712890625, 9.0621337890625, 21.105026245117188, 6.524009704589844, 24.86231231689453, 14.516834259033203, 16.609966278076172, -1.5150909423828125, 5.632678985595703, -5.689897537231445, 19.079063415527344, 2.5878067016601562, -2.343080520629883, 9.527414321899414, 7.092010498046875, 6.306663513183594, 2.974691390991211, 13.510725021362305, 11.728448867797852, 1.3312911987304688, 4.792699813842773, 14.485702514648438, -2.1307315826416016, 3.0502490997314453, 8.474853515625, 4.538230895996094, 20.263565063476562, 7.91583251953125, 0.8632793426513672, 12.61376953125, 17.608495712280273, 5.982818603515625, 5.01458740234375, 9.773300170898438, 2.399951934814453, 4.370731353759766, 10.431991577148438, 2.0405044555664062, 11.496932983398438, -6.554069519042969, 1.2091293334960938, -0.881317138671875, 14.407958984375, 0.4870471954345703, 13.621383666992188, 0.3963451385498047, -12.557968139648438, 22.63479232788086, 6.6461181640625, 11.974189758300781, 6.799478530883789, 18.958297729492188, 10.23419189453125, 4.668458938598633, 5.764091491699219, 13.298526763916016, -0.1077117919921875, 4.716678619384766], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000177.npy"}
|
||||
{"epoch": 0.2675736961451247, "step": 178, "batch_size": 64, "mean": 4.535033702850342, "std": 7.245887756347656, "min": -12.310295104980469, "p10": -4.423483276367187, "median": 3.675182342529297, "p90": 15.01695404052735, "max": 19.594032287597656, "pos_frac": 0.78125, "sample": [3.299234390258789, 13.056381225585938, 5.503448486328125, 1.399200439453125, 10.672943115234375, 0.8530673980712891, -1.9263038635253906, 7.686656951904297, 11.193099975585938, 7.4064788818359375, -4.625022888183594, 3.6966476440429688, 2.2566680908203125, -8.999364852905273, 5.350944519042969, 4.714626312255859, 16.391321182250977, -0.4220714569091797, -1.1711769104003906, 0.2812995910644531, 17.24451446533203, 1.384185791015625, 5.419189453125, -10.478973388671875, 3.8215599060058594, 1.057891845703125, 2.56402587890625, 7.780029296875, 3.0918426513671875, 3.042367935180664, 8.9971923828125, 0.059391021728515625, 1.8039169311523438, 18.804397583007812, 3.653717041015625, 9.35593032836914, 3.339630126953125, 3.3091659545898438, 19.594032287597656, 2.22967529296875, -7.3421173095703125, 11.949119567871094, 15.706085205078125, 6.234813690185547, -12.310295104980469, 13.333915710449219, -1.2978363037109375, -5.854541778564453, 16.004730224609375, 4.973066329956055, 13.408981323242188, 19.048477172851562, 5.385223388671875, 7.850860595703125, 2.3522682189941406, 1.48687744140625, 11.77923583984375, -0.01215362548828125, -8.9840087890625, -3.9532241821289062, 6.074268341064453, 6.6045989990234375, 8.681312561035156, -3.5692577362060547], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000178.npy"}
|
||||
{"epoch": 0.2690854119425548, "step": 179, "batch_size": 64, "mean": 5.008025169372559, "std": 8.542933464050293, "min": -17.12469482421875, "p10": -4.905069732666016, "median": 4.9590911865234375, "p90": 14.051734161376952, "max": 27.48046875, "pos_frac": 0.703125, "sample": [8.166091918945312, -2.292449951171875, -0.4242095947265625, 6.136444091796875, -0.13800811767578125, 7.548013687133789, 6.951595306396484, 6.627471923828125, 20.217727661132812, 8.856895446777344, 12.213186264038086, 14.067314147949219, 9.939109802246094, -8.972396850585938, 8.200794219970703, -4.886009216308594, -17.12469482421875, -5.058197021484375, 1.7704277038574219, 4.644561767578125, -2.710826873779297, 13.204536437988281, 2.592437744140625, 11.73919677734375, 14.067359924316406, -8.884475708007812, -2.0255889892578125, 11.622669219970703, 2.4116954803466797, 3.656770706176758, 5.27362060546875, 11.421119689941406, 1.9384727478027344, -4.913238525390625, 2.33111572265625, -7.675773620605469, 1.433624267578125, 26.475921630859375, 10.860641479492188, -2.2207107543945312, 10.385297775268555, 9.382530212402344, 10.038629531860352, 8.204242706298828, -3.6383819580078125, 14.015380859375, 16.51207733154297, 3.3113174438476562, 18.141036987304688, 27.48046875, 8.819080352783203, 8.551536560058594, 12.79046630859375, 1.4581527709960938, -1.6128311157226562, 10.845428466796875, 1.8506393432617188, 0.635589599609375, 4.568033218383789, -2.270313262939453, -13.02985954284668, -0.29058265686035156, 11.598220825195312, -4.274797439575195], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000179.npy"}
|
||||
{"epoch": 0.2705971277399849, "step": 180, "batch_size": 64, "mean": 4.732954502105713, "std": 9.092035293579102, "min": -15.083099365234375, "p10": -5.902594757080077, "median": 3.4742298126220703, "p90": 18.5186637878418, "max": 24.37908172607422, "pos_frac": 0.734375, "sample": [15.136016845703125, 0.8396377563476562, 7.221261978149414, 1.7692337036132812, 2.61358642578125, 10.193778991699219, -10.440238952636719, 6.768947601318359, -0.7635593414306641, -3.2302284240722656, 16.82840919494629, 10.947338104248047, -5.326263427734375, -0.46402549743652344, 8.356735229492188, 0.7067451477050781, -6.0955810546875, 15.702804565429688, 0.08847427368164062, -12.3905029296875, 19.891326904296875, 5.035139083862305, 6.014196395874023, 7.0996856689453125, 8.272796630859375, 10.988178253173828, 0.13298416137695312, 2.0587997436523438, 4.471244812011719, 5.457908630371094, 7.454841613769531, 15.611801147460938, -6.772605895996094, 1.599029541015625, 6.296833038330078, 8.240631103515625, 1.6984329223632812, 19.82231903076172, 0.066986083984375, -2.1078109741210938, 2.7612133026123047, 9.693023681640625, 18.9321346282959, -0.7633609771728516, -4.423431396484375, 2.884082794189453, 23.917678833007812, -1.2038764953613281, 24.37908172607422, -0.28270530700683594, 18.627609252929688, -11.05099105834961, 2.191650390625, -5.452293395996094, 1.4209518432617188, 4.025836944580078, 23.429004669189453, -15.083099365234375, 5.735347747802734, -12.901145935058594, 9.413223266601562, 5.676784515380859, 18.26445770263672, 2.9226226806640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000180.npy"}
|
||||
{"epoch": 0.272108843537415, "step": 181, "batch_size": 64, "mean": 7.095475196838379, "std": 7.506069660186768, "min": -8.965347290039062, "p10": -1.8795213699340811, "median": 7.397879600524902, "p90": 16.728781890869143, "max": 25.82428741455078, "pos_frac": 0.84375, "sample": [5.5167999267578125, 12.402542114257812, 0.27732086181640625, 0.025859832763671875, 5.520580291748047, 3.399394989013672, 7.6475067138671875, -8.965347290039062, 2.9743728637695312, 1.7481460571289062, 11.677078247070312, 4.385776519775391, 5.888568878173828, -2.2820587158203125, 12.614931106567383, 9.665084838867188, 10.935796737670898, 7.61883544921875, 16.299842834472656, -3.9330596923828125, 8.311553955078125, 18.44853973388672, 12.992473602294922, 8.413246154785156, 23.8729248046875, 11.594985961914062, 4.717430114746094, 25.82428741455078, -0.0154266357421875, -7.6269073486328125, 13.225982666015625, 3.3844337463378906, 19.396759033203125, -6.769626617431641, 15.090103149414062, 1.2598648071289062, -0.9402675628662109, 17.39879608154297, 1.7359619140625, 0.93426513671875, 12.111038208007812, 3.7230873107910156, 8.101997375488281, 15.165626525878906, 13.53342056274414, 1.3342037200927734, 1.2630901336669922, 7.176923751831055, 8.152702331542969, 1.0252304077148438, 2.3176727294921875, 16.912612915039062, 20.87334442138672, 4.458778381347656, 9.5933837890625, 15.796672821044922, 4.48040771484375, 7.7802734375, 9.351749420166016, -3.275226593017578, 9.477561950683594, -0.5457553863525391, 13.138397216796875, -2.504150390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000181.npy"}
|
||||
{"epoch": 0.273620559334845, "step": 182, "batch_size": 64, "mean": 7.804678916931152, "std": 10.101659774780273, "min": -19.45655059814453, "p10": -3.4069549560546877, "median": 6.646540641784668, "p90": 20.70275917053223, "max": 27.714248657226562, "pos_frac": 0.75, "sample": [27.714248657226562, -4.919878005981445, -3.286243438720703, 9.302101135253906, 15.989349365234375, 13.262557983398438, 9.994918823242188, -4.085910797119141, -3.7653427124023438, 24.21417236328125, 11.017463684082031, 4.527595520019531, 25.725982666015625, -1.3595504760742188, 15.869590759277344, 10.204742431640625, 7.132682800292969, 4.941612243652344, 20.484668731689453, 6.408843994140625, 8.853864669799805, 0.6062355041503906, 3.0821075439453125, 0.3582611083984375, 11.975616455078125, -0.625396728515625, 18.67097282409668, 7.83259391784668, 19.99359130859375, 19.841203689575195, 25.812973022460938, 5.785432815551758, 11.158294677734375, 4.326515197753906, 20.796226501464844, -2.473846435546875, 0.51116943359375, 3.7533798217773438, 19.102005004882812, -0.11292457580566406, 4.605094909667969, -12.319116592407227, 22.880226135253906, -3.397705078125, 18.464431762695312, 12.676383972167969, 3.8875484466552734, -1.035736083984375, 16.133766174316406, 5.025352478027344, 11.631011962890625, 20.195175170898438, -3.410919189453125, 19.16768455505371, 22.538597106933594, 3.0101318359375, -1.909149169921875, 6.884237289428711, 11.30148696899414, 1.767538070678711, 1.8988227844238281, -9.055191040039062, -0.6055984497070312, -19.45655059814453], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000182.npy"}
|
||||
{"epoch": 0.2751322751322751, "step": 183, "batch_size": 64, "mean": 3.044058322906494, "std": 8.136937141418457, "min": -21.100547790527344, "p10": -7.6130546569824205, "median": 1.8266372680664062, "p90": 15.478514099121096, "max": 17.74074935913086, "pos_frac": 0.65625, "sample": [1.6379547119140625, 14.719963073730469, -0.9475631713867188, -0.11746597290039062, 2.9338455200195312, -1.8196868896484375, -5.5092315673828125, 16.63959503173828, -9.159866333007812, -0.3245697021484375, -21.100547790527344, 1.130950927734375, 9.777658462524414, -3.2413177490234375, 8.989131927490234, 15.901477813720703, -0.7892036437988281, 6.8244171142578125, 11.770637512207031, 9.63546371459961, 7.8195648193359375, -5.0331268310546875, 14.561378479003906, -6.361013412475586, 0.9747848510742188, -3.1987457275390625, 7.3569488525390625, 1.6473464965820312, -9.754531860351562, -8.271575927734375, 16.117721557617188, 7.001434326171875, 1.5833587646484375, 1.4388046264648438, -8.089653015136719, 8.766700744628906, 5.7115325927734375, 16.902751922607422, -12.669342041015625, 17.74074935913086, -8.145198822021484, 0.5860137939453125, 15.677627563476562, -0.4144706726074219, 2.0059280395507812, 5.8554840087890625, 11.389352798461914, 3.3229331970214844, -6.5009918212890625, -0.9936122894287109, 1.2265396118164062, 7.042694091796875, 0.5193023681640625, 2.7142868041992188, 15.013916015625, -0.05550193786621094, 4.699188232421875, -5.073219299316406, 2.3396224975585938, 0.5476226806640625, 3.4638214111328125, 15.984268188476562, 7.797246932983398, 4.620147705078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000183.npy"}
|
||||
{"epoch": 0.2766439909297052, "step": 184, "batch_size": 64, "mean": 6.1167731285095215, "std": 7.885300159454346, "min": -7.051033020019531, "p10": -3.474619102478026, "median": 4.306911468505859, "p90": 18.195588302612308, "max": 23.090133666992188, "pos_frac": 0.765625, "sample": [2.5125579833984375, 13.1044921875, 18.516033172607422, 2.4459915161132812, 17.44788360595703, 5.38037109375, -1.321207046508789, -1.0217514038085938, 0.055278778076171875, 3.2147445678710938, 0.4130363464355469, 19.932884216308594, 1.972360610961914, 4.091701507568359, 12.777225494384766, -7.051033020019531, 16.365928649902344, 13.011589050292969, 5.481525421142578, 23.090133666992188, -4.489276885986328, 19.518526077270508, 8.015018463134766, 22.43421173095703, -5.310356140136719, 15.301513671875, 1.9470043182373047, 11.405563354492188, 20.01834487915039, 1.9407806396484375, 7.480796813964844, 0.5698471069335938, 6.489990234375, -5.246421813964844, -0.15237808227539062, 4.552581787109375, 19.416030883789062, 4.950780868530273, 0.8106613159179688, 14.580055236816406, 5.360185623168945, 1.3993453979492188, 14.253791809082031, -0.3534431457519531, 15.425193786621094, 7.149608612060547, 4.5118255615234375, 3.9568939208984375, -0.5659389495849609, 15.855018615722656, -5.515405654907227, -1.00921630859375, 3.0606536865234375, 4.101997375488281, 12.132965087890625, 14.68182373046875, -4.719242095947266, 5.546539306640625, -0.3135509490966797, 5.353767395019531, -3.9234237670898438, 1.3160457611083984, -2.427408218383789, 1.5424118041992188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000184.npy"}
|
||||
{"epoch": 0.2781557067271353, "step": 185, "batch_size": 64, "mean": 6.345008850097656, "std": 11.145155906677246, "min": -16.84739875793457, "p10": -6.577707099914551, "median": 6.060493469238281, "p90": 21.468434715271, "max": 27.84355926513672, "pos_frac": 0.671875, "sample": [3.7975196838378906, 25.507240295410156, -2.4545974731445312, 16.131803512573242, 26.893264770507812, -15.612590789794922, 8.781909942626953, 4.319156646728516, -5.6166229248046875, 0.3392791748046875, 24.38813018798828, 23.74706268310547, -1.1525955200195312, -4.471748352050781, 21.592758178710938, 3.6799354553222656, 27.84355926513672, 4.256935119628906, 18.058815002441406, 19.148090362548828, -3.7516250610351562, 22.543960571289062, 7.974498748779297, 5.784416198730469, 1.9744091033935547, 13.71060562133789, 8.005325317382812, 11.490602493286133, 5.974822998046875, 11.330078125, -6.701667785644531, -1.7107009887695312, 12.524726867675781, 20.38983154296875, -16.84739875793457, 12.800666809082031, 14.408462524414062, 9.070337295532227, 9.791461944580078, -2.1519813537597656, -11.867576599121094, 7.504768371582031, -3.05474853515625, 10.140243530273438, 15.647232055664062, -11.869140625, 16.753019332885742, -1.797800064086914, 16.45166015625, -3.900625228881836, 2.7456207275390625, -12.527547836303711, 6.1461639404296875, -4.6678924560546875, 2.700225830078125, 7.39777946472168, -11.313545227050781, -6.28846549987793, 11.866676330566406, -2.7659454345703125, -2.6259193420410156, 5.018840789794922, 19.42107582092285, 21.178346633911133], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000185.npy"}
|
||||
{"epoch": 0.2796674225245654, "step": 186, "batch_size": 64, "mean": 7.096240043640137, "std": 11.377771377563477, "min": -17.552383422851562, "p10": -5.765814590454101, "median": 6.230628967285156, "p90": 21.303888511657714, "max": 30.451171875, "pos_frac": 0.703125, "sample": [18.427330017089844, 26.76018524169922, -14.030029296875, 5.9015045166015625, 8.005073547363281, -3.22320556640625, 11.30828857421875, -9.00747299194336, -5.894355773925781, 17.313392639160156, 10.805824279785156, -4.788417816162109, 8.489608764648438, 18.987133026123047, 30.451171875, 19.519245147705078, 4.476032257080078, 8.778728485107422, 3.53460693359375, -13.848678588867188, 2.787364959716797, 9.086706161499023, 14.788619995117188, 27.095722198486328, -17.552383422851562, -5.465885162353516, -5.026773452758789, -4.925174713134766, -0.6553802490234375, 2.8636741638183594, 5.857074737548828, 20.50907325744629, -2.098541259765625, -2.042205810546875, -3.354825973510742, -1.55145263671875, 9.118255615234375, 21.268142700195312, 18.975845336914062, 3.443981170654297, -1.8733596801757812, -11.638458251953125, 1.3535232543945312, 3.3168487548828125, 11.171157836914062, 5.8269500732421875, 21.3192081451416, 16.272491455078125, -8.223480224609375, 5.712532043457031, 29.354644775390625, 9.821229934692383, 5.950477600097656, 29.3076171875, 6.510780334472656, 7.8079376220703125, 29.846878051757812, 13.960420608520508, 10.626319885253906, 10.210424423217773, 5.6352996826171875, 10.645721435546875, -1.600738525390625, 7.757131576538086], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000186.npy"}
|
||||
{"epoch": 0.2811791383219955, "step": 187, "batch_size": 64, "mean": 6.648694038391113, "std": 10.5468111038208, "min": -21.251567840576172, "p10": -6.148561096191405, "median": 7.550651550292969, "p90": 20.274404907226565, "max": 28.218170166015625, "pos_frac": 0.703125, "sample": [13.357017517089844, -2.3853302001953125, 15.886138916015625, 22.793785095214844, 28.218170166015625, 16.711669921875, 8.127761840820312, 3.6984195709228516, 7.776580810546875, 19.159423828125, -13.594413757324219, 7.3247222900390625, -1.2162666320800781, 10.891128540039062, 13.2999267578125, 3.185667037963867, 1.681467056274414, 20.52812957763672, 11.221748352050781, 9.766599655151367, -9.605194091796875, -21.251567840576172, 17.892478942871094, -0.29459190368652344, 4.069488525390625, -0.38472747802734375, 1.4131431579589844, 0.329833984375, 1.9723663330078125, 16.487346649169922, 15.753044128417969, 19.134597778320312, -12.421714782714844, -2.5869407653808594, 8.587348937988281, 6.5643768310546875, 4.394344329833984, 0.7354049682617188, -2.7938690185546875, 12.053403854370117, -2.0487918853759766, -6.790016174316406, 19.698081970214844, -0.6622180938720703, 8.114974975585938, 11.972618103027344, 21.738784790039062, -1.1869430541992188, 9.391387939453125, 15.415542602539062, 20.751102447509766, -4.651832580566406, 13.448814392089844, 25.912445068359375, 4.406166076660156, -3.997528076171875, 16.094772338867188, 20.521400451660156, 4.552814483642578, 10.231101989746094, -1.1564178466796875, -12.456258773803711, -11.191680908203125, 10.927162170410156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000187.npy"}
|
||||
{"epoch": 0.28269085411942557, "step": 188, "batch_size": 64, "mean": 6.85283088684082, "std": 10.701106071472168, "min": -26.680747985839844, "p10": -4.019548606872558, "median": 4.841530799865723, "p90": 22.77563838958741, "max": 30.340904235839844, "pos_frac": 0.78125, "sample": [4.814414978027344, -3.6046886444091797, 25.751934051513672, -4.903524398803711, -1.149383544921875, 17.054702758789062, 14.344009399414062, 7.206872940063477, 16.30810546875, 2.404449462890625, 2.0114822387695312, 26.84307861328125, 17.373470306396484, -2.1789779663085938, 0.30712127685546875, 1.4255561828613281, 9.793861389160156, 23.672332763671875, 0.6264019012451172, -4.449602127075195, 8.196327209472656, 3.6260986328125, 0.7583084106445312, -0.9322967529296875, 8.192489624023438, 0.03289031982421875, -4.197345733642578, 16.709701538085938, 3.7794837951660156, 14.507549285888672, 12.971939086914062, 0.9256973266601562, 8.932952880859375, 30.340904235839844, 17.354324340820312, -0.08072662353515625, 19.648101806640625, 6.1614837646484375, -2.3353347778320312, 4.9619140625, 28.436321258544922, 1.7843170166015625, 6.186239242553711, 5.0799713134765625, 1.1313323974609375, 1.553131103515625, 4.868646621704102, 19.046600341796875, 2.8043289184570312, 0.47292327880859375, 23.93634033203125, -14.592803955078125, -3.1499557495117188, 8.822738647460938, 4.7271270751953125, 4.7031402587890625, 7.768585205078125, -6.988298416137695, 19.814720153808594, 24.472293853759766, 20.683351516723633, 5.327598571777344, -4.832807540893555, -26.680747985839844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000188.npy"}
|
||||
{"epoch": 0.2842025699168556, "step": 189, "batch_size": 64, "mean": 6.704022407531738, "std": 10.496870040893555, "min": -18.84286880493164, "p10": -6.436648559570312, "median": 6.097606658935547, "p90": 21.679175567626956, "max": 27.98766326904297, "pos_frac": 0.796875, "sample": [2.7154388427734375, 7.7464752197265625, 9.01266860961914, 8.226539611816406, 6.723541259765625, 20.21514892578125, 19.82758331298828, 12.491743087768555, 0.32248878479003906, 3.91778564453125, -15.538192749023438, 21.94875717163086, -14.407089233398438, 1.740325927734375, -0.20806121826171875, -1.2491741180419922, 22.473533630371094, 13.862373352050781, -12.553749084472656, 9.231330871582031, 5.809577941894531, 17.087926864624023, 5.500614166259766, 9.219558715820312, -0.21584320068359375, 3.707235336303711, 12.937255859375, 7.858808517456055, 4.052181243896484, -1.4742507934570312, 21.375152587890625, -3.49493408203125, 4.147651672363281, 24.940635681152344, 21.809471130371094, 6.122093200683594, 0.704193115234375, 3.1733551025390625, 2.548206329345703, 15.657211303710938, 20.56747055053711, 8.61370849609375, 10.401067733764648, 5.8218536376953125, 13.331062316894531, 10.116886138916016, -5.3553314208984375, -15.749067306518555, 4.92041015625, 1.72125244140625, 0.5013198852539062, 19.27598762512207, -8.837783813476562, 27.98766326904297, 13.484176635742188, 6.0731201171875, -6.9000701904296875, -18.84286880493164, 10.073747634887695, 1.261749267578125, 6.9769134521484375, 0.8986129760742188, 22.610939025878906, 22.139062881469727], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000189.npy"}
|
||||
{"epoch": 0.2857142857142857, "step": 190, "batch_size": 64, "mean": 9.389444351196289, "std": 10.784980773925781, "min": -13.913665771484375, "p10": -3.8599397659301755, "median": 8.863602638244629, "p90": 24.19257850646973, "max": 33.3140869140625, "pos_frac": 0.796875, "sample": [6.342792510986328, 17.47113037109375, 31.226791381835938, -6.310546875, 13.896930694580078, 11.159042358398438, 1.2429275512695312, 0.910430908203125, -10.220239639282227, 27.48773193359375, 9.616020202636719, 17.97083282470703, 7.54456901550293, 1.234344482421875, 4.9767303466796875, 3.2418556213378906, -0.4682273864746094, 24.40057373046875, 4.673683166503906, 9.609264373779297, 19.766788482666016, 23.707256317138672, 2.588165283203125, 0.4302940368652344, -5.516696929931641, 7.74560546875, -13.913665771484375, 6.436389923095703, 5.208492279052734, 11.259681701660156, 10.675247192382812, -2.678203582763672, 8.506965637207031, 12.227294921875, 17.683334350585938, 0.6650714874267578, 25.13623046875, -3.882091522216797, -3.8082523345947266, 11.13992691040039, -0.3460540771484375, 17.240081787109375, 14.049629211425781, 33.3140869140625, 7.523435592651367, 1.4964370727539062, -4.972200393676758, 15.9049072265625, -7.5510101318359375, -2.1040191650390625, 9.220239639282227, 11.977577209472656, 3.041048049926758, 14.507743835449219, 17.269882202148438, 22.704071044921875, 30.3499755859375, 5.003974914550781, 19.728679656982422, 22.909637451171875, -1.6584701538085938, 25.63818359375, 19.411773681640625, 16.880382537841797], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000190.npy"}
|
||||
{"epoch": 0.2872260015117158, "step": 191, "batch_size": 64, "mean": 6.834270477294922, "std": 11.271136283874512, "min": -26.330276489257812, "p10": -4.559243202209473, "median": 6.082916259765625, "p90": 19.494912338256835, "max": 31.12322998046875, "pos_frac": 0.828125, "sample": [18.206787109375, -2.4427528381347656, 5.7420196533203125, 15.813674926757812, 29.852375030517578, 0.2145233154296875, 0.742095947265625, 25.573501586914062, 6.23695182800293, 3.3925323486328125, 2.4514923095703125, 31.12322998046875, 3.0699844360351562, 0.2227630615234375, 10.690864562988281, 3.4564971923828125, -20.198959350585938, 2.7927627563476562, 19.287353515625, 9.72404670715332, 11.080154418945312, 14.82525634765625, 14.503530502319336, 18.522621154785156, -11.037353515625, 18.804519653320312, 8.5955810546875, 9.355873107910156, -4.698600769042969, 0.6688919067382812, 11.13395881652832, 0.5279617309570312, -2.0732059478759766, 1.7120323181152344, 25.27850341796875, -26.330276489257812, 0.4309425354003906, -9.213714599609375, 9.90493392944336, 16.13501739501953, 11.837158203125, 12.623966217041016, 5.4148101806640625, 13.043045043945312, 13.075069427490234, 2.887115478515625, 19.583866119384766, 21.712589263916016, -3.321798324584961, 1.6521377563476562, -4.234075546264648, 5.92888069152832, 21.22332763671875, 6.731689453125, -11.055553436279297, 11.48809814453125, 16.60076904296875, -18.545700073242188, 10.975749969482422, 0.7698135375976562, 1.5332489013671875, 18.508773803710938, 2.0786819458007812, 2.8032760620117188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000191.npy"}
|
||||
{"epoch": 0.2887377173091459, "step": 192, "batch_size": 64, "mean": 4.798130035400391, "std": 9.713309288024902, "min": -25.42566680908203, "p10": -4.557746124267578, "median": 3.5714664459228516, "p90": 17.845713233947755, "max": 29.251708984375, "pos_frac": 0.65625, "sample": [-25.42566680908203, -8.782096862792969, -0.3010749816894531, 9.778411865234375, 14.17032241821289, 1.9345703125, -3.9528636932373047, 12.476181030273438, 21.382858276367188, -4.642425537109375, -9.136039733886719, -4.360160827636719, -3.5462875366210938, 0.4517822265625, 2.8268508911132812, 7.6962127685546875, 18.541793823242188, 3.916168212890625, -0.3210563659667969, 3.7739181518554688, 18.07050895690918, 23.22216033935547, -2.525604248046875, -1.3403358459472656, 17.321189880371094, -0.4602775573730469, 2.1923828125, 12.192401885986328, 3.20758056640625, 3.3690147399902344, 11.826560974121094, 14.066658020019531, 8.91966438293457, 16.4754695892334, -2.2472057342529297, -2.604625701904297, 4.421966552734375, -0.3120613098144531, -3.5650100708007812, 4.695716857910156, 5.874334335327148, 0.2606086730957031, -14.270866394042969, 29.251708984375, 11.910354614257812, 7.3784637451171875, -4.0196075439453125, 3.2760353088378906, 5.326942443847656, -9.967536926269531, 5.848724365234375, 20.315868377685547, 1.87799072265625, 9.606422424316406, -1.578378677368164, 11.865303039550781, 14.603343963623047, -1.631011962890625, 15.436958312988281, 20.061500549316406, 10.03736686706543, -6.751323699951172, 3.146869659423828, 5.812694549560547], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000192.npy"}
|
||||
{"epoch": 0.29024943310657597, "step": 193, "batch_size": 64, "mean": 8.818931579589844, "std": 10.746838569641113, "min": -22.008434295654297, "p10": -2.516829681396484, "median": 6.622766494750977, "p90": 23.123147010803223, "max": 33.840843200683594, "pos_frac": 0.796875, "sample": [1.6547470092773438, 5.099924087524414, 14.897626876831055, 4.6851959228515625, 6.242624282836914, -4.48211669921875, -0.6193161010742188, 21.70105743408203, -1.10113525390625, -0.11623954772949219, 21.844192504882812, 2.551809310913086, 14.757339477539062, 11.3692626953125, 22.562759399414062, -0.16873931884765625, 8.707817077636719, 1.8013916015625, 13.908267974853516, 17.52545928955078, -3.1536922454833984, 24.16271209716797, 33.840843200683594, -3.9929542541503906, 9.869338989257812, 22.97705078125, 2.4312477111816406, 7.4825592041015625, -2.4048538208007812, 10.250825881958008, 22.887359619140625, 10.156791687011719, 3.463390350341797, 12.429397583007812, 9.913742065429688, 4.526725769042969, -2.5648193359375, 0.25286865234375, 12.803169250488281, 6.504734039306641, -6.23394775390625, 3.3136634826660156, 22.24721908569336, 29.390235900878906, -2.7380313873291016, 3.207427978515625, 10.013883590698242, -1.3852214813232422, 3.6543655395507812, 25.081764221191406, 15.046279907226562, 0.44097328186035156, 28.1827392578125, 29.42063331604004, 23.148029327392578, 6.7407989501953125, 14.708602905273438, -22.008434295654297, 9.335731506347656, 23.065088272094727, 2.3675384521484375, 1.1965389251708984, 1.2581043243408203, 0.29927635192871094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000193.npy"}
|
||||
{"epoch": 0.29176114890400606, "step": 194, "batch_size": 64, "mean": 6.981878280639648, "std": 13.015533447265625, "min": -24.676475524902344, "p10": -10.377558708190916, "median": 7.456974029541016, "p90": 24.146266937255863, "max": 30.524751663208008, "pos_frac": 0.734375, "sample": [28.766319274902344, 6.628692626953125, 14.654426574707031, -11.150964736938477, 14.8966064453125, 2.3870582580566406, -16.41033363342285, 2.4266357421875, 2.0753097534179688, 11.024337768554688, 6.4778289794921875, 11.33551025390625, 6.4714202880859375, 19.262847900390625, -17.15917205810547, -4.750923156738281, 13.290454864501953, -8.572944641113281, 8.285255432128906, -2.5523223876953125, 13.283111572265625, 3.4244537353515625, 16.56695556640625, 20.401756286621094, -5.294635772705078, 9.270820617675781, -4.746025085449219, 17.58526611328125, -0.01016998291015625, 14.432533264160156, 1.4295692443847656, 22.376028060913086, -12.70254898071289, -7.4107818603515625, -21.555187225341797, -1.0312633514404297, 30.31708335876465, 11.097152709960938, 14.028690338134766, 24.900564193725586, -22.679725646972656, 9.036018371582031, 6.519046783447266, 2.3279781341552734, 16.39673614501953, 30.524751663208008, 11.0164794921875, -24.676475524902344, 28.27606964111328, 16.743133544921875, 14.919305801391602, -1.397796630859375, 12.773208618164062, 25.374284744262695, 23.409347534179688, 24.46208953857422, 11.629034042358398, 0.42891502380371094, 1.2244758605957031, 17.40325927734375, 3.742979049682617, 3.1219024658203125, -1.9592781066894531, 4.475053787231445], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000194.npy"}
|
||||
{"epoch": 0.29327286470143615, "step": 195, "batch_size": 64, "mean": 8.901931762695312, "std": 12.13087272644043, "min": -15.778350830078125, "p10": -6.281093597412107, "median": 8.003744125366211, "p90": 26.96925964355469, "max": 33.326141357421875, "pos_frac": 0.71875, "sample": [3.3145294189453125, 7.2724609375, -1.6948089599609375, 7.511329650878906, 30.352224349975586, 17.191192626953125, -0.3251304626464844, 7.652580261230469, 10.7364501953125, 28.978134155273438, 14.368385314941406, 18.771787643432617, -7.476165771484375, -8.205833435058594, -11.663604736328125, -15.778350830078125, -0.9822845458984375, 9.87179183959961, 9.388601303100586, 22.14925193786621, -1.5570526123046875, 5.51165771484375, 12.622591018676758, 15.521453857421875, 9.756576538085938, 8.14605712890625, 28.595563888549805, 28.386920928955078, 3.7936935424804688, 12.7012939453125, 5.053228378295898, -0.8629531860351562, 29.867591857910156, 8.321449279785156, 26.4881591796875, -1.4508056640625, -2.4017982482910156, 24.03076934814453, 17.328834533691406, 21.23101806640625, 7.861431121826172, -3.4925918579101562, -1.8718185424804688, -2.08984375, 0.8772392272949219, 3.1341304779052734, 1.954986572265625, 25.119346618652344, -13.870376586914062, -0.066986083984375, -10.07308578491211, 0.073394775390625, 33.326141357421875, 21.673479080200195, 17.149459838867188, 16.35561180114746, 16.101409912109375, 21.407203674316406, -10.225662231445312, 27.175445556640625, 8.541839599609375, 1.8760032653808594, 3.66754150390625, 12.602567672729492], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000195.npy"}
|
||||
{"epoch": 0.2947845804988662, "step": 196, "batch_size": 64, "mean": 7.3378376960754395, "std": 11.124900817871094, "min": -31.85926055908203, "p10": -4.329744529724121, "median": 6.621133804321289, "p90": 22.451548004150393, "max": 30.418991088867188, "pos_frac": 0.78125, "sample": [4.1664581298828125, -31.85926055908203, -4.317256927490234, 3.1373748779296875, 7.682844161987305, -8.038995742797852, 1.03564453125, 5.0619049072265625, 5.999229431152344, -3.5458297729492188, 2.83465576171875, 24.236595153808594, 22.53826904296875, 10.939411163330078, 9.014205932617188, 4.273979187011719, -15.599315643310547, 22.24919891357422, -2.0479202270507812, 20.664634704589844, -2.175262451171875, 5.020301818847656, 26.915523529052734, 1.116546630859375, -5.314060211181641, 4.706306457519531, 7.21209716796875, 7.873451232910156, 7.13690185546875, 30.418991088867188, 6.489948272705078, 16.273311614990234, 1.6781768798828125, -4.33509635925293, -2.1082611083984375, 13.666053771972656, 2.7238388061523438, 13.624841690063477, 8.65157699584961, 23.02764892578125, 13.072563171386719, 8.771102905273438, 19.71411895751953, 3.064197540283203, 17.76213836669922, -9.741729736328125, 1.0493717193603516, 6.7523193359375, 10.630363464355469, 23.3870849609375, 20.848854064941406, 19.81004524230957, -2.2485809326171875, -2.600189208984375, 10.31976318359375, 16.680908203125, 17.190366744995117, 13.574745178222656, 2.3522377014160156, 1.7412834167480469, 3.2870254516601562, -4.978694915771484, 10.068084716796875, 28.0855712890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000196.npy"}
|
||||
{"epoch": 0.2962962962962963, "step": 197, "batch_size": 64, "mean": 9.155037879943848, "std": 14.896767616271973, "min": -28.570117950439453, "p10": -12.597487449645994, "median": 7.770508766174316, "p90": 27.16788959503174, "max": 34.87834930419922, "pos_frac": 0.765625, "sample": [-2.97528076171875, 32.229331970214844, 7.335432052612305, 2.4134368896484375, 6.947540283203125, 15.533821105957031, -13.8177490234375, -21.25531005859375, 2.613128662109375, -3.7108688354492188, 22.619422912597656, 27.01300621032715, -13.63436508178711, -15.101116180419922, 7.61474609375, 26.70740509033203, 13.728752136230469, 3.4717063903808594, 34.87834930419922, 28.405792236328125, 3.9359054565429688, 8.336387634277344, 20.371623992919922, 13.44061279296875, 12.56814956665039, 1.75811767578125, 5.03643798828125, -10.178106307983398, 25.17340660095215, 13.193656921386719, 26.34668731689453, 27.234268188476562, -20.141571044921875, 24.080501556396484, -3.488933563232422, -28.570117950439453, 3.9383392333984375, 11.246139526367188, 25.783203125, 13.708274841308594, -0.25865936279296875, 25.354637145996094, 27.820724487304688, 1.1664276123046875, -2.2259521484375, 31.209457397460938, 2.4080963134765625, 0.0211334228515625, 6.563106536865234, 21.734474182128906, -8.253782272338867, -3.6890945434570312, 17.962913513183594, 18.333066940307617, 3.6813535690307617, -18.291950225830078, 33.123687744140625, 19.08544921875, 1.487771987915039, 18.268321990966797, 4.639076232910156, 18.626663208007812, 7.926271438598633, 24.439054489135742], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000197.npy"}
|
||||
{"epoch": 0.29780801209372637, "step": 198, "batch_size": 64, "mean": 9.947700500488281, "std": 11.509800910949707, "min": -17.957687377929688, "p10": -5.02788391113281, "median": 9.717948913574219, "p90": 23.252384185791016, "max": 37.242523193359375, "pos_frac": 0.78125, "sample": [-2.82391357421875, 21.55742645263672, 0.2231006622314453, 12.717399597167969, 8.595359802246094, 11.185905456542969, -7.132234573364258, 6.2472076416015625, 6.980649948120117, 6.196868896484375, 20.88622283935547, 8.15399169921875, -0.32834625244140625, -11.86686897277832, 37.242523193359375, 28.8756103515625, 13.520584106445312, 8.82635498046875, 10.610137939453125, -1.895212173461914, 18.2032470703125, 17.096681594848633, 26.167478561401367, -8.799314498901367, 28.143280029296875, -14.538314819335938, -5.972442626953125, -0.06104278564453125, 22.386978149414062, 17.672082901000977, 18.901771545410156, 16.666946411132812, 11.699234008789062, -17.957687377929688, 9.600700378417969, -1.6282882690429688, 1.1839637756347656, 2.1300277709960938, -6.052215576171875, 10.185501098632812, 6.329826354980469, 20.428695678710938, 22.981670379638672, 13.576944351196289, 15.471715927124023, 8.3135986328125, -0.3849449157714844, 28.97673988342285, 35.50562286376953, 9.260208129882812, 5.283164978027344, 2.02227783203125, 18.31988525390625, 23.368404388427734, 4.3125, 9.835197448730469, 11.958938598632812, 12.23775863647461, -1.0580902099609375, 9.304485321044922, 11.40252685546875, 9.592609405517578, 20.877395629882812, 15.934333801269531], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000198.npy"}
|
||||
{"epoch": 0.29931972789115646, "step": 199, "batch_size": 64, "mean": 7.714962959289551, "std": 14.886049270629883, "min": -26.696170806884766, "p10": -10.065461158752441, "median": 9.271129608154297, "p90": 24.673269653320315, "max": 36.201786041259766, "pos_frac": 0.6875, "sample": [17.624977111816406, 19.572647094726562, 3.2080841064453125, 10.270214080810547, -10.093313217163086, -19.599937438964844, 7.2919464111328125, 25.483963012695312, 16.21959686279297, 8.68682861328125, 14.77825927734375, -4.117103576660156, 36.201786041259766, 18.96314239501953, 33.63401794433594, 24.755813598632812, 19.197940826416016, 22.27072525024414, 17.396087646484375, 24.23397445678711, -3.6865234375, -17.577360153198242, 18.92171859741211, 23.794464111328125, -4.280492782592773, 0.38959503173828125, 12.531745910644531, -6.7544403076171875, 15.311668395996094, 18.887306213378906, 16.1339111328125, 2.7669334411621094, 16.38304901123047, -3.812408447265625, -0.42989253997802734, 3.44085693359375, 7.59075927734375, -10.000473022460938, -16.545928955078125, 14.104606628417969, -26.696170806884766, 12.729217529296875, 3.2573013305664062, 23.18537139892578, -22.237136840820312, 9.855430603027344, 8.59349250793457, 5.480991363525391, -6.499320983886719, 2.5247039794921875, -0.04090690612792969, -26.177337646484375, -8.356246948242188, 18.493824005126953, -9.378654479980469, -1.6598052978515625, 26.191558837890625, -5.590003967285156, 24.480667114257812, 14.380294799804688, 30.943214416503906, 2.0679244995117188, 30.989837646484375, 14.070648193359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000199.npy"}
|
||||
{"epoch": 0.30083144368858655, "step": 200, "batch_size": 64, "mean": 10.971943855285645, "std": 15.581605911254883, "min": -34.253082275390625, "p10": -6.367708396911621, "median": 9.23891830444336, "p90": 34.4784049987793, "max": 36.420677185058594, "pos_frac": 0.765625, "sample": [5.120044708251953, 21.887027740478516, 34.604278564453125, 5.699310302734375, 10.038528442382812, 1.8036670684814453, 29.909645080566406, 5.851234436035156, 7.2990570068359375, -4.281578063964844, 2.5089263916015625, 20.096221923828125, -9.807498931884766, 35.93182373046875, 1.3413238525390625, -2.5977916717529297, 6.594764709472656, -1.4440460205078125, 34.18470001220703, -9.960052490234375, 1.75213623046875, 0.8786697387695312, 17.675613403320312, 26.48611831665039, 1.1879158020019531, 17.200605392456055, 25.030458450317383, -11.670707702636719, 22.26791000366211, -2.3127708435058594, 29.367156982421875, 19.97559356689453, 9.372474670410156, 35.58811950683594, 35.86780548095703, -3.8239917755126953, -34.253082275390625, 20.542272567749023, 36.17533874511719, 28.361072540283203, 26.6763916015625, 16.76020050048828, 35.96636962890625, 9.105361938476562, 8.34368896484375, -23.114486694335938, 2.5612030029296875, 7.634735107421875, 9.482244491577148, 36.420677185058594, -0.28684234619140625, 12.224678039550781, 10.314085006713867, -6.451995849609375, 10.536933898925781, 15.775238037109375, -6.171037673950195, -6.606067657470703, 13.749824523925781, 7.095367431640625, 0.6542205810546875, -5.6934099197387695, 27.331398010253906, 29.447288513183594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000200.npy"}
|
||||
{"epoch": 0.30234315948601664, "step": 201, "batch_size": 64, "mean": 9.18124008178711, "std": 14.620272636413574, "min": -24.504119873046875, "p10": -10.966837692260743, "median": 7.789114952087402, "p90": 30.71069526672364, "max": 41.26549530029297, "pos_frac": 0.765625, "sample": [5.702301025390625, 13.00091552734375, 9.755107879638672, 8.144100189208984, 1.5102176666259766, 11.43747329711914, 21.989788055419922, 5.571249008178711, 13.26092529296875, 0.2125244140625, 2.1853103637695312, 20.457975387573242, 23.272146224975586, 16.34149169921875, 1.0693721771240234, -10.791290283203125, 41.26549530029297, 28.818382263183594, -0.22967529296875, 35.943145751953125, -12.818641662597656, 2.8443832397460938, 9.03125, -13.400249481201172, 2.6737747192382812, 31.521686553955078, 17.693477630615234, 14.411945343017578, -11.042072296142578, 28.359886169433594, 1.7131099700927734, 32.38417053222656, 2.571664810180664, -5.451332092285156, 21.83932113647461, 19.21725845336914, -1.2878456115722656, -13.045639038085938, 0.1786346435546875, 5.9088287353515625, 22.984100341796875, 8.300384521484375, -2.8491897583007812, 20.46246337890625, -6.121498107910156, 18.351585388183594, 11.162055969238281, -16.61639404296875, 7.43412971496582, 34.628204345703125, -2.536672592163086, 7.1884765625, 13.495147705078125, 22.638675689697266, 15.727449417114258, 35.97211456298828, 20.942276000976562, -7.7332916259765625, 1.9067306518554688, -11.484329223632812, 33.23770523071289, 0.744598388671875, 2.0482330322265625, -24.504119873046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000201.npy"}
|
||||
{"epoch": 0.30385487528344673, "step": 202, "batch_size": 64, "mean": 10.707111358642578, "std": 14.439509391784668, "min": -28.825782775878906, "p10": -5.101615142822265, "median": 11.492435455322266, "p90": 25.76209087371826, "max": 44.4866943359375, "pos_frac": 0.78125, "sample": [-5.215642929077148, 9.050949096679688, 39.24274826049805, 44.4866943359375, 25.772363662719727, -3.92388916015625, 19.527603149414062, 19.915626525878906, 22.713390350341797, 17.005882263183594, 3.347362518310547, 19.12009048461914, 4.577171325683594, 6.358009338378906, -3.4241714477539062, 20.146331787109375, 25.080238342285156, 2.158660888671875, 19.034210205078125, -5.486763000488281, 11.084266662597656, 9.422382354736328, 17.984954833984375, 3.2982177734375, -4.835550308227539, 21.338424682617188, 5.825349807739258, 38.07046890258789, -0.898040771484375, 20.306917190551758, -2.1713027954101562, 10.689413070678711, 15.386497497558594, 15.763015747070312, 2.9442138671875, -28.825782775878906, 18.715347290039062, 10.435989379882812, 19.70452880859375, -20.975669860839844, 11.900604248046875, 11.930221557617188, 16.57849884033203, 7.6885833740234375, 27.531570434570312, 10.143836975097656, 25.738121032714844, 4.369020462036133, 29.90838623046875, -2.4238739013671875, 0.5121383666992188, -20.723712921142578, -1.1746063232421875, -5.941921234130859, 2.740692138671875, 17.857376098632812, 29.168975830078125, 1.9526443481445312, 16.108444213867188, 18.46653175354004, -23.159507751464844, 24.032943725585938, 14.12055778503418, 25.179039001464844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000202.npy"}
|
||||
{"epoch": 0.30536659108087677, "step": 203, "batch_size": 64, "mean": 9.69023323059082, "std": 14.652880668640137, "min": -24.152496337890625, "p10": -5.75358543395996, "median": 6.983795166015625, "p90": 29.929145812988285, "max": 44.6597900390625, "pos_frac": 0.75, "sample": [6.4030303955078125, 1.0744247436523438, -14.036407470703125, -2.722137451171875, 3.793121337890625, 16.08453369140625, 1.3532180786132812, 15.204620361328125, 8.192909240722656, -4.768444061279297, 26.719284057617188, 11.336395263671875, 12.332674026489258, 36.757781982421875, 4.496683120727539, 44.6597900390625, 30.2764892578125, 3.4046630859375, 25.370403289794922, 13.6839599609375, -24.152496337890625, -0.28104400634765625, 13.690162658691406, -2.7614402770996094, -1.0276470184326172, -8.83843994140625, 7.23675537109375, 36.71038818359375, 3.2995223999023438, 19.673110961914062, 8.560417175292969, 4.535408020019531, 28.227310180664062, 8.752662658691406, 18.312408447265625, 1.2231063842773438, 30.62189483642578, 16.05021095275879, 30.229278564453125, 10.95222282409668, 1.94403076171875, -3.0072021484375, 3.2234115600585938, 11.858482360839844, 23.29999542236328, -2.3469467163085938, 29.228836059570312, 28.879470825195312, -6.175788879394531, 40.51238250732422, 17.049877166748047, 2.1385498046875, 0.6073226928710938, 5.9845733642578125, 5.7269287109375, -17.294292449951172, -0.5275192260742188, 6.7308349609375, 24.76602554321289, 21.312395095825195, 16.447132110595703, -17.890213012695312, -12.356220245361328, -0.5678768157958984], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000203.npy"}
|
||||
{"epoch": 0.30687830687830686, "step": 204, "batch_size": 64, "mean": 10.324640274047852, "std": 15.105046272277832, "min": -30.65740966796875, "p10": -4.46272087097168, "median": 8.698892593383789, "p90": 34.560994720458986, "max": 41.053504943847656, "pos_frac": 0.75, "sample": [37.6287841796875, 29.189682006835938, -7.264339447021484, 6.706268310546875, 8.270904541015625, 41.053504943847656, 2.890625, 14.017053604125977, -8.891456604003906, 10.04052734375, 36.1092529296875, -20.048965454101562, -3.8483428955078125, 6.916912078857422, 11.408256530761719, 11.759124755859375, 18.31360626220703, 4.1378326416015625, -16.313762664794922, 0.14264678955078125, 21.06608772277832, 8.085289001464844, 21.508026123046875, 30.59729766845703, -1.5185737609863281, -4.195091247558594, 14.6973876953125, 11.50729751586914, 2.770801544189453, 6.282573699951172, 34.80995178222656, -4.109657287597656, 4.550928115844727, 13.459457397460938, 17.83855438232422, -3.33746337890625, 17.06415557861328, 37.44038009643555, -3.902517318725586, 5.6939697265625, 5.151233673095703, 4.847774505615234, 27.28711700439453, 29.570171356201172, -8.317569732666016, 9.126880645751953, 14.439346313476562, -1.7144107818603516, 22.895370483398438, -1.1414146423339844, 33.98009490966797, 34.974021911621094, 0.4910392761230469, 20.87920379638672, 12.9300537109375, 35.96438980102539, 16.13365936279297, 1.1067962646484375, 0.771697998046875, 16.689414978027344, -30.65740966796875, -4.577419281005859, 9.991640090942383, -2.5716094970703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000204.npy"}
|
||||
{"epoch": 0.30839002267573695, "step": 205, "batch_size": 64, "mean": 10.346155166625977, "std": 16.654483795166016, "min": -32.41106414794922, "p10": -8.253506088256836, "median": 10.371970176696777, "p90": 33.505200958251955, "max": 47.3128662109375, "pos_frac": 0.734375, "sample": [47.3128662109375, 4.563142776489258, -0.7910900115966797, 7.222629547119141, 13.143880844116211, 42.702388763427734, 10.745641708374023, 18.67571258544922, -29.719482421875, 30.649078369140625, 3.3201751708984375, 4.1926116943359375, 47.22382354736328, 13.983856201171875, 11.896369934082031, 1.26025390625, 8.978622436523438, -8.425811767578125, -5.661041259765625, 17.992298126220703, 7.195161819458008, 15.215850830078125, 2.7064342498779297, 8.985946655273438, 21.65692901611328, -2.2400894165039062, -8.869651794433594, -1.2983589172363281, 4.881111145019531, 39.234046936035156, -3.1468124389648438, -6.423490524291992, 15.845062255859375, 9.998298645019531, -1.996490478515625, 16.42205810546875, 14.497758865356445, 17.163963317871094, -2.0499420166015625, 20.91950225830078, 0.00836181640625, 28.505630493164062, 21.882183074951172, 18.246444702148438, 37.42626953125, 33.943603515625, 1.561065673828125, 12.095359802246094, 27.988319396972656, -8.878982543945312, 12.5924072265625, 7.8100128173828125, 19.729595184326172, 11.509689331054688, -26.45086669921875, 32.93781280517578, 23.09607696533203, -8.48162841796875, 33.74836730957031, -7.851459503173828, 0.558441162109375, -32.41106414794922, -0.2209930419921875, 14.84609603881836], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000205.npy"}
|
||||
{"epoch": 0.30990173847316704, "step": 206, "batch_size": 64, "mean": 7.520722389221191, "std": 17.664382934570312, "min": -30.147865295410156, "p10": -14.995520782470699, "median": 5.110720634460449, "p90": 33.19698181152344, "max": 39.35649871826172, "pos_frac": 0.65625, "sample": [-9.95517349243164, 0.3047332763671875, 3.413421630859375, -25.18212890625, -9.443746566772461, -1.8427658081054688, 4.643089294433594, 10.03445053100586, 24.495044708251953, 8.823524475097656, -0.25866127014160156, 38.036109924316406, 15.001834869384766, -3.36480712890625, -8.566474914550781, 17.669464111328125, -2.489643096923828, 36.161598205566406, -20.987030029296875, 5.352056503295898, 39.35649871826172, 18.60434341430664, 3.0981979370117188, 4.869384765625, -5.750570297241211, 31.900962829589844, 7.114946365356445, -25.206754684448242, 20.135087966918945, 24.867828369140625, 26.60669708251953, 35.670433044433594, 2.097686767578125, 6.517423629760742, 30.468093872070312, 0.1726226806640625, -30.147865295410156, -16.718284606933594, -5.316862106323242, 0.695220947265625, 17.920372009277344, 0.7489738464355469, 19.872665405273438, -7.664802551269531, -2.1682090759277344, -22.23165512084961, 35.05376434326172, 24.12744140625, 3.5650711059570312, 5.982574462890625, 19.125534057617188, 30.578224182128906, 33.752418518066406, -10.975738525390625, 18.240524291992188, 30.926734924316406, 5.9639434814453125, -0.5590972900390625, 5.9654693603515625, 36.98973083496094, -25.74662971496582, -1.1524505615234375, -0.17647552490234375, 12.307861328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000206.npy"}
|
||||
{"epoch": 0.31141345427059713, "step": 207, "batch_size": 64, "mean": 10.788748741149902, "std": 16.04694366455078, "min": -22.207969665527344, "p10": -6.96345100402832, "median": 6.937107086181641, "p90": 35.813851928710946, "max": 45.17842102050781, "pos_frac": 0.75, "sample": [45.17842102050781, 0.7861042022705078, -3.9260387420654297, 2.3675460815429688, 11.811592102050781, 14.125675201416016, 0.7745819091796875, 4.53466796875, 1.74957275390625, 34.04247283935547, 9.53045654296875, 3.2966690063476562, -1.6216602325439453, 36.43303680419922, -3.5173282623291016, -6.512928009033203, 10.713958740234375, 22.1619873046875, 7.466793060302734, 30.008342742919922, 31.886699676513672, 7.938270568847656, 17.73723602294922, 3.247314453125, -10.30101203918457, -7.156532287597656, -3.54901123046875, 38.50048065185547, -8.490684509277344, 6.888496398925781, 34.36908721923828, 5.303394317626953, 43.03643798828125, -3.9121017456054688, 6.7822265625, -3.4215965270996094, -15.223810195922852, 16.745803833007812, 6.9857177734375, 1.9221267700195312, 19.018569946289062, 27.99146270751953, 3.848175048828125, 15.22890853881836, -0.128570556640625, 22.41284942626953, 20.13762664794922, -22.207969665527344, 30.034982681274414, 14.083038330078125, -18.961219787597656, 22.06313133239746, 1.868621826171875, 12.173004150390625, 4.4163360595703125, 41.83942413330078, 38.25767517089844, 4.616355895996094, 12.239791870117188, 17.39457893371582, -8.895092010498047, -1.5396575927734375, 4.41748046875, 41.47793960571289], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000207.npy"}
|
||||
{"epoch": 0.3129251700680272, "step": 208, "batch_size": 64, "mean": 6.538424968719482, "std": 15.935900688171387, "min": -33.238059997558594, "p10": -11.03727569580078, "median": 5.998318672180176, "p90": 28.992258453369143, "max": 37.841827392578125, "pos_frac": 0.609375, "sample": [11.697542190551758, 1.7212944030761719, 24.788795471191406, -5.352334976196289, 31.813247680664062, 29.253616333007812, 1.5228767395019531, -3.7508697509765625, -15.421875, -26.930831909179688, -3.95751953125, -11.55078125, 28.382423400878906, 17.25433349609375, 18.739444732666016, 3.7566070556640625, 7.539894104003906, 12.224105834960938, -4.584245681762695, -9.280548095703125, 21.452110290527344, -6.3306121826171875, 1.7527847290039062, -6.8336029052734375, 18.72395133972168, 33.59521484375, 25.83190155029297, -7.4404296875, 8.509895324707031, 17.502805709838867, -5.120695114135742, -1.1961212158203125, -9.839096069335938, 6.7149505615234375, -22.606536865234375, 17.824432373046875, 30.251323699951172, 19.54669189453125, 8.758331298828125, 0.49326515197753906, 37.841827392578125, 10.312217712402344, 6.084556579589844, -3.8823280334472656, 28.112430572509766, -4.488044738769531, 34.34259033203125, -2.329519271850586, 19.874582290649414, 5.912080764770508, -12.189598083496094, 19.20337677001953, -0.69622802734375, 19.175018310546875, -8.776599884033203, 10.998115539550781, 10.296844482421875, -6.092750549316406, -19.985496520996094, -1.4239158630371094, -33.238059997558594, 5.073646545410156, 13.977432250976562, 30.901283264160156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000208.npy"}
|
||||
{"epoch": 0.3144368858654573, "step": 209, "batch_size": 64, "mean": 13.261709213256836, "std": 19.057750701904297, "min": -42.119300842285156, "p10": -7.6778541564941385, "median": 11.90934944152832, "p90": 36.45089340209961, "max": 49.25872039794922, "pos_frac": 0.78125, "sample": [34.641117095947266, 35.045230865478516, 4.519805908203125, 12.472476959228516, -10.313766479492188, 41.37353515625, 14.035757064819336, 34.29167938232422, 6.143341064453125, 42.745086669921875, -3.4865341186523438, 4.7978515625, -14.783767700195312, 31.126022338867188, 1.69171142578125, 10.739845275878906, -1.9441680908203125, -1.331512451171875, -1.5738258361816406, 45.99617004394531, 2.4256134033203125, -8.871444702148438, 0.8332138061523438, 30.266708374023438, 5.6264801025390625, 5.9918975830078125, 21.400558471679688, 24.61663818359375, 39.76277160644531, 14.556838989257812, 23.96626091003418, 15.250579833984375, 4.723079681396484, 36.441734313964844, 36.45481872558594, 0.86004638671875, 36.179500579833984, 18.520111083984375, -42.119300842285156, 8.068023681640625, -8.387115478515625, 1.8767452239990234, -2.9793930053710938, -6.022911071777344, 35.234710693359375, 14.89263916015625, 49.25872039794922, 32.14431381225586, 16.044754028320312, 29.50442886352539, 5.35809326171875, -17.550851821899414, 4.254457473754883, 18.20697021484375, 11.346221923828125, 2.195322036743164, 23.965057373046875, -38.800018310546875, 10.713775634765625, 15.99378776550293, 34.18967056274414, 39.971927642822266, 16.28537368774414, -0.08745956420898438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000209.npy"}
|
||||
{"epoch": 0.31594860166288735, "step": 210, "batch_size": 64, "mean": 12.613653182983398, "std": 18.47022819519043, "min": -23.996482849121094, "p10": -12.074648666381833, "median": 9.429838180541992, "p90": 39.73747787475586, "max": 48.41347122192383, "pos_frac": 0.765625, "sample": [40.434654235839844, 9.847347259521484, 4.66009521484375, 14.140426635742188, -1.9131107330322266, 21.324569702148438, 48.41347122192383, 20.275466918945312, 42.52265930175781, 40.22660827636719, 2.2266197204589844, 40.49736022949219, 39.352577209472656, -0.4439239501953125, 39.902435302734375, 3.0773239135742188, -20.359285354614258, -23.996482849121094, 37.34368133544922, -13.752647399902344, 38.00553894042969, 16.44232177734375, 5.394582748413086, -1.199249267578125, 35.837745666503906, 1.2785186767578125, -1.0498771667480469, 8.694271087646484, 35.747703552246094, -5.482795715332031, 33.51935577392578, 1.380157470703125, 11.166290283203125, 6.670166015625, 13.138858795166016, -4.390846252441406, 1.4670791625976562, -9.049663543701172, 0.244903564453125, 35.896873474121094, -16.186609268188477, 18.4056396484375, 11.180030822753906, 4.946922302246094, 27.763404846191406, -0.5819931030273438, 15.437971115112305, 4.397705078125, 9.647781372070312, 3.5306739807128906, 12.884090423583984, 28.842500686645508, 9.211894989013672, -18.085344314575195, 3.9244155883789062, 43.39344024658203, 24.40522003173828, 27.480125427246094, 25.404272079467773, -22.0330810546875, 2.0880565643310547, -13.371070861816406, 7.763519287109375, 29.332427978515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000210.npy"}
|
||||
{"epoch": 0.31746031746031744, "step": 211, "batch_size": 64, "mean": 11.766897201538086, "std": 17.44631004333496, "min": -25.785980224609375, "p10": -8.163463973999022, "median": 7.558095932006836, "p90": 37.74198455810547, "max": 54.771820068359375, "pos_frac": 0.765625, "sample": [0.9072704315185547, -25.785980224609375, 15.329071044921875, 5.682910919189453, 0.6122817993164062, 1.3438358306884766, -7.212860107421875, -1.830413818359375, 10.52911376953125, 3.3794898986816406, 22.074310302734375, -0.4196739196777344, 21.502464294433594, 36.54975891113281, 8.119792938232422, 18.25313949584961, 2.0354976654052734, -0.5256233215332031, 32.90886688232422, -12.08755111694336, 4.544794082641602, -8.841506958007812, 21.061214447021484, -8.606689453125, 3.9180030822753906, -19.45697593688965, 1.42608642578125, 8.384719848632812, 42.9779167175293, 13.468952178955078, 22.528667449951172, 6.99639892578125, 11.637716293334961, 3.109710693359375, 39.553253173828125, 18.16879653930664, 9.967124938964844, 3.1759490966796875, -8.570865631103516, 21.12982177734375, 5.177539825439453, 54.771820068359375, 45.202545166015625, 3.419614791870117, -1.3648300170898438, 29.186412811279297, 38.18267822265625, 22.359405517578125, 15.126495361328125, 42.077972412109375, -19.042282104492188, 10.894733428955078, 36.71369934082031, 34.73121643066406, 31.024154663085938, 31.945119857788086, 6.2333221435546875, 18.21649169921875, -4.037275314331055, 40.33270263671875, -2.1570777893066406, -6.356224060058594, 0.8164939880371094, 1.6879119873046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000211.npy"}
|
||||
{"epoch": 0.31897203325774753, "step": 212, "batch_size": 64, "mean": 13.244911193847656, "std": 18.693326950073242, "min": -44.31689453125, "p10": -6.793784713745116, "median": 13.214553833007812, "p90": 39.31909637451172, "max": 47.24357223510742, "pos_frac": 0.796875, "sample": [39.81444549560547, 25.962421417236328, 26.551733016967773, 38.141014099121094, 1.7448844909667969, 3.298015594482422, 47.24357223510742, 19.753759384155273, -3.2353134155273438, 32.54649353027344, -2.8900489807128906, -44.31689453125, 32.83941650390625, 44.406219482421875, 7.867790222167969, 31.031394958496094, 30.439132690429688, 19.415678024291992, -17.382606506347656, 22.66425132751465, 12.936439514160156, 0.9054679870605469, 0.7921562194824219, 13.388885498046875, -27.457969665527344, 1.5990447998046875, 13.04022216796875, -1.9509220123291016, 38.257293701171875, 41.24797058105469, 23.48495864868164, 19.666194915771484, 26.244789123535156, 12.259140014648438, 15.404838562011719, 5.964988708496094, -5.189670562744141, 18.87458038330078, 0.8332080841064453, 13.87240219116211, 0.00341796875, 4.761314392089844, 0.6122779846191406, -3.4503936767578125, 15.965139389038086, 28.08666229248047, 15.877033233642578, 44.405296325683594, -13.049312591552734, -7.48126220703125, -1.651803970336914, 1.1382217407226562, 24.050216674804688, -7.822414398193359, 22.223037719726562, 39.77415466308594, 34.275230407714844, 40.69634246826172, 29.022369384765625, 0.44017791748046875, 6.245361328125, 7.7480621337890625, -14.456901550292969, 0.1927490234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000212.npy"}
|
||||
{"epoch": 0.3204837490551776, "step": 213, "batch_size": 64, "mean": 15.189022064208984, "std": 19.850238800048828, "min": -45.438926696777344, "p10": -5.390393447875975, "median": 11.220800399780273, "p90": 41.53652763366699, "max": 60.678741455078125, "pos_frac": 0.78125, "sample": [32.425113677978516, 5.516914367675781, 6.2268829345703125, 35.88408660888672, 17.21930694580078, 7.135919570922852, -8.556854248046875, -3.9720458984375, -0.21539306640625, 30.26342010498047, 40.783599853515625, 1.921030044555664, 5.826416015625, -5.998256683349609, -45.438926696777344, -10.077255249023438, 26.070920944213867, 60.678741455078125, -0.9873085021972656, 22.586585998535156, 33.69424057006836, -24.605587005615234, 39.41565704345703, 49.331016540527344, 32.74150085449219, 6.039033889770508, -2.5100574493408203, -16.13365936279297, 3.75067138671875, 9.150474548339844, -6.7917022705078125, 11.317794799804688, 18.30923080444336, 11.12380599975586, 19.816192626953125, 21.015411376953125, 3.265949249267578, 30.255332946777344, 38.55882263183594, 6.830604553222656, 6.6591033935546875, 10.651634216308594, 41.85921096801758, 1.4385738372802734, 34.04200744628906, 13.615936279296875, 1.8828964233398438, 28.523906707763672, 43.795013427734375, 2.6206283569335938, -2.1551132202148438, 27.0338134765625, -3.0635223388671875, 1.283599853515625, 4.111186981201172, 20.3526611328125, -0.300750732421875, 46.131683349609375, 15.657936096191406, 14.270843505859375, 39.69397735595703, 42.90519714355469, 37.197303771972656, 42.02205276489258], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000213.npy"}
|
||||
{"epoch": 0.3219954648526077, "step": 214, "batch_size": 64, "mean": 14.957748413085938, "std": 20.855300903320312, "min": -30.39678192138672, "p10": -10.626061630249021, "median": 15.058550834655762, "p90": 43.0771469116211, "max": 57.061981201171875, "pos_frac": 0.703125, "sample": [24.697458267211914, 21.637832641601562, -0.9802703857421875, 10.02505111694336, 11.111846923828125, 32.40423583984375, 3.1002578735351562, 8.53268051147461, 28.38422393798828, 40.100547790527344, 41.481361389160156, 32.203128814697266, 17.348913192749023, 1.08001708984375, 28.163375854492188, 39.634613037109375, 28.19620704650879, -28.576805114746094, 20.88404083251953, 28.036144256591797, -1.0762367248535156, 21.61363983154297, 12.7681884765625, -11.625518798828125, -0.31403160095214844, 40.31977844238281, -17.488784790039062, 26.44583511352539, 46.63192367553711, 35.23175811767578, 19.533958435058594, -7.6702423095703125, -5.593288421630859, 4.7173919677734375, 22.44036102294922, 35.956512451171875, -17.11517333984375, -3.4893798828125, 43.76105499267578, 57.061981201171875, -2.0968856811523438, -14.066097259521484, 36.591182708740234, 26.380104064941406, 47.46481704711914, 48.0977783203125, 8.306812286376953, 0.08494186401367188, 8.89956283569336, -8.293994903564453, -8.28143310546875, 49.712493896484375, 6.7372283935546875, -2.441446304321289, 4.865900039672852, -0.8245048522949219, 44.32373046875, 12.227523803710938, -30.39678192138672, 26.4671630859375, 17.72712516784668, 19.45779037475586, -3.5075912475585938, -19.714139938354492], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000214.npy"}
|
||||
{"epoch": 0.3235071806500378, "step": 215, "batch_size": 64, "mean": 15.225502967834473, "std": 21.887052536010742, "min": -32.702369689941406, "p10": -11.804839706420896, "median": 10.004352569580078, "p90": 42.237269973754884, "max": 51.72894287109375, "pos_frac": 0.703125, "sample": [14.544910430908203, -18.651100158691406, -32.702369689941406, 20.829681396484375, 10.423324584960938, -21.00853729248047, 6.7808380126953125, -3.7598190307617188, 39.140541076660156, 42.23556900024414, 6.952484130859375, -3.0252647399902344, -17.27684783935547, 10.95745849609375, 19.1212158203125, 47.121673583984375, 4.077423095703125, 37.83839416503906, 9.993553161621094, -0.9176788330078125, 8.130409240722656, 6.956146240234375, -0.8163032531738281, 30.927688598632812, 39.477455139160156, 0.7035942077636719, -2.72479248046875, 37.20708465576172, 37.0455322265625, -10.490777969360352, -8.10586929321289, 39.553550720214844, 3.851581573486328, -10.533523559570312, -4.946746826171875, -19.603805541992188, 31.04680824279785, 26.985916137695312, 33.69408416748047, 43.51226806640625, 5.174135208129883, 10.015151977539062, -5.824981689453125, -2.0056991577148438, 36.62977600097656, 40.519195556640625, 47.9354248046875, 48.03969192504883, 34.59725570678711, 51.72894287109375, 25.22979736328125, 41.057960510253906, 35.85033416748047, -18.54601287841797, 7.883136749267578, 1.6416912078857422, 7.11187744140625, 47.23670959472656, 35.549842834472656, -0.5127925872802734, 42.237998962402344, 3.7432308197021484, 36.94348907470703, -12.349689483642578], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000215.npy"}
|
||||
{"epoch": 0.3250188964474679, "step": 216, "batch_size": 64, "mean": 16.23797035217285, "std": 20.852462768554688, "min": -42.49382781982422, "p10": -5.147084045410156, "median": 15.44714069366455, "p90": 42.42584228515625, "max": 83.85153198242188, "pos_frac": 0.765625, "sample": [2.5490493774414062, 41.72809600830078, 22.990753173828125, 34.99260711669922, 25.385822296142578, 9.246192932128906, 16.202369689941406, 5.322914123535156, 31.810386657714844, 41.449790954589844, 30.611251831054688, 21.872779846191406, 2.7394485473632812, 12.673995971679688, 14.707992553710938, -2.1569366455078125, 8.416255950927734, 12.062736511230469, -5.2707672119140625, 48.126373291015625, 26.549453735351562, 45.07249450683594, 21.77899932861328, 29.139015197753906, 22.523597717285156, 9.241558074951172, 45.69495391845703, 1.6483955383300781, 3.2618045806884766, 47.972930908203125, 33.89258575439453, -42.49382781982422, 11.396566390991211, 16.094097137451172, 14.36285400390625, 18.21258544921875, -22.397247314453125, 1.1008338928222656, 21.874523162841797, -3.663707733154297, -0.5281181335449219, 83.85153198242188, 42.724876403808594, -8.430885314941406, 10.136665344238281, -15.288764953613281, 17.353179931640625, 1.2956104278564453, -9.541343688964844, 25.353347778320312, 51.77103805541992, -14.75802230834961, -4.4574127197265625, -0.3547248840332031, 39.90117645263672, -0.3984718322753906, 14.80018424987793, 31.09923553466797, -4.858489990234375, 39.223968505859375, -1.6852474212646484, 32.478599548339844, 16.669830322265625, 16.148727416992188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000216.npy"}
|
||||
{"epoch": 0.32653061224489793, "step": 217, "batch_size": 64, "mean": 16.92223358154297, "std": 21.393905639648438, "min": -32.935523986816406, "p10": -4.4944625854492175, "median": 12.26513671875, "p90": 46.58373184204102, "max": 60.67561340332031, "pos_frac": 0.78125, "sample": [28.894424438476562, 46.80663299560547, -6.823616027832031, 0.2949409484863281, -0.9612560272216797, -4.875553131103516, 2.6922264099121094, 60.67561340332031, -1.4451904296875, 1.663116455078125, 54.04559326171875, 54.34065246582031, 4.634254455566406, -3.6052513122558594, 12.30517578125, 4.999610900878906, 41.687110900878906, 25.496150970458984, 7.1053009033203125, 43.819923400878906, 9.220746994018555, 31.5443058013916, 4.91510009765625, 33.766876220703125, 8.600929260253906, 42.27281188964844, -32.935523986816406, -0.7113571166992188, 3.51348876953125, 14.079784393310547, 43.976078033447266, 24.12512969970703, 12.22509765625, -7.664344787597656, 24.87183380126953, -15.549911499023438, 4.496345520019531, 26.868398666381836, 0.9155426025390625, -16.933731079101562, -29.346603393554688, 43.52980041503906, 20.64881134033203, -2.1785125732421875, -2.0329971313476562, 3.7985610961914062, 7.857879638671875, 10.77679443359375, 17.764930725097656, 58.411956787109375, 20.48162078857422, 36.75859832763672, 8.804374694824219, 1.6647281646728516, 49.36811065673828, 23.148941040039062, 47.61241912841797, 23.874862670898438, 22.170257568359375, 40.4976806640625, 30.393600463867188, 46.063629150390625, 20.499980926513672, -0.893890380859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000217.npy"}
|
||||
{"epoch": 0.328042328042328, "step": 218, "batch_size": 64, "mean": 10.858057022094727, "std": 24.209278106689453, "min": -55.961265563964844, "p10": -16.58074359893799, "median": 8.755088806152344, "p90": 43.025214004516606, "max": 60.4732666015625, "pos_frac": 0.6875, "sample": [4.846710205078125, 1.0684242248535156, 12.117172241210938, 24.777442932128906, 1.5604248046875, -6.2574005126953125, 9.300682067871094, -2.785369873046875, 0.5973663330078125, 43.581207275390625, 10.183837890625, 12.11880111694336, 41.412025451660156, -39.603515625, -16.170791625976562, -28.931673049926758, 7.835609436035156, 46.462974548339844, -31.779144287109375, -1.0732269287109375, 27.904823303222656, 16.308868408203125, 3.9807357788085938, 13.792522430419922, 28.41232681274414, 60.4732666015625, 43.441036224365234, 54.41596984863281, 21.863927841186523, 26.02667236328125, -2.3672218322753906, 33.0604248046875, 19.380935668945312, -10.453046798706055, -9.55111312866211, -12.980186462402344, 1.62725830078125, 18.679433822631836, -2.0213470458984375, 35.658966064453125, 39.3609504699707, 21.744827270507812, -2.5313148498535156, 7.3560028076171875, 10.074337005615234, -16.756437301635742, 30.199495315551758, -0.5547313690185547, -22.498611450195312, -10.319900512695312, 35.87254333496094, -35.19963836669922, 16.211204528808594, 0.19913482666015625, 8.209495544433594, 37.094940185546875, 4.263668060302734, 51.709999084472656, -55.961265563964844, -3.780303955078125, 20.11452865600586, 7.33782958984375, 42.054962158203125, 53.798065185546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000218.npy"}
|
||||
{"epoch": 0.3295540438397581, "step": 219, "batch_size": 64, "mean": 11.58650016784668, "std": 20.129959106445312, "min": -32.022239685058594, "p10": -13.68406753540039, "median": 7.801174163818359, "p90": 41.74911499023438, "max": 49.89347839355469, "pos_frac": 0.703125, "sample": [7.9074859619140625, 45.413875579833984, 31.37750244140625, 7.206766128540039, 20.401145935058594, -0.8568572998046875, 2.629589080810547, 1.7074604034423828, 9.762199401855469, -14.258934020996094, -3.0057849884033203, 49.75330352783203, 25.0166015625, -15.658025741577148, -18.226593017578125, 16.906234741210938, 23.454444885253906, 3.1350784301757812, 18.652080535888672, 2.2012252807617188, 1.5202770233154297, -7.956695556640625, -12.34271240234375, 11.414012908935547, -20.94659423828125, -2.3726959228515625, 4.5564422607421875, -1.5130615234375, 17.73748779296875, 7.143348693847656, 36.25163269042969, 42.712867736816406, 42.37025451660156, 46.01771545410156, 32.438133239746094, 40.29978942871094, -7.1893768310546875, 1.9025650024414062, -3.5519790649414062, 18.773006439208984, 11.705072402954102, 13.6475830078125, -32.022239685058594, 16.269569396972656, -2.1151199340820312, -2.364898681640625, 30.720169067382812, 7.6411285400390625, 20.208812713623047, 49.89347839355469, 35.94182586669922, 36.397491455078125, 9.770023345947266, -2.4076766967773438, -31.07192611694336, 4.347507476806641, 5.238487243652344, 7.694862365722656, 34.69537353515625, -0.7591819763183594, 42.46900939941406, -24.515869140625, 36.58742904663086, 12.781936645507812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000219.npy"}
|
||||
{"epoch": 0.3310657596371882, "step": 220, "batch_size": 64, "mean": 13.637612342834473, "std": 23.849966049194336, "min": -38.68796920776367, "p10": -10.902857208251953, "median": 7.8449859619140625, "p90": 46.71364097595215, "max": 63.81248474121094, "pos_frac": 0.671875, "sample": [4.131687164306641, -16.99871063232422, 22.62481689453125, 6.939460754394531, 1.1744403839111328, 16.529129028320312, 56.36419677734375, 9.744827270507812, 4.878490447998047, 5.715850830078125, -4.093658447265625, 54.89448547363281, 3.918598175048828, -33.13629150390625, -10.527420043945312, -1.762969970703125, 47.39514923095703, 1.8676528930664062, -2.751506805419922, 0.9686965942382812, 36.15354537963867, 13.312255859375, -1.826711654663086, -31.946962356567383, -27.54816436767578, -14.064447402954102, -5.774103164672852, -11.063758850097656, -7.53399658203125, 31.139862060546875, 35.880828857421875, 26.399269104003906, 24.88018798828125, 42.17163848876953, 47.94683837890625, 2.8465652465820312, 40.623199462890625, 32.803531646728516, 34.20783996582031, -3.6361846923828125, 2.7647247314453125, 22.217403411865234, -5.474693298339844, 23.179157257080078, 63.81248474121094, -1.2491531372070312, 48.664947509765625, -0.32071685791015625, 12.389276504516602, 45.12345504760742, 37.664306640625, -5.593990325927734, 41.639381408691406, -0.31810760498046875, 15.982963562011719, 8.750511169433594, -38.68796920776367, 12.497119903564453, 38.658470153808594, 0.05876350402832031, 58.9273681640625, -5.294837951660156, 40.306270599365234, 24.261920928955078], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000220.npy"}
|
||||
{"epoch": 0.3325774754346183, "step": 221, "batch_size": 64, "mean": 14.872674942016602, "std": 23.393848419189453, "min": -37.67931365966797, "p10": -10.736722564697265, "median": 10.09775161743164, "p90": 48.79479675292971, "max": 63.932342529296875, "pos_frac": 0.765625, "sample": [5.034650802612305, -10.515857696533203, 19.937332153320312, 21.64617156982422, -1.0880699157714844, 39.5185546875, 5.489835739135742, 5.454864501953125, 24.814796447753906, -24.39342498779297, -9.613967895507812, 9.126708984375, 13.22769546508789, 12.056068420410156, -16.58472442626953, 8.14898681640625, 4.338775634765625, -0.8034400939941406, 22.399362564086914, 23.93065643310547, 38.85287857055664, 41.76660919189453, 11.315357208251953, 41.296913146972656, 61.9639892578125, 4.190650939941406, 37.50738525390625, 60.38530349731445, -3.1995086669921875, 3.4586753845214844, 24.496597290039062, 51.80687713623047, 1.3172187805175781, 18.67486572265625, 60.38206481933594, 11.068794250488281, 33.71453857421875, -13.233650207519531, 27.289596557617188, 23.386123657226562, 31.59864044189453, -10.831378936767578, 3.2104873657226562, -1.356170654296875, 0.2706108093261719, -0.21103668212890625, 0.4201335906982422, 55.15879821777344, 1.2880325317382812, 8.203121185302734, -31.084518432617188, -37.67931365966797, 2.1845016479492188, 63.932342529296875, 37.394989013671875, -28.03166389465332, 8.400588989257812, 16.690841674804688, 57.38337707519531, 36.18431091308594, -0.8079833984375, 2.0037307739257812, 37.365760803222656, 11.596763610839844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000221.npy"}
|
||||
{"epoch": 0.3340891912320484, "step": 222, "batch_size": 64, "mean": 19.838809967041016, "std": 25.062219619750977, "min": -38.100257873535156, "p10": -8.517474937438964, "median": 21.093637466430664, "p90": 49.34980354309082, "max": 67.24563598632812, "pos_frac": 0.78125, "sample": [-1.6978588104248047, -23.44739532470703, 24.806106567382812, 10.122604370117188, 43.64194869995117, 42.18737030029297, 21.439556121826172, -6.759082794189453, 48.46501159667969, 54.19853973388672, 19.829071044921875, 4.488616943359375, -34.425758361816406, 2.3498611450195312, 44.012413024902344, 3.8257369995117188, 8.650741577148438, 8.611942291259766, 8.470134735107422, 37.6976432800293, 30.00225830078125, -0.2785606384277344, 42.08465576171875, 44.08336639404297, -2.8872222900390625, 50.67152404785156, 1.0537338256835938, 64.61215209960938, -8.971460342407227, 37.09917449951172, 22.4099178314209, 8.064178466796875, 50.88963317871094, 45.05335998535156, 10.507827758789062, 24.4573974609375, 26.89409637451172, -9.404869079589844, -5.345163345336914, -7.4581756591796875, -38.100257873535156, 20.747718811035156, 25.749778747558594, 11.75079345703125, 2.1565093994140625, 67.24563598632812, 8.216117858886719, -37.956153869628906, 0.8726043701171875, 40.403289794921875, 44.43810272216797, -0.5960197448730469, 41.402252197265625, 57.941253662109375, 30.431480407714844, 9.156517028808594, 48.84432601928711, 44.54374694824219, 10.67059326171875, 48.42344665527344, 32.7021484375, 27.961318969726562, 49.566436767578125, -16.892791748046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000222.npy"}
|
||||
{"epoch": 0.3356009070294785, "step": 223, "batch_size": 64, "mean": 18.105669021606445, "std": 22.432788848876953, "min": -20.289501190185547, "p10": -7.437469673156736, "median": 17.10882568359375, "p90": 49.503249740600594, "max": 65.1253662109375, "pos_frac": 0.71875, "sample": [31.163528442382812, 5.226694107055664, 40.14924621582031, 7.4264373779296875, -3.1110916137695312, 30.936798095703125, -0.64263916015625, 28.551528930664062, 9.625404357910156, 29.76202392578125, 2.7111339569091797, 56.52864074707031, 34.379886627197266, 34.29662322998047, 3.3768577575683594, -13.293476104736328, -0.8621826171875, 32.553192138671875, 0.9647216796875, -4.395721435546875, 45.16914749145508, 19.306961059570312, -14.321292877197266, 34.661598205566406, -10.120033264160156, 4.5378570556640625, -0.8332691192626953, 31.71405029296875, -4.0078277587890625, -13.457321166992188, 35.95041275024414, 8.090534210205078, -14.771835327148438, 3.6968002319335938, -0.038909912109375, 57.03578567504883, 48.036842346191406, 26.323360443115234, -4.232818603515625, -20.289501190185547, -5.2077484130859375, -8.393064498901367, 25.871082305908203, 33.817901611328125, 54.30584716796875, 7.347278594970703, 16.959991455078125, 36.67955780029297, 20.8778076171875, 50.131710052490234, 64.96131134033203, -3.5791397094726562, 65.1253662109375, 54.993675231933594, 45.840484619140625, 22.970359802246094, 37.437049865722656, 42.00523376464844, 20.283382415771484, -2.7555465698242188, 1.1672210693359375, 17.257659912109375, 1.1723098754882812, 1.6949234008789062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000223.npy"}
|
||||
{"epoch": 0.3371126228269085, "step": 224, "batch_size": 64, "mean": 10.05770492553711, "std": 23.523534774780273, "min": -39.347774505615234, "p10": -18.917813873291017, "median": 4.881374359130859, "p90": 44.25226211547852, "max": 63.107147216796875, "pos_frac": 0.65625, "sample": [9.075920104980469, 63.107147216796875, 15.33717155456543, 14.395538330078125, 6.7670135498046875, 50.02861404418945, 25.3402099609375, 30.30044937133789, -1.080556869506836, 3.6555099487304688, 44.72957229614258, 4.766353607177734, 37.146400451660156, 43.1385383605957, -22.779117584228516, -0.8583049774169922, 5.588445663452148, 16.622724533081055, 15.299819946289062, 0.8462066650390625, 2.682861328125, 24.5257568359375, -0.30023193359375, -38.806243896484375, -10.919021606445312, -0.22076416015625, -8.459312438964844, -10.860504150390625, 16.71923828125, 0.5569915771484375, 39.286643981933594, -39.347774505615234, -25.917753219604492, -12.360382080078125, -5.3317718505859375, 0.14299774169921875, -0.801727294921875, -32.49846649169922, 4.996395111083984, 16.419296264648438, -23.941604614257812, 27.011703491210938, 37.86419677734375, 5.250236511230469, -18.774627685546875, 4.159507751464844, 13.278495788574219, 37.57157516479492, -18.97917938232422, -3.546600341796875, -1.8998451232910156, 21.679487228393555, 40.310401916503906, 47.08001708984375, 62.86390686035156, 20.45684814453125, 3.9257850646972656, 5.36578369140625, -0.04994773864746094, 51.56600570678711, -3.509735107421875, 49.1619873046875, 4.09088134765625, 1.8239707946777344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000224.npy"}
|
||||
{"epoch": 0.3386243386243386, "step": 225, "batch_size": 64, "mean": 18.771116256713867, "std": 25.066499710083008, "min": -32.654541015625, "p10": -7.1790744781494125, "median": 14.214345932006836, "p90": 56.80294952392578, "max": 68.77622985839844, "pos_frac": 0.78125, "sample": [1.0281505584716797, 20.67449188232422, 5.4727325439453125, -0.69903564453125, -5.9443359375, 30.312255859375, -7.708248138427734, 0.34604644775390625, 32.11229705810547, 10.800487518310547, 7.255226135253906, 53.27630615234375, 7.563835144042969, 57.64439392089844, -14.05588150024414, 1.5842018127441406, 19.18798065185547, 3.6185989379882812, 40.85367965698242, 56.12007141113281, 57.095611572265625, 21.403783798217773, 63.75293731689453, 24.3499755859375, 68.77622985839844, 20.634693145751953, 2.230377197265625, 45.10455322265625, 54.81692123413086, -3.645671844482422, 1.0536651611328125, -26.372207641601562, 30.032249450683594, -8.38431167602539, 62.986351013183594, 23.096603393554688, 0.9331073760986328, 19.461456298828125, 35.561309814453125, 12.870014190673828, 2.1074295043945312, 6.00054931640625, 53.063438415527344, -0.7412586212158203, 27.610488891601562, -26.521575927734375, -17.144989013671875, -2.5698165893554688, 20.148414611816406, 8.800460815429688, 37.78330993652344, 0.5619487762451172, 4.657310485839844, 63.545387268066406, 47.506107330322266, -0.6200504302978516, 5.6382598876953125, 15.558677673339844, 60.08467483520508, 30.363801956176758, -32.654541015625, 25.57135009765625, 50.18775177001953, -2.7865753173828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000225.npy"}
|
||||
{"epoch": 0.3401360544217687, "step": 226, "batch_size": 64, "mean": 15.168936729431152, "std": 28.399503707885742, "min": -51.35661315917969, "p10": -18.595504379272455, "median": 10.985489845275879, "p90": 51.743204498291014, "max": 63.578521728515625, "pos_frac": 0.71875, "sample": [6.238067626953125, -20.36328125, 29.114761352539062, 21.88724708557129, 12.573257446289062, 16.3453369140625, 24.68853759765625, 2.4020347595214844, 35.0750617980957, -14.470691680908203, 43.766815185546875, 11.255952835083008, 39.178138732910156, 48.21772384643555, -35.479164123535156, -0.5327224731445312, 3.635547637939453, -51.35661315917969, 49.26780319213867, 26.051788330078125, 56.76612091064453, -3.2380218505859375, 45.906005859375, 3.3354415893554688, 29.768539428710938, 0.24640464782714844, 6.447113037109375, 51.61615753173828, 48.28199005126953, 34.35980224609375, -6.94645881652832, 55.86576843261719, 10.676311492919922, 17.057151794433594, 56.76750183105469, 5.8691864013671875, 30.440414428710938, -44.08428955078125, -2.459604263305664, 2.0005016326904297, 1.02410888671875, 6.1055145263671875, -12.683151245117188, -42.21741485595703, 57.43328857421875, 63.578521728515625, -32.39002990722656, 10.71502685546875, -39.306304931640625, 27.99535369873047, 10.013317108154297, 2.756570816040039, 15.804157257080078, 11.891275405883789, -0.716400146484375, 60.139793395996094, 48.169921875, 50.17937469482422, -6.691810607910156, -2.138914108276367, 51.79765319824219, -1.0960254669189453, -5.394105911254883, 49.670570373535156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000226.npy"}
|
||||
{"epoch": 0.3416477702191988, "step": 227, "batch_size": 64, "mean": 16.459735870361328, "std": 29.113689422607422, "min": -50.456878662109375, "p10": -20.04205780029297, "median": 5.987785339355469, "p90": 57.329393768310545, "max": 65.50874328613281, "pos_frac": 0.6875, "sample": [43.82466125488281, 14.193693161010742, 0.39215850830078125, 53.03392028808594, 3.4798736572265625, -0.3741569519042969, 41.19976043701172, 61.023841857910156, -50.456878662109375, -3.125856399536133, 64.87467956542969, -20.024391174316406, -2.1756420135498047, 60.535728454589844, 14.19775390625, 6.332611083984375, 1.5608329772949219, -1.9386329650878906, 53.1512451171875, -7.1461944580078125, 19.283615112304688, 52.37976837158203, 4.290454864501953, 5.356597900390625, 65.3664779663086, 26.21208953857422, -1.5479621887207031, 43.99269104003906, -0.6926670074462891, -2.9355926513671875, 35.44490051269531, 65.50874328613281, 31.879653930664062, 7.503814697265625, 4.7920379638671875, 41.40240478515625, 64.72859191894531, 2.6688156127929688, 20.79522705078125, 56.812721252441406, 4.213981628417969, 13.109474182128906, -24.92340087890625, 15.356613159179688, 45.557159423828125, -0.4623870849609375, 3.38177490234375, 3.142810821533203, -20.04962921142578, -0.050182342529296875, -37.80717468261719, 36.03554916381836, 16.73870849609375, 42.213348388671875, 3.330472946166992, -30.14923095703125, 5.6429595947265625, -0.5179634094238281, 57.53547668457031, -4.62896728515625, -41.5023193359375, -20.129623413085938, 54.735748291015625, 56.848533630371094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000227.npy"}
|
||||
{"epoch": 0.3431594860166289, "step": 228, "batch_size": 64, "mean": 17.033653259277344, "std": 26.075437545776367, "min": -42.772830963134766, "p10": -12.882815170288083, "median": 13.417383193969727, "p90": 55.596464538574224, "max": 71.02039337158203, "pos_frac": 0.734375, "sample": [0.34031105041503906, 47.11359405517578, 2.6871795654296875, 58.366966247558594, 13.567794799804688, 24.577346801757812, 18.667879104614258, 60.94733428955078, 8.3094482421875, -27.20124053955078, 28.039602279663086, 1.622955322265625, 7.955291748046875, -42.772830963134766, 10.316352844238281, 2.0911331176757812, 52.23978042602539, -14.65875244140625, 8.192806243896484, 20.347278594970703, 0.4433403015136719, 54.464508056640625, -37.64209747314453, 12.865013122558594, -7.5045318603515625, 48.379730224609375, 39.27922058105469, 71.02039337158203, 6.026693344116211, 37.29895782470703, -8.015602111816406, -10.358856201171875, 47.77529525756836, -0.5599441528320312, 31.662553787231445, -3.480518341064453, 37.35487747192383, -1.709737777709961, -14.786788940429688, -7.006326675415039, 18.24169921875, 35.12702941894531, 27.692840576171875, -25.50969123840332, 15.608642578125, 2.0219078063964844, 60.29277038574219, 65.0240249633789, -13.96451187133789, 11.06890869140625, -4.1468505859375, 13.266971588134766, 23.681854248046875, 21.720008850097656, -4.009346008300781, 17.361595153808594, 56.08158874511719, 57.91529083251953, 47.98832702636719, 28.78472137451172, -7.615455627441406, 17.654373168945312, 36.86540985107422, 12.745292663574219], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000228.npy"}
|
||||
{"epoch": 0.34467120181405897, "step": 229, "batch_size": 64, "mean": 15.885191917419434, "std": 30.11654281616211, "min": -58.0942497253418, "p10": -11.684605407714843, "median": 13.352779388427734, "p90": 54.05883865356446, "max": 68.71539306640625, "pos_frac": 0.75, "sample": [36.598724365234375, 53.569252014160156, 30.726181030273438, 26.960731506347656, 16.688827514648438, 20.33727264404297, 40.666831970214844, 25.4014835357666, 21.805164337158203, -31.18346405029297, 49.90558624267578, 29.205535888671875, 54.26866149902344, -6.837455749511719, 25.957229614257812, 5.792976379394531, -54.20648193359375, -7.487163543701172, 51.703514099121094, 51.40098190307617, 10.822040557861328, 1.3747062683105469, 18.655498504638672, 25.177406311035156, -34.53086853027344, 6.161434173583984, -10.835212707519531, -0.6955814361572266, 3.9743576049804688, -11.781715393066406, 0.90960693359375, 66.09829711914062, -58.0942497253418, 49.421051025390625, -11.458015441894531, 3.5067081451416016, 48.62541198730469, -38.40517044067383, 3.4465408325195312, 19.63787841796875, 55.06817626953125, 24.485570907592773, -0.24302101135253906, -1.9603462219238281, 44.41472244262695, 0.15764617919921875, 61.10498809814453, 15.88351821899414, 54.52484130859375, 6.370573043823242, 5.746879577636719, -1.0508766174316406, 31.922393798828125, 1.935577392578125, 64.70504760742188, 2.5718231201171875, 0.15301895141601562, -0.8819103240966797, 52.372711181640625, 68.71539306640625, 51.65592956542969, 0.6114158630371094, 3.010936737060547, -57.907257080078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000229.npy"}
|
||||
{"epoch": 0.34618291761148906, "step": 230, "batch_size": 64, "mean": 18.669940948486328, "std": 27.235300064086914, "min": -58.75505828857422, "p10": -3.0614618301391596, "median": 9.494701385498047, "p90": 58.02196731567384, "max": 87.91696166992188, "pos_frac": 0.84375, "sample": [-3.328510284423828, 10.285446166992188, 48.37934112548828, 2.055583953857422, 20.97547149658203, -29.830078125, 2.0139713287353516, 1.9435577392578125, 8.703956604003906, 62.98309326171875, 14.638763427734375, 63.15519714355469, 59.276123046875, 46.159751892089844, 21.228256225585938, 30.943220138549805, 24.588031768798828, 55.095603942871094, -26.206642150878906, 4.304901123046875, -58.75505828857422, 3.8939132690429688, 7.776702880859375, 33.17906188964844, 26.414840698242188, -2.004669189453125, 59.929847717285156, 8.108497619628906, 54.613861083984375, 49.02491760253906, 32.190765380859375, 3.7926597595214844, -1.0103797912597656, 2.047544479370117, 4.589515686035156, 12.675300598144531, 6.2151641845703125, 7.441070556640625, 87.91696166992188, 10.399887084960938, 5.850303649902344, 6.334724426269531, 3.5267982482910156, 28.932998657226562, 78.50261688232422, 36.890708923339844, -17.574254989624023, 63.148468017578125, 4.766960144042969, 1.8480491638183594, 46.714874267578125, -21.8603515625, 52.68470764160156, 39.312744140625, 18.368141174316406, 6.5089569091796875, 2.529600143432617, -2.4383487701416016, 10.698335647583008, 3.4456825256347656, 8.363350868225098, 48.05076599121094, -6.0306243896484375, 10.495609283447266], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000230.npy"}
|
||||
{"epoch": 0.3476946334089191, "step": 231, "batch_size": 64, "mean": 27.77908706665039, "std": 29.457223892211914, "min": -61.072723388671875, "p10": -4.932793807983399, "median": 32.66604423522949, "p90": 65.35907135009766, "max": 77.42996215820312, "pos_frac": 0.8125, "sample": [56.03105163574219, 13.115364074707031, 43.59199142456055, 60.81890106201172, 1.60589599609375, 25.141212463378906, 3.368541717529297, 64.3355712890625, 13.513206481933594, 9.646585464477539, 44.37001037597656, 2.398834228515625, 36.06636047363281, -4.679065704345703, -5.3633880615234375, 41.482322692871094, 11.888565063476562, 65.79771423339844, 18.385116577148438, -16.815773010253906, 42.031593322753906, -4.100006103515625, 45.663169860839844, 46.91884994506836, 5.863441467285156, 68.83953857421875, -5.041534423828125, -25.003223419189453, 67.96820831298828, 36.61190414428711, 60.92249298095703, -48.67049026489258, 34.72074890136719, 63.764617919921875, 51.74116897583008, 15.42010498046875, 3.9943923950195312, 32.575862884521484, -0.567291259765625, -2.7733230590820312, 77.42996215820312, 5.241889953613281, 29.514190673828125, 26.813278198242188, 41.942222595214844, 18.780317306518555, 28.751869201660156, -10.963878631591797, 13.398578643798828, -2.0133628845214844, 54.08037185668945, 63.77490234375, 32.7562255859375, 39.74079132080078, 53.256507873535156, 68.05078887939453, 35.451904296875, 40.801475524902344, 38.72698974609375, 45.97804260253906, -61.072723388671875, 27.576814651489258, 66.550537109375, 67.71466064453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000231.npy"}
|
||||
{"epoch": 0.3492063492063492, "step": 232, "batch_size": 64, "mean": 18.84949493408203, "std": 33.62873458862305, "min": -69.50921630859375, "p10": -14.799985885620115, "median": 11.459636688232422, "p90": 65.2188331604004, "max": 96.77053833007812, "pos_frac": 0.75, "sample": [1.2552337646484375, 62.57961654663086, 48.09960174560547, 70.98242950439453, 5.829366683959961, 57.66949462890625, 35.360023498535156, 58.416900634765625, -0.919952392578125, -21.403182983398438, -20.594135284423828, 5.501617431640625, -2.1044540405273438, 13.506450653076172, 22.952861785888672, 61.3471565246582, 0.24164199829101562, 2.1883773803710938, 31.077056884765625, 2.6728172302246094, 0.09497451782226562, 35.96996307373047, 35.231658935546875, 30.57861328125, 7.0273284912109375, 36.63751220703125, -6.305318832397461, 70.70140075683594, -11.457599639892578, 12.509956359863281, 28.936399459838867, 61.88987731933594, -11.919906616210938, -1.7464752197265625, 16.670745849609375, 2.28338623046875, 0.9615478515625, -49.08042907714844, 75.44285583496094, 18.51849365234375, 1.2014083862304688, 67.35211181640625, -12.497703552246094, 89.1663818359375, 65.59931945800781, 96.77053833007812, -13.727344512939453, -15.259689331054688, -6.448646545410156, 9.778409957885742, 24.874046325683594, 58.588043212890625, 41.8905029296875, 11.398368835449219, 2.707965850830078, -44.458526611328125, 0.47539329528808594, 64.3310317993164, 10.598987579345703, 29.788532257080078, -22.93024444580078, -69.50921630859375, 11.520904541015625, 17.55315399169922], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000232.npy"}
|
||||
{"epoch": 0.3507180650037793, "step": 233, "batch_size": 64, "mean": 14.946308135986328, "std": 29.521711349487305, "min": -51.69474792480469, "p10": -17.23146324157715, "median": 6.816164016723633, "p90": 55.970916748046875, "max": 77.6192626953125, "pos_frac": 0.671875, "sample": [74.8057632446289, 10.163841247558594, 55.493255615234375, -25.410423278808594, 7.071544647216797, 24.441883087158203, 26.04095458984375, 19.963031768798828, -3.884593963623047, -8.033531188964844, 8.13494873046875, 77.6192626953125, -4.138042449951172, -0.1244659423828125, -1.0167007446289062, 16.249893188476562, 51.00289535522461, 3.9406814575195312, -2.5662612915039062, 1.0468826293945312, 60.63935852050781, 4.910667419433594, 48.32075500488281, 56.175628662109375, -16.777618408203125, 22.14696502685547, -9.813301086425781, 28.360431671142578, -32.411712646484375, -29.228439331054688, 51.220680236816406, 5.315784454345703, 56.21934509277344, 27.272197723388672, 46.97566223144531, -51.69474792480469, 12.252510070800781, 2.125883102416992, -3.124053955078125, 0.9907817840576172, 6.560783386230469, -41.218467712402344, 31.515045166015625, 0.081451416015625, 23.82923126220703, -9.09613037109375, -6.1968841552734375, 28.370132446289062, 50.15434265136719, 44.291229248046875, -17.425968170166016, -12.854333877563477, 1.143301010131836, 75.79327392578125, 32.14129638671875, 74.20811462402344, 3.4540348052978516, -6.954029083251953, 51.81822204589844, 4.112030029296875, 18.388145446777344, -0.1498870849609375, -24.247373580932617, 18.16858673095703], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000233.npy"}
|
||||
{"epoch": 0.35222978080120937, "step": 234, "batch_size": 64, "mean": 15.141345977783203, "std": 32.58378601074219, "min": -47.923866271972656, "p10": -22.184631347656246, "median": 4.931041717529297, "p90": 63.705592346191416, "max": 74.24417877197266, "pos_frac": 0.65625, "sample": [5.717079162597656, -46.183258056640625, 4.8265533447265625, -15.482879638671875, 32.87852478027344, -1.2941932678222656, 2.1743297576904297, -14.207023620605469, 45.83843994140625, -18.684661865234375, 15.878257751464844, 28.65169906616211, 65.31143951416016, 17.9875431060791, 65.63160705566406, 48.149532318115234, -25.18280792236328, -40.47763442993164, 47.797027587890625, -2.6222591400146484, 3.9624691009521484, 1.458150863647461, 60.61968994140625, 74.24417877197266, -0.6605052947998047, -6.740913391113281, 59.88446044921875, -1.0248985290527344, -23.26177215576172, 9.915336608886719, 56.20783996582031, 70.27627563476562, 65.06910705566406, 54.57953643798828, -3.691549301147461, -47.923866271972656, -2.5180511474609375, 4.1197052001953125, 3.5716552734375, -18.221542358398438, 51.762786865234375, 44.12968444824219, 4.755271911621094, 31.378280639648438, 54.431922912597656, 2.7548255920410156, 3.7082748413085938, 25.700332641601562, 65.02812194824219, -11.983713150024414, -15.250732421875, 20.87540626525879, -35.428672790527344, 27.777835845947266, -46.0412483215332, 0.5125160217285156, 40.76551818847656, 7.9034881591796875, -1.2181167602539062, 5.035530090332031, 17.861183166503906, 70.80997467041016, 46.87635803222656, -19.671302795410156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000234.npy"}
|
||||
{"epoch": 0.35374149659863946, "step": 235, "batch_size": 64, "mean": 25.579490661621094, "std": 29.76803970336914, "min": -49.62345504760742, "p10": -3.350930786132812, "median": 22.07374382019043, "p90": 64.37050018310548, "max": 81.33921813964844, "pos_frac": 0.84375, "sample": [2.5054931640625, 19.551483154296875, 17.50708770751953, 31.193700790405273, 39.09187316894531, 20.45425033569336, 13.421310424804688, 81.33921813964844, 79.95706176757812, 50.26139831542969, 28.82745361328125, 53.19138717651367, 19.588134765625, 31.942106246948242, 62.87956237792969, 53.9194221496582, 24.43748664855957, 5.913974761962891, 52.6121711730957, 69.60476684570312, 5.1474456787109375, 9.295585632324219, 19.97601318359375, 65.052001953125, 61.38172149658203, 42.228965759277344, -3.0311050415039062, -3.935894012451172, 61.73115921020508, -49.62345504760742, 36.586456298828125, 2.271442413330078, 7.563259124755859, 18.920379638671875, 64.72203063964844, 9.768640518188477, 27.073543548583984, 5.5039825439453125, 31.16009521484375, -38.870147705078125, -7.849496841430664, 48.01922607421875, 23.6932373046875, 8.13726806640625, 48.498291015625, 5.936870574951172, -1.8108901977539062, 61.25350570678711, 77.745849609375, 6.4423065185546875, 12.262931823730469, -15.132827758789062, 70.00889587402344, 29.96540069580078, 0.33829498291015625, -49.56800842285156, 34.69501495361328, 50.88103485107422, 63.550262451171875, 4.077434539794922, 38.374481201171875, -3.4879989624023438, 2.7072486877441406, -2.772428512573242], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000235.npy"}
|
||||
{"epoch": 0.35525321239606955, "step": 236, "batch_size": 64, "mean": 19.57408905029297, "std": 34.44096374511719, "min": -78.89389038085938, "p10": -10.690386772155762, "median": 9.716266632080078, "p90": 67.07366561889648, "max": 85.35746765136719, "pos_frac": 0.734375, "sample": [58.89017105102539, -3.4622650146484375, 3.5284194946289062, 48.44343948364258, 10.314733505249023, 1.5539703369140625, 58.996070861816406, 19.714889526367188, -9.473884582519531, 16.47570037841797, 34.92500305175781, 7.363311767578125, -5.998693466186523, 5.0210418701171875, -7.882867813110352, 3.3545303344726562, 50.83564758300781, 60.984710693359375, 25.0938720703125, 66.45317077636719, 2.0591793060302734, -3.7030792236328125, 0.6027374267578125, -0.4365577697753906, 11.763603210449219, 10.023994445800781, 41.72325134277344, 53.909271240234375, 70.11471557617188, 44.52619934082031, 64.96090698242188, -3.6853790283203125, 8.355560302734375, 13.718631744384766, 48.14497375488281, -23.266998291015625, 4.891876220703125, 85.35746765136719, 4.160125732421875, 14.01832389831543, -48.04884338378906, 17.991825103759766, 60.164222717285156, 67.33959197998047, 45.3170166015625, 33.744659423828125, -1.1277389526367188, -12.585548400878906, 71.89299011230469, 2.784038543701172, 0.4049644470214844, -78.89389038085938, -71.60128784179688, 72.22311401367188, 71.66836547851562, 65.45292663574219, 9.408538818359375, -10.345418930053711, -11.079952239990234, -10.83823013305664, 70.20896911621094, -1.2297286987304688, 9.015378952026367, 8.505943298339844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000236.npy"}
|
||||
{"epoch": 0.35676492819349964, "step": 237, "batch_size": 64, "mean": 23.130733489990234, "std": 35.17315673828125, "min": -67.62501525878906, "p10": -21.391172790527342, "median": 19.68617820739746, "p90": 66.77902069091797, "max": 88.60569763183594, "pos_frac": 0.765625, "sample": [59.48210144042969, 62.146385192871094, 1.2080078125, 73.84321594238281, 0.6834945678710938, -21.034774780273438, 10.908287048339844, 18.129070281982422, 8.327373504638672, 56.486305236816406, 28.909622192382812, 30.29254150390625, -10.988616943359375, -1.8975677490234375, 56.18307113647461, 12.601341247558594, -1.997894287109375, 35.81297302246094, -33.77001190185547, 63.12785339355469, -67.62501525878906, 52.07448959350586, 21.2432861328125, 53.984954833984375, 84.75746154785156, -3.476490020751953, 50.169921875, 53.93889617919922, -23.566482543945312, 24.83696746826172, 65.40572357177734, 28.341156005859375, 7.444633483886719, -57.48701477050781, 42.41410827636719, 2.9929161071777344, 5.350166320800781, -50.89234161376953, 81.81231689453125, 7.899927139282227, 27.342849731445312, 17.262611389160156, 13.753746032714844, 22.072662353515625, 6.173088073730469, -0.3864307403564453, 40.26720428466797, 58.3656005859375, 26.7967529296875, 67.3675765991211, 63.25873565673828, 8.548851013183594, 73.88108825683594, 88.60569763183594, -23.00732421875, 7.677562713623047, -2.455545425415039, 62.16265106201172, 5.434574127197266, 29.750694274902344, -6.449787139892578, 15.316886901855469, -21.543914794921875, 72.09878540039062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000237.npy"}
|
||||
{"epoch": 0.35827664399092973, "step": 238, "batch_size": 64, "mean": 22.96609115600586, "std": 33.67013931274414, "min": -61.36724853515625, "p10": -18.815851593017577, "median": 20.764452934265137, "p90": 67.6037109375, "max": 81.20497131347656, "pos_frac": 0.734375, "sample": [-28.154972076416016, 7.392280578613281, 65.59259033203125, -16.21184539794922, 67.9749755859375, 74.30760955810547, 1.1140708923339844, 11.922943115234375, 70.33902740478516, -23.631574630737305, 16.468528747558594, -10.192184448242188, 69.7120590209961, 18.378517150878906, 46.494773864746094, 33.414493560791016, -1.6534671783447266, -13.359636306762695, 34.58479309082031, 73.0785903930664, 52.784217834472656, 47.16265869140625, 54.44862365722656, 45.59912872314453, 9.155815124511719, 66.2786636352539, 22.70561981201172, -17.39521026611328, -28.141735076904297, 51.3336181640625, 59.70298767089844, 47.86393356323242, 4.273828506469727, 66.7374267578125, -0.7450523376464844, 11.568267822265625, 34.40191650390625, -3.533782958984375, 56.90503692626953, 56.33319854736328, 3.491668701171875, -12.644538879394531, 1.3528213500976562, 19.880447387695312, 38.12091064453125, 42.69020080566406, 81.20497131347656, 30.463363647460938, 21.64845848083496, 40.649383544921875, 6.0347900390625, 45.140052795410156, 64.58810424804688, -19.424697875976562, -10.01805305480957, 1.0035667419433594, -61.36724853515625, 6.22833251953125, 78.63655853271484, -8.911468505859375, 23.629547119140625, -28.226417541503906, -32.377567291259766, 3.025888442993164], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000238.npy"}
|
||||
{"epoch": 0.35978835978835977, "step": 239, "batch_size": 64, "mean": 22.669845581054688, "std": 35.785037994384766, "min": -60.490516662597656, "p10": -18.349046516418454, "median": 17.570232391357422, "p90": 73.90716247558593, "max": 94.70332336425781, "pos_frac": 0.75, "sample": [13.548480987548828, 8.199264526367188, 39.321075439453125, -0.23511123657226562, 73.76100158691406, 9.287460327148438, -7.431282043457031, -0.43773651123046875, -10.409385681152344, 17.878036499023438, -14.963783264160156, 37.12882995605469, 0.15779495239257812, 76.64076232910156, 60.92335510253906, 4.789703369140625, -24.606121063232422, -33.627777099609375, 2.85736083984375, 70.20562744140625, 3.8476715087890625, 15.048179626464844, 60.88220977783203, 79.09551239013672, 73.96980285644531, 20.810625076293945, 41.3651237487793, 48.4386100769043, 29.91602325439453, 10.007675170898438, 69.51742553710938, 33.05943298339844, -35.32713317871094, -15.596624374389648, 19.53874969482422, 59.69261169433594, -2.1340370178222656, 9.357229232788086, -19.528656005859375, 34.60328674316406, -2.217508316040039, 41.861572265625, 14.980903625488281, 9.026519775390625, -55.10243225097656, 40.88359069824219, 86.6036148071289, 88.23782348632812, 94.70332336425781, -42.04417037963867, 0.2566051483154297, 8.66278076171875, -60.490516662597656, 64.77777862548828, 28.333984375, 53.34992218017578, 76.618896484375, -3.6520423889160156, 25.385086059570312, 48.518890380859375, 2.8388900756835938, 22.97864532470703, 29.544288635253906, 17.262428283691406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000239.npy"}
|
||||
{"epoch": 0.36130007558578986, "step": 240, "batch_size": 64, "mean": 29.717445373535156, "std": 35.578163146972656, "min": -70.44524383544922, "p10": -7.274905776977539, "median": 24.868454933166504, "p90": 79.09910583496095, "max": 93.27967834472656, "pos_frac": 0.8125, "sample": [43.013790130615234, -7.4575958251953125, 68.644775390625, 1.36572265625, 51.54597473144531, 90.89178466796875, 27.0872802734375, 37.80120086669922, -29.77388572692871, -26.417800903320312, 3.9761886596679688, -3.573507308959961, 56.92190170288086, 25.698286056518555, 51.85751724243164, 80.37287139892578, 56.59944152832031, -16.564062118530273, 76.36766815185547, 28.782028198242188, -6.848628997802734, 58.61712646484375, 30.703296661376953, -27.77759552001953, 21.903717041015625, 47.97926330566406, 17.126174926757812, 71.36888122558594, 91.21731567382812, 21.13269805908203, 66.44178009033203, 93.27967834472656, 6.403171539306641, 4.003387451171875, 1.6431655883789062, 61.809356689453125, 23.85657501220703, 63.079345703125, 72.98994445800781, 3.3711490631103516, 60.736793518066406, 5.223789215087891, -8.553089141845703, 36.02581787109375, 83.3456039428711, -70.44524383544922, -2.1221485137939453, 0.34726715087890625, 32.972267150878906, 3.2302093505859375, 24.038623809814453, 76.08011627197266, -2.8082809448242188, 19.493423461914062, 1.9425697326660156, 12.240989685058594, 80.26972198486328, 82.13787841796875, 41.368927001953125, 12.017839431762695, 61.368865966796875, 4.1043548583984375, -3.4854049682617188, 12.946197509765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000240.npy"}
|
||||
{"epoch": 0.36281179138321995, "step": 241, "batch_size": 64, "mean": 20.091632843017578, "std": 35.233211517333984, "min": -71.72753143310547, "p10": -12.2768009185791, "median": 17.579336166381836, "p90": 67.08916625976563, "max": 96.47600555419922, "pos_frac": 0.65625, "sample": [-8.331222534179688, -12.515159606933594, 51.512054443359375, -31.180978775024414, 41.799476623535156, -9.636276245117188, 24.488265991210938, 48.30792999267578, 57.980072021484375, 46.246185302734375, -5.835357666015625, 47.864768981933594, -11.720630645751953, 17.650522232055664, 7.713066101074219, 76.41364288330078, 1.9163742065429688, 2.9152965545654297, -0.5666885375976562, 61.01042175292969, 39.04059600830078, 25.715545654296875, -5.5279388427734375, -3.9643478393554688, 40.257354736328125, -3.6856250762939453, 62.25418472290039, -1.2191219329833984, -6.718475341796875, -20.709854125976562, 75.67034912109375, 92.79965209960938, 86.8734130859375, 27.576793670654297, -2.0176925659179688, -46.55467224121094, -5.042427062988281, 21.44622802734375, 21.948593139648438, 0.3181571960449219, -71.72753143310547, 2.9448318481445312, 0.1491851806640625, 64.01290893554688, 17.508150100708008, 23.55298614501953, -37.66746520996094, 96.47600555419922, 29.04663848876953, -36.89176940917969, 72.71441650390625, 42.99883270263672, 28.24951171875, 21.861175537109375, 68.40756225585938, -6.875270843505859, 7.229682922363281, 61.19697570800781, 52.010948181152344, 43.18586730957031, 3.1826725006103516, 10.615007400512695, -8.414888381958008, -2.394407272338867], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000241.npy"}
|
||||
{"epoch": 0.36432350718065004, "step": 242, "batch_size": 64, "mean": 27.119895935058594, "std": 37.807498931884766, "min": -74.6930160522461, "p10": -5.0030979156494135, "median": 20.311931610107422, "p90": 82.13563461303711, "max": 115.45889282226562, "pos_frac": 0.84375, "sample": [-55.204689025878906, 29.239517211914062, 19.826927185058594, 4.509250640869141, 35.57677459716797, -20.969379425048828, 83.12393951416016, 51.978492736816406, 16.38072967529297, 35.17326354980469, 58.656890869140625, 36.040897369384766, 0.07909965515136719, 96.2903823852539, 76.63836669921875, -11.2020263671875, 82.94369506835938, -5.209564208984375, 3.8991317749023438, 18.53652572631836, 14.151573181152344, 25.09650421142578, 80.25016021728516, 4.592672348022461, 2.0305328369140625, 20.79693603515625, 24.912933349609375, 2.6276168823242188, 64.29012298583984, 31.1490478515625, 57.30939483642578, 2.1606674194335938, 91.31146240234375, 13.530143737792969, 60.7320671081543, 14.239128112792969, 0.1970348358154297, 2.0873985290527344, 115.45889282226562, 14.229095458984375, -27.299896240234375, 34.883445739746094, 37.171836853027344, 3.09771728515625, 31.830360412597656, 1.995574951171875, 49.292762756347656, 6.419166564941406, 74.00973510742188, 72.36737060546875, 83.84992980957031, 47.02616882324219, 98.172119140625, -4.392084121704102, 0.032512664794921875, 58.32353973388672, 57.708648681640625, -0.0037021636962890625, 5.912406921386719, -74.6930160522461, 3.8739089965820312, -4.521343231201172, -43.51459503173828, 26.669267654418945], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000242.npy"}
|
||||
{"epoch": 0.36583522297808013, "step": 243, "batch_size": 64, "mean": 24.0076904296875, "std": 35.71516418457031, "min": -68.2177734375, "p10": -19.236414718627927, "median": 18.79292106628418, "p90": 74.55722045898438, "max": 86.12007141113281, "pos_frac": 0.734375, "sample": [3.239166259765625, 56.337890625, 37.40599822998047, -21.222244262695312, 58.515106201171875, 4.134836196899414, -68.2177734375, 5.3376617431640625, 66.5914535522461, -1.5142593383789062, 51.93394470214844, 24.522377014160156, -1.799957275390625, 18.769943237304688, 18.54387664794922, 82.41798400878906, -0.48123741149902344, -1.0556869506835938, 77.57691955566406, -6.0582275390625, 2.7444000244140625, 70.6722412109375, 29.241653442382812, -4.2623138427734375, 52.880096435546875, 38.34686279296875, 5.476966857910156, 47.53004455566406, 75.80338287353516, 34.680809020996094, -48.33465576171875, -35.388431549072266, 42.51948547363281, -8.391046524047852, 70.26760864257812, 40.20624542236328, 47.748504638671875, 74.03651428222656, -20.55517578125, 17.56561279296875, 86.12007141113281, 45.987281799316406, 41.43891906738281, 85.50697326660156, 2.098175048828125, -0.09511566162109375, 2.9570560455322266, 6.073286056518555, 74.78038024902344, 66.07402801513672, 1.477508544921875, 38.77485656738281, 4.919792175292969, 52.174293518066406, -35.829402923583984, 24.1474609375, 40.4592170715332, 18.815898895263672, -16.159305572509766, -12.421234130859375, -22.568260192871094, 80.605224609375, 6.2244415283203125, 7.16412353515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000243.npy"}
|
||||
{"epoch": 0.3673469387755102, "step": 244, "batch_size": 64, "mean": 31.035232543945312, "std": 34.912017822265625, "min": -52.000404357910156, "p10": -2.470337677001952, "median": 23.885733604431152, "p90": 80.16347045898438, "max": 90.15988159179688, "pos_frac": 0.84375, "sample": [8.463554382324219, 20.098731994628906, 18.538818359375, 33.256248474121094, -22.971267700195312, 23.846586227416992, -3.035573959350586, 80.2930679321289, 26.60576820373535, -23.74120330810547, 22.099040985107422, 65.73246765136719, -1.2214813232421875, 76.08973693847656, 38.69961166381836, 3.3535919189453125, 61.662879943847656, 79.6165771484375, -52.000404357910156, 77.58817291259766, 22.58746337890625, 11.375259399414062, -44.90501403808594, 54.1842155456543, 10.85052490234375, 72.8667221069336, 43.050270080566406, 60.54414367675781, 79.86107635498047, 12.361368179321289, 88.1865234375, 11.133064270019531, 2.393003463745117, -11.620460510253906, -0.8177261352539062, 17.15904426574707, -0.6006317138671875, 66.42627716064453, 59.30955505371094, 90.15988159179688, 2.7109413146972656, 62.466796875, 0.9806976318359375, 13.984817504882812, 2.1451873779296875, 85.2425537109375, 23.924880981445312, 57.51854705810547, 5.119407653808594, 28.561779022216797, 2.17645263671875, 8.468093872070312, 79.5302734375, 34.62986755371094, 85.98432922363281, 36.475189208984375, -3.0055618286132812, 26.527420043945312, 4.571163177490234, 3.7297897338867188, 45.57481384277344, 33.651023864746094, 84.69977569580078, 83.10711669921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000244.npy"}
|
||||
{"epoch": 0.3688586545729403, "step": 245, "batch_size": 64, "mean": 36.077369689941406, "std": 36.25455093383789, "min": -50.148712158203125, "p10": -4.467957878112792, "median": 36.6451416015625, "p90": 83.8615608215332, "max": 96.93688201904297, "pos_frac": 0.84375, "sample": [42.87555694580078, 52.777801513671875, 46.96733474731445, 83.99028015136719, 13.038572311401367, 7.906883239746094, 3.525409698486328, 71.76885223388672, 71.4650650024414, 43.26580047607422, 1.3417434692382812, 18.29119873046875, 80.15827178955078, 50.76803207397461, 37.70519256591797, 78.56623077392578, -1.3157119750976562, 72.82111358642578, -32.260162353515625, 96.93688201904297, 92.2699203491211, 2.7944259643554688, -1.811492919921875, 36.269622802734375, 74.09208679199219, 18.516006469726562, 29.41455078125, -3.864471435546875, 75.86817932128906, 1.343698501586914, 68.0360107421875, -43.86382293701172, 1.1144638061523438, 55.130027770996094, 7.476997375488281, 34.13999938964844, -9.056716918945312, 30.34978675842285, 63.72542953491211, 91.41878509521484, 38.446617126464844, 2.7479705810546875, 83.59973907470703, 13.486106872558594, -4.726594924926758, 44.1007080078125, 83.97377014160156, 82.20160675048828, 2.876312255859375, -6.286712646484375, 37.020660400390625, 71.01910400390625, -8.227294921875, 58.5632438659668, 93.01902770996094, 34.92251205444336, -50.148712158203125, 85.5318832397461, 33.43760299682617, 12.161262512207031, 4.58050537109375, 60.985107421875, 24.266986846923828, 47.44254684448242], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000245.npy"}
|
||||
{"epoch": 0.37037037037037035, "step": 246, "batch_size": 64, "mean": 28.857349395751953, "std": 45.17575454711914, "min": -80.50135803222656, "p10": -14.316067504882811, "median": 18.23645782470703, "p90": 84.96723098754883, "max": 123.21987915039062, "pos_frac": 0.75, "sample": [72.07159423828125, 77.6250991821289, 82.69152069091797, 79.65298461914062, 14.623153686523438, 5.0584564208984375, -71.0501708984375, 90.79658508300781, 28.01329803466797, -11.922677993774414, 64.24134826660156, 69.97508239746094, 37.5482177734375, -4.432945251464844, 51.72120666503906, 56.84916305541992, 11.161956787109375, -5.332431793212891, -5.534934997558594, -1.7302398681640625, 9.85211181640625, 51.84480285644531, -0.6238174438476562, 85.94253540039062, 97.85950469970703, -33.12409973144531, 31.034347534179688, 123.21987915039062, -15.045257568359375, 5.8960113525390625, 13.955698013305664, 79.32572174072266, 12.5137939453125, 4.419851303100586, -45.38555908203125, 36.3165283203125, -71.69876861572266, 9.688985824584961, 86.10771179199219, 1.7831268310546875, 49.18737030029297, -42.39550018310547, -10.46673583984375, 95.2015609741211, 27.272502899169922, 2.5131607055664062, 64.46025848388672, 76.51399230957031, 71.28912353515625, -80.50135803222656, 2.9774932861328125, 19.916900634765625, 65.72281646728516, 99.49100494384766, 13.522014617919922, 0.06993865966796875, 76.77735137939453, 77.73345947265625, -1.2786865234375, -12.6146240234375, 39.55289840698242, 16.556015014648438, 63.785552978515625, 5.6744384765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000246.npy"}
|
||||
{"epoch": 0.37188208616780044, "step": 247, "batch_size": 64, "mean": 16.175758361816406, "std": 39.1475944519043, "min": -82.62789154052734, "p10": -31.5140769958496, "median": 10.068771362304688, "p90": 66.38763732910157, "max": 115.045166015625, "pos_frac": 0.703125, "sample": [-52.08628845214844, 38.0665283203125, 49.8953857421875, 5.188789367675781, 2.6237335205078125, -7.5436553955078125, 5.6080322265625, 85.38949584960938, 85.78948974609375, -6.2850494384765625, 31.877883911132812, 36.867828369140625, 66.85419464111328, 25.654199600219727, 10.274520874023438, 48.890167236328125, 62.70201110839844, 30.138370513916016, -5.103822708129883, -42.469696044921875, 115.045166015625, 80.21653747558594, -62.69291687011719, -8.271905899047852, 4.8682708740234375, -4.096168518066406, 48.78089904785156, -3.7340621948242188, -35.77128601074219, 49.434425354003906, 12.530197143554688, 6.577323913574219, 16.275100708007812, 3.726104736328125, 2.2965240478515625, 9.457656860351562, 54.31393814086914, -82.62789154052734, -17.093002319335938, 1.0027332305908203, 1.1456108093261719, 9.863021850585938, 2.10400390625, 71.07713317871094, 73.57693481445312, 8.554203033447266, -0.5165443420410156, -21.580589294433594, 13.334049224853516, 40.691864013671875, 15.060562133789062, 43.917869567871094, -16.93157958984375, 13.123636245727539, -64.5301513671875, 46.13104248046875, 56.52664566040039, 65.29900360107422, -10.280570983886719, -54.53288269042969, -7.925384521484375, 57.32994079589844, 16.011749267578125, 15.229265213012695], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000247.npy"}
|
||||
{"epoch": 0.37339380196523053, "step": 248, "batch_size": 64, "mean": 30.569293975830078, "std": 45.477333068847656, "min": -57.25453186035156, "p10": -23.797151947021483, "median": 18.729108810424805, "p90": 91.48460083007812, "max": 111.24129486083984, "pos_frac": 0.71875, "sample": [5.294898986816406, 78.13937377929688, -32.96885681152344, -45.891632080078125, 98.46736907958984, 83.17684936523438, 68.95021057128906, 70.75119018554688, -0.7501392364501953, -21.5179500579834, 1.6194992065429688, 91.28150939941406, 19.513168334960938, -46.63429260253906, 83.26565551757812, 41.55182647705078, 98.88958740234375, -15.958213806152344, -53.42620849609375, 18.887859344482422, -22.88690948486328, 6.6332855224609375, 23.15404510498047, 15.050954818725586, 84.79021453857422, 73.52325439453125, 81.55068969726562, 10.904258728027344, 49.56193161010742, 91.05818176269531, -25.400611877441406, 75.85002136230469, -3.755298614501953, -14.521936416625977, 91.57164001464844, 84.69963836669922, 12.813552856445312, 27.52569580078125, 31.738733291625977, 16.189163208007812, 58.01427459716797, 74.0539779663086, 17.30504608154297, 103.03173828125, 99.59156799316406, 39.1157112121582, 4.766582489013672, -0.4195747375488281, 5.452119827270508, 48.99198913574219, 111.24129486083984, 9.533039093017578, 43.14515686035156, 18.570358276367188, -57.25453186035156, 4.849039077758789, -17.80645751953125, 93.12410736083984, 77.49878692626953, -1.8674545288085938, -6.850484848022461, 9.5640869140625, -24.187255859375, -5.720367431640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000248.npy"}
|
||||
{"epoch": 0.3749055177626606, "step": 249, "batch_size": 64, "mean": 11.772629737854004, "std": 42.527645111083984, "min": -105.34226989746094, "p10": -45.268298339843746, "median": 11.87548828125, "p90": 70.3289291381836, "max": 88.97222900390625, "pos_frac": 0.6875, "sample": [12.708320617675781, -39.027984619140625, 5.718864440917969, 51.74810791015625, 13.870269775390625, -24.965042114257812, -57.09855651855469, 37.77410125732422, 21.277740478515625, 37.632049560546875, 36.886138916015625, 68.37976837158203, 4.532020568847656, 71.1642837524414, -80.00259399414062, -13.832351684570312, 27.712234497070312, -52.941627502441406, -53.12591552734375, -37.97529602050781, 4.484712600708008, -0.4891815185546875, -32.787803649902344, 6.069568634033203, 37.73640441894531, 11.042655944824219, -0.3771648406982422, -12.155014038085938, 80.26188659667969, -16.286056518554688, -25.782493591308594, 78.34506225585938, 10.88470458984375, 33.46833801269531, 44.315128326416016, -54.767120361328125, 21.154151916503906, 3.7785797119140625, 13.529762268066406, -31.1171875, 6.029699325561523, 88.97222900390625, 59.87675476074219, 62.51739501953125, 13.2672119140625, 7.407573699951172, -47.942718505859375, 36.67037582397461, 20.71924591064453, 14.776878356933594, 37.77751922607422, 81.47615051269531, 49.870269775390625, 0.8272323608398438, 15.772008895874023, 15.645538330078125, -22.6317081451416, 68.18936157226562, 87.56986999511719, -105.34226989746094, 85.40324401855469, -30.03852653503418, 0.9870758056640625, 3.904449462890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000249.npy"}
|
||||
{"epoch": 0.3764172335600907, "step": 250, "batch_size": 64, "mean": 30.234956741333008, "std": 41.85040283203125, "min": -103.33146667480469, "p10": -5.788530158996582, "median": 29.247496604919434, "p90": 85.25342483520508, "max": 106.515625, "pos_frac": 0.8125, "sample": [52.05888366699219, 11.850034713745117, -5.954713821411133, 13.683446884155273, 22.95635986328125, 31.943374633789062, 45.047027587890625, 97.89309692382812, 86.7385025024414, 3.0263671875, 70.22111511230469, -34.044212341308594, 51.931243896484375, 11.683380126953125, 59.492584228515625, 0.28928184509277344, -1.936492919921875, -103.33146667480469, 65.70195007324219, 6.825736999511719, 36.93269348144531, -5.21101188659668, 29.87005615234375, -50.33906555175781, 75.11515045166016, 0.929931640625, 92.41197204589844, 39.878868103027344, 1.2772064208984375, 36.45091247558594, 7.032512664794922, 29.55059814453125, 84.10393524169922, 4.949714660644531, 36.639801025390625, 30.158187866210938, 0.05689239501953125, 77.41009521484375, 103.5376205444336, 17.169921875, -59.96687316894531, -1.5403785705566406, 33.7492561340332, 2.9146728515625, 102.59812927246094, 66.51274108886719, 51.026763916015625, 67.08511352539062, 85.74606323242188, 75.71720886230469, -8.339920043945312, -15.797843933105469, 68.80917358398438, 4.636995315551758, 5.77313232421875, 8.079490661621094, 106.515625, 74.56989288330078, 15.255451202392578, -0.38179969787597656, -5.400768280029297, 83.25965118408203, 28.944395065307617, 11.269439697265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000250.npy"}
|
||||
{"epoch": 0.3779289493575208, "step": 251, "batch_size": 64, "mean": 19.693090438842773, "std": 43.058170318603516, "min": -86.85629272460938, "p10": -33.90027008056641, "median": 16.30752944946289, "p90": 74.39882431030276, "max": 108.3720932006836, "pos_frac": 0.6875, "sample": [1.9196758270263672, 95.12471008300781, 97.4593734741211, -0.31999778747558594, 42.65918731689453, 55.69916915893555, -86.85629272460938, 25.65786361694336, 35.52318572998047, 108.3720932006836, 5.328163146972656, -73.10216522216797, 4.306854248046875, -29.461326599121094, 12.718929290771484, -25.755451202392578, 57.60298538208008, 104.31912231445312, 13.140684127807617, -65.59922790527344, -32.552757263183594, 95.05406951904297, -1.5680522918701172, -38.24955749511719, 28.855972290039062, -17.309532165527344, 15.356147766113281, 62.61688232421875, -17.111778259277344, -0.7687454223632812, 81.03822326660156, 67.61986541748047, 20.482837677001953, 27.13790512084961, 46.604248046875, 33.005287170410156, 49.03160858154297, 43.04853820800781, -51.18042755126953, -0.7103672027587891, 53.76630783081055, 3.25128173828125, 61.042808532714844, 13.861652374267578, 12.651092529296875, 34.13569641113281, 17.2589111328125, 13.381683349609375, 27.58642578125, 42.72100830078125, 77.30409240722656, 3.8646087646484375, 51.27375411987305, -28.22509002685547, -4.155021667480469, -1.3850250244140625, 50.622928619384766, -40.57623291015625, 29.5835018157959, -34.47777557373047, -28.037582397460938, 36.63763427734375, 64.87708282470703, 14.256078720092773], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000251.npy"}
|
||||
{"epoch": 0.3794406651549509, "step": 252, "batch_size": 64, "mean": 17.732070922851562, "std": 46.949302673339844, "min": -100.55307006835938, "p10": -52.65697860717772, "median": 13.916693687438965, "p90": 80.35983200073242, "max": 109.82513427734375, "pos_frac": 0.734375, "sample": [37.07970428466797, -73.29548645019531, -13.670328140258789, -61.988067626953125, 45.80805206298828, -13.840873718261719, 8.661746978759766, 49.432220458984375, 0.20317649841308594, 94.36278533935547, 80.54624938964844, 55.83946990966797, 16.73340606689453, 1.4875564575195312, 102.43639373779297, -12.406387329101562, 97.52342987060547, -58.60814666748047, 12.388572692871094, -0.6894168853759766, -61.37272644042969, 17.235849380493164, 29.83111572265625, 10.36505126953125, 52.10978698730469, 7.405731201171875, 13.324951171875, -19.827125549316406, 77.31057739257812, 7.972316741943359, 51.209083557128906, 109.48118591308594, 67.36711120605469, 9.6224365234375, 33.07078552246094, 4.872241973876953, 21.306884765625, 14.50843620300293, 31.071229934692383, -32.6314582824707, 47.13376235961914, -26.010780334472656, 0.6300201416015625, 64.87686157226562, 1.8302154541015625, 109.82513427734375, -38.77091979980469, 3.677215576171875, -100.55307006835938, -72.04719543457031, -18.77829933166504, 8.370849609375, 67.45559692382812, -61.55522155761719, 6.796974182128906, 85.59346008300781, 27.30218505859375, 79.92485809326172, 19.167572021484375, 48.01311492919922, 50.303466796875, 21.574630737304688, -19.0543270111084, 16.90888214111328], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000252.npy"}
|
||||
{"epoch": 0.38095238095238093, "step": 253, "batch_size": 64, "mean": 21.692380905151367, "std": 41.149497985839844, "min": -69.13246154785156, "p10": -31.689813804626464, "median": 15.554915428161621, "p90": 78.63825149536133, "max": 105.164306640625, "pos_frac": 0.703125, "sample": [58.75391387939453, 1.4346160888671875, 96.19725036621094, 6.145347595214844, -2.6949119567871094, 32.311885833740234, 25.199722290039062, 72.93960571289062, 62.244651794433594, 25.246580123901367, 55.761436462402344, -5.545234680175781, 69.75434875488281, 2.6918869018554688, -2.471952438354492, -69.13246154785156, -16.48688507080078, 40.610572814941406, -31.871788024902344, 50.53289031982422, 44.28712463378906, 53.62123107910156, -52.882808685302734, 51.6888427734375, 16.716474533081055, 1.9331207275390625, 78.70356750488281, 17.572830200195312, -4.440788269042969, 36.46015930175781, 12.737823486328125, 8.3033447265625, 76.63671875, -4.399837493896484, 9.493118286132812, 105.164306640625, 27.653579711914062, -65.1296157836914, 9.083358764648438, 29.918916702270508, 7.829864501953125, -11.378677368164062, 20.508636474609375, -4.511791229248047, 93.54792022705078, -28.975160598754883, -14.87053108215332, 31.576332092285156, 96.35888671875, 24.81140899658203, 16.879806518554688, 78.92941284179688, 104.81195831298828, 7.281227111816406, 78.48584747314453, -31.946456909179688, 14.393356323242188, -31.265207290649414, 65.89116668701172, -34.63232421875, -35.23541259765625, -3.726297378540039, 10.347711563110352, 8.45773696899414], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000253.npy"}
|
||||
{"epoch": 0.382464096749811, "step": 254, "batch_size": 64, "mean": 25.519224166870117, "std": 42.81383514404297, "min": -76.8991470336914, "p10": -16.225579452514648, "median": 12.399147033691406, "p90": 88.279402923584, "max": 103.868896484375, "pos_frac": 0.671875, "sample": [-2.0720748901367188, 19.920654296875, 85.4664306640625, 3.5370635986328125, -8.6346435546875, 68.91140747070312, 64.09101867675781, 82.14358520507812, 6.431581497192383, 0.08864593505859375, -3.2708969116210938, 10.901893615722656, 50.63770294189453, 73.59259796142578, 71.43614959716797, 78.06303405761719, 103.868896484375, 22.97148895263672, -21.081871032714844, 54.50859832763672, -9.121513366699219, 100.41278076171875, -13.674751281738281, 94.8792953491211, -8.657157897949219, -76.8991470336914, -16.937515258789062, 99.68775177001953, 24.005903244018555, -1.8407859802246094, 99.19544219970703, -25.464279174804688, 5.343841552734375, -1.9178581237792969, -2.9638328552246094, 30.306175231933594, 3.110095977783203, 77.73896789550781, 0.5800819396972656, 52.50220489501953, 13.896400451660156, -14.564395904541016, 15.282520294189453, -26.335769653320312, -12.367361068725586, 97.6016616821289, 4.340568542480469, 89.4849624633789, 29.732818603515625, 20.22587013244629, 16.751876831054688, -24.716514587402344, -14.489204406738281, 82.85621643066406, 73.18516540527344, -5.855445861816406, 29.091712951660156, 3.2507667541503906, 68.86897277832031, 49.19756317138672, 7.088510513305664, -12.265625, 7.2846527099609375, -56.112571716308594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000254.npy"}
|
||||
{"epoch": 0.3839758125472411, "step": 255, "batch_size": 64, "mean": 28.64180564880371, "std": 46.67303466796875, "min": -84.26091766357422, "p10": -20.201245880126947, "median": 18.489368438720703, "p90": 100.36015167236329, "max": 120.45272827148438, "pos_frac": 0.765625, "sample": [94.06057739257812, 26.658599853515625, 120.45272827148438, 38.426361083984375, 70.71269226074219, 46.91351318359375, -4.361381530761719, -12.821098327636719, 10.912490844726562, 77.70124816894531, -44.93656921386719, -0.6973724365234375, 19.599472045898438, 57.703758239746094, -43.827720642089844, 6.659355163574219, 45.617225646972656, 12.489982604980469, 16.098587036132812, -84.26091766357422, 96.74319458007812, -6.291343688964844, 15.376541137695312, 8.205612182617188, 104.07345581054688, 58.466957092285156, -51.12176513671875, 22.151782989501953, 108.06055450439453, -10.699981689453125, 98.6090087890625, 72.96385955810547, -23.364166259765625, 42.37764358520508, 8.9736328125, 14.651636123657227, 8.943429946899414, 44.478790283203125, 105.87891387939453, 12.331365585327148, 4.429441452026367, 48.09611511230469, 78.07907104492188, -1.7582244873046875, 23.174715042114258, -1.6979808807373047, 17.37926483154297, 111.19349670410156, -53.491661071777344, 39.10741424560547, 2.1427040100097656, 102.59794616699219, 11.169502258300781, 101.11064147949219, 41.428619384765625, 39.37590026855469, 3.1521263122558594, 56.961368560791016, 69.87380981445312, -77.15214538574219, 0.0322265625, 0.7391128540039062, 36.22193908691406, -3.0004196166992188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000255.npy"}
|
||||
{"epoch": 0.3854875283446712, "step": 256, "batch_size": 64, "mean": 28.2855224609375, "std": 50.66074752807617, "min": -105.9156723022461, "p10": -23.23787899017334, "median": 26.980589866638184, "p90": 97.41389389038088, "max": 118.01513671875, "pos_frac": 0.6875, "sample": [-26.67058563232422, 28.664840698242188, 57.484596252441406, 41.56711959838867, 57.89161682128906, -35.61372375488281, -23.790647506713867, 100.83663940429688, 38.90214538574219, -7.461538314819336, -52.5577392578125, 1.7486438751220703, -19.532806396484375, 107.88197326660156, 16.375732421875, 108.46070098876953, 1.5043182373046875, 8.481338500976562, -105.9156723022461, 63.37828063964844, 4.816356658935547, 53.70361328125, -15.516111373901367, 18.63542938232422, -17.683151245117188, -80.06026458740234, 45.5572395324707, -6.220420837402344, 28.456787109375, 73.86338806152344, -3.0496368408203125, 45.85606384277344, 36.20862579345703, 93.89254760742188, 88.2405014038086, 37.00084686279297, 92.74252319335938, 88.34490203857422, -9.978683471679688, 101.98176574707031, 25.65032196044922, 44.15510559082031, 114.8780517578125, -16.604995727539062, 90.67073822021484, 24.080158233642578, 86.13612365722656, 8.145416259765625, 28.222318649291992, 118.01513671875, -19.72315216064453, 86.31404876708984, 2.60479736328125, 4.933967590332031, 76.96717834472656, -21.94808578491211, 51.73284912109375, -8.740623474121094, -1.6108341217041016, 98.92304229736328, -76.1144027709961, 25.738861083984375, -18.093994140625, 47.51393127441406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000256.npy"}
|
||||
{"epoch": 0.3869992441421013, "step": 257, "batch_size": 64, "mean": 32.24604797363281, "std": 49.484188079833984, "min": -95.06871032714844, "p10": -11.283410644531248, "median": 17.497591018676758, "p90": 102.37301025390626, "max": 142.76089477539062, "pos_frac": 0.734375, "sample": [-0.7311763763427734, 15.1549072265625, -12.477935791015625, 142.76089477539062, 110.75057983398438, 40.08524703979492, 5.006074905395508, 69.7630615234375, 33.33934020996094, 72.26663208007812, 103.00175476074219, 15.637809753417969, 4.871797561645508, -6.515411376953125, 78.79669189453125, 74.06175994873047, 99.75146484375, 0.8996124267578125, -29.94329833984375, -2.5818862915039062, 4.875215530395508, 36.78459930419922, 32.9381103515625, 12.66561508178711, 44.340309143066406, -5.1740875244140625, -2.667743682861328, 106.70557403564453, 82.88188934326172, -0.7365264892578125, 81.48397827148438, 99.40272521972656, 16.049545288085938, 4.598594665527344, 40.37812042236328, 16.03234100341797, -8.496185302734375, 98.2563247680664, 113.50477600097656, -68.28865051269531, -95.06871032714844, 95.4537353515625, 33.46314239501953, 29.730918884277344, -0.1455841064453125, -36.07554626464844, 1.0139694213867188, 0.1678009033203125, 50.515403747558594, 59.208335876464844, 74.28878021240234, 11.937515258789062, 104.82013702392578, 18.945636749267578, -66.64508056640625, 102.68231201171875, -1.630584716796875, -33.375282287597656, 101.65130615234375, 10.3211669921875, 58.22346115112305, 19.40118408203125, 13.867782592773438, -8.437263488769531], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000257.npy"}
|
||||
{"epoch": 0.3885109599395314, "step": 258, "batch_size": 64, "mean": 27.488344192504883, "std": 37.26266860961914, "min": -75.04412841796875, "p10": -1.7849756240844696, "median": 26.764179229736328, "p90": 76.05202636718754, "max": 115.61334228515625, "pos_frac": 0.890625, "sample": [39.7177734375, 5.5539093017578125, 34.92401123046875, -3.079427719116211, 8.597099304199219, 85.94234466552734, 112.07232666015625, 35.272430419921875, 60.400787353515625, 2.5564327239990234, 13.131261825561523, 26.287017822265625, -60.76432800292969, 22.221895217895508, 44.35521697998047, 65.41848754882812, 1.23541259765625, -12.010417938232422, 41.894866943359375, -59.72931671142578, 6.926433563232422, 41.20851135253906, -54.09291076660156, 28.417518615722656, 12.984451293945312, 53.10961151123047, 16.316619873046875, 46.35527038574219, 20.68490982055664, 15.746749877929688, 24.212890625, 4.164859771728516, 51.830787658691406, 66.19713592529297, 19.951112747192383, 9.438056945800781, 6.093208312988281, 16.055816650390625, 42.6053466796875, 115.61334228515625, 31.462806701660156, -12.045166015625, 103.58380126953125, 46.706756591796875, 34.109649658203125, 95.88386535644531, 54.83784484863281, 49.420345306396484, 4.333366394042969, 80.27555084228516, 4.714591979980469, 28.6422119140625, 43.039894104003906, 27.24134063720703, 13.290008544921875, 5.79277229309082, 28.86016845703125, -75.04412841796875, 6.28692626953125, 3.431032180786133, 36.80034637451172, 48.15525817871094, 85.53629302978516, 6.12098503112793], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000258.npy"}
|
||||
{"epoch": 0.3900226757369615, "step": 259, "batch_size": 64, "mean": 41.57228088378906, "std": 49.79879379272461, "min": -69.35791015625, "p10": -29.78741188049314, "median": 49.15283966064453, "p90": 103.21924285888673, "max": 122.22708129882812, "pos_frac": 0.796875, "sample": [-4.934364318847656, -65.6029052734375, -63.602783203125, 80.92987060546875, 32.9090576171875, 99.97325134277344, 99.99748229980469, 100.74281311035156, 54.93339538574219, -69.35791015625, 67.61164855957031, -3.7376022338867188, 32.666473388671875, 85.159912109375, 60.72611618041992, -4.885986328125, -39.59770965576172, 3.4525146484375, 111.36248779296875, 29.45440673828125, 54.645565032958984, 80.49772644042969, 95.41049194335938, 101.06988525390625, 64.24231719970703, -6.896717071533203, -46.820281982421875, 30.60393524169922, 8.758049011230469, 10.55742073059082, 81.89190673828125, 67.71017456054688, -3.5170211791992188, 30.332984924316406, 65.78120422363281, 46.82044982910156, 107.73413848876953, 3.278564453125, 85.78846740722656, 56.69935607910156, 104.14039611816406, 53.04709243774414, 27.053451538085938, 122.22708129882812, 7.615684509277344, 5.401641845703125, -45.01756286621094, -5.992841720581055, -62.14025115966797, 1.5559577941894531, 114.313232421875, 108.19001770019531, 80.74447631835938, 60.2550048828125, 51.4852294921875, 72.51364135742188, 35.225318908691406, 36.69499969482422, 13.96234130859375, 39.43944549560547, 80.89741516113281, 106.9303207397461, 64.77359008789062, 44.521339416503906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000259.npy"}
|
||||
{"epoch": 0.3915343915343915, "step": 260, "batch_size": 64, "mean": 32.64699935913086, "std": 51.2219352722168, "min": -87.23762512207031, "p10": -30.387182617187495, "median": 24.605728149414062, "p90": 102.31868896484374, "max": 110.73388671875, "pos_frac": 0.671875, "sample": [106.028076171875, -43.603607177734375, 76.57548522949219, -23.18063735961914, 2.7207374572753906, 108.03880310058594, 35.15760803222656, 23.174888610839844, 105.75204467773438, -45.561981201171875, 96.54119873046875, 66.73955535888672, 90.76434326171875, 58.302978515625, 86.70970916748047, -10.954143524169922, 101.88182067871094, 58.20439147949219, 21.190061569213867, 107.56004333496094, 85.18949127197266, 11.878448486328125, 106.27059173583984, 102.4018325805664, -10.781295776367188, 25.740764617919922, 83.75440979003906, 102.12468719482422, -3.225494384765625, -6.5958404541015625, 9.873580932617188, 14.603500366210938, 83.2076644897461, 26.35533905029297, -0.25250816345214844, 47.1254768371582, 19.3819580078125, -25.227203369140625, -87.23762512207031, 24.826148986816406, 1.7897834777832031, 54.31549835205078, -0.3221149444580078, -4.376533508300781, 18.53864288330078, 53.571510314941406, 110.73388671875, -21.9952392578125, 99.2154541015625, -12.59283447265625, -46.51374053955078, 62.20240783691406, 24.38530731201172, 6.195953369140625, 77.13921356201172, 51.30220031738281, -39.823974609375, 85.21590423583984, 68.1357421875, -32.598602294921875, -0.2581291198730469, -69.55780792236328, -24.802242279052734, -1.9476547241210938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000260.npy"}
|
||||
{"epoch": 0.3930461073318216, "step": 261, "batch_size": 64, "mean": 27.57048797607422, "std": 44.87925720214844, "min": -57.510009765625, "p10": -21.568194961547853, "median": 17.468833923339844, "p90": 97.35177612304689, "max": 113.23441314697266, "pos_frac": 0.671875, "sample": [3.872394561767578, 28.193405151367188, 38.445228576660156, 6.196733474731445, -13.619537353515625, 55.00423812866211, -1.0246124267578125, 106.14559936523438, 25.977706909179688, -13.44035530090332, -21.216285705566406, -12.416091918945312, -5.7011566162109375, 104.68330383300781, 70.2108154296875, -16.760055541992188, -1.5040054321289062, 111.56561279296875, 3.4910755157470703, 102.62446594238281, 11.489860534667969, 37.951820373535156, -21.719013214111328, -35.31146240234375, 94.73233032226562, 48.440711975097656, 113.23441314697266, 35.509761810302734, -5.912788391113281, 30.64207649230957, 41.72260665893555, 68.2392578125, 51.11785888671875, 98.47439575195312, 31.877477645874023, -4.130701065063477, 99.3651123046875, -57.510009765625, -33.15386962890625, 76.75393676757812, 27.14215850830078, 7.685140609741211, 68.56788635253906, 71.34957122802734, 11.755573272705078, -56.199920654296875, 2.3350467681884766, -25.38714599609375, -52.80732727050781, 9.972293853759766, 35.231536865234375, -8.607345581054688, 77.8447265625, 90.22398376464844, 16.810272216796875, -19.91387176513672, 49.4182243347168, 18.127395629882812, -4.0963287353515625, 76.7882080078125, 12.273414611816406, 88.2350845336914, 16.565826416015625, -1.3455944061279297], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000261.npy"}
|
||||
{"epoch": 0.3945578231292517, "step": 262, "batch_size": 64, "mean": 27.376060485839844, "std": 42.46787643432617, "min": -62.448890686035156, "p10": -30.87482223510742, "median": 22.76311683654785, "p90": 85.07901306152345, "max": 111.17054748535156, "pos_frac": 0.71875, "sample": [-36.96833801269531, 44.995094299316406, 9.586372375488281, -0.40937232971191406, 2.696136474609375, 82.10763549804688, 7.259552001953125, 4.15802001953125, 22.10498809814453, 88.37921905517578, -5.6632232666015625, 71.37353515625, -34.91326141357422, 6.772893905639648, 64.2783203125, -42.16221237182617, 31.581804275512695, 92.3968505859375, 37.380340576171875, 46.8902587890625, -62.448890686035156, -5.815452575683594, 3.8158493041992188, -0.5706939697265625, 26.938095092773438, 71.12675476074219, 9.707427978515625, 111.17054748535156, -35.53993225097656, 9.925226211547852, 86.23892211914062, 51.51409912109375, 23.443023681640625, 80.23042297363281, -8.177986145019531, 97.96964263916016, 5.763128280639648, 23.421245574951172, 106.61257934570312, 1.3968963623046875, -4.546648025512695, -28.597808837890625, -21.503021240234375, 69.06289672851562, 82.37255859375, 44.202613830566406, 44.96630096435547, 7.06987190246582, -31.850685119628906, 25.55704116821289, 62.6702880859375, -4.491584777832031, 73.76399993896484, -5.558130264282227, 43.867820739746094, 30.149085998535156, 79.26568603515625, 89.09693908691406, 3.392578125, 67.71282958984375, 8.323013305664062, -14.345252990722656, 77.58879852294922, -34.66692352294922], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000262.npy"}
|
||||
{"epoch": 0.3960695389266818, "step": 263, "batch_size": 64, "mean": 26.370563507080078, "std": 43.74665832519531, "min": -85.82340240478516, "p10": -19.543958663940426, "median": 20.42978858947754, "p90": 81.73539505004884, "max": 106.39118957519531, "pos_frac": 0.71875, "sample": [39.1587028503418, 60.67509841918945, -39.140846252441406, -13.094383239746094, -8.366334915161133, -13.512779235839844, 78.16709899902344, 3.3084869384765625, 79.4522933959961, 77.56844329833984, 46.86541748046875, 1.6930484771728516, -5.203681945800781, 45.82297897338867, -0.10193634033203125, -44.183143615722656, 82.26303100585938, -11.12335205078125, 17.995849609375, -10.560096740722656, 48.15013122558594, 80.53842163085938, 104.59053039550781, 25.05654525756836, 11.29931640625, 2.2651405334472656, 82.21263885498047, 57.63240051269531, 82.86646270751953, 66.7298812866211, 103.33998107910156, 17.641189575195312, -26.936370849609375, 6.463958740234375, -6.612945556640625, 35.46051025390625, -85.82340240478516, 72.5504150390625, 36.48131561279297, 80.621826171875, 18.687286376953125, -7.885063171386719, -14.283149719238281, 80.10877227783203, -22.373680114746094, 2.4160003662109375, 29.372299194335938, 75.43416595458984, 47.93944549560547, 22.172290802001953, 4.532867431640625, 106.39118957519531, 4.3158416748046875, 5.733745574951172, 33.019622802734375, -20.3922176361084, 30.92957305908203, 72.24928283691406, 12.557662963867188, -17.564687728881836, 5.626251220703125, -84.37913513183594, 87.42962646484375, 33.466163635253906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000263.npy"}
|
||||
{"epoch": 0.3975812547241119, "step": 264, "batch_size": 64, "mean": 29.263023376464844, "std": 46.44760513305664, "min": -88.31535339355469, "p10": -30.00752716064453, "median": 25.09530258178711, "p90": 92.60495986938477, "max": 115.96514892578125, "pos_frac": 0.734375, "sample": [50.44927978515625, -34.05360412597656, 23.828651428222656, 0.6872024536132812, 1.4457073211669922, -16.534738540649414, 5.1640777587890625, 0.6099853515625, 67.13984680175781, 22.920974731445312, -41.479644775390625, 3.757343292236328, -88.31535339355469, 55.36712646484375, -23.699127197265625, 28.235214233398438, 55.402015686035156, 66.66264343261719, 11.809303283691406, 93.39865112304688, 78.63772583007812, 47.93064498901367, 89.66059875488281, 39.957733154296875, 49.03167724609375, 113.68476867675781, -1.883829116821289, 25.948486328125, -9.56182861328125, 37.61493682861328, 8.372489929199219, -1.5529251098632812, 84.10735321044922, 2.382152557373047, -10.229068756103516, 32.72380828857422, 102.0120849609375, -21.18939208984375, 73.68123626708984, 104.88314819335938, -31.559112548828125, -14.183967590332031, 24.24211883544922, 111.6915283203125, 53.73554229736328, 74.77201843261719, -53.54095458984375, 47.66792297363281, -34.11329650878906, 13.156463623046875, -26.387161254882812, 1.8002700805664062, 107.25094604492188, 40.40462875366211, 51.373043060302734, -35.280765533447266, 22.406532287597656, 67.2403564453125, 33.634521484375, -1.8065681457519531, 1.440948486328125, 115.96514892578125, 83.16294860839844, 90.75301361083984], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000264.npy"}
|
||||
{"epoch": 0.39909297052154197, "step": 265, "batch_size": 64, "mean": 33.32872772216797, "std": 54.97273254394531, "min": -100.53297424316406, "p10": -33.46467285156249, "median": 23.357924461364746, "p90": 105.22319793701172, "max": 116.92910766601562, "pos_frac": 0.75, "sample": [9.14385986328125, 1.7250957489013672, -1.9586639404296875, -36.373046875, 6.224308013916016, -4.699455261230469, 35.07163619995117, 21.393043518066406, -53.09898376464844, 105.37432861328125, 34.208003997802734, 14.872291564941406, 44.52267837524414, 61.44867706298828, -100.53297424316406, -11.467140197753906, 55.64618682861328, 1.780496597290039, 16.22718620300293, -79.24412536621094, 23.2559814453125, 93.50909423828125, 98.29039764404297, 23.27119255065918, 5.705007553100586, -12.219402313232422, 88.08692169189453, 5.461845397949219, 37.145774841308594, 10.469717025756836, 111.92315673828125, 116.92910766601562, -26.678466796875, 102.62704467773438, 68.52806091308594, 109.12337493896484, 2.4298229217529297, 19.574962615966797, -4.1164398193359375, -23.577896118164062, 35.46092987060547, 94.271240234375, 103.5538558959961, 107.8988265991211, 103.68751525878906, 82.34889221191406, 110.02934265136719, 45.27471160888672, 17.2855224609375, -52.76023483276367, 23.444656372070312, 46.01507568359375, -12.24319076538086, 104.87055969238281, 97.52545166015625, 98.74329376220703, 111.42897033691406, -75.12409210205078, 48.41815948486328, -7.023929595947266, 68.35404205322266, 6.103096008300781, 76.53079986572266, -71.05755615234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000265.npy"}
|
||||
{"epoch": 0.40060468631897206, "step": 266, "batch_size": 64, "mean": 31.203144073486328, "std": 48.3275146484375, "min": -94.48348236083984, "p10": -15.181680679321286, "median": 24.49022674560547, "p90": 101.72192993164063, "max": 128.2071533203125, "pos_frac": 0.75, "sample": [-11.794281005859375, 106.39875793457031, 102.1601791381836, 4.378839492797852, 42.71363067626953, -1.381826400756836, -4.960472106933594, -51.56532287597656, 4.113271713256836, 36.949974060058594, 37.05592727661133, 9.262786865234375, 7.524955749511719, 58.543052673339844, 61.046546936035156, 63.18104934692383, 3.1417388916015625, 128.2071533203125, -16.619396209716797, -11.827011108398438, 107.06671142578125, 22.900802612304688, 2.9422950744628906, 110.21868133544922, 43.984214782714844, 110.86408996582031, 2.6666259765625, 88.48126220703125, 45.68488311767578, -22.088478088378906, 98.62686157226562, 35.321624755859375, 100.69934844970703, -2.1865272521972656, 44.442352294921875, -6.942604064941406, 42.03662109375, 61.57890319824219, 74.47389221191406, -4.047786712646484, 1.7150955200195312, -41.18913269042969, -41.30939483642578, 6.846057891845703, -94.48348236083984, 10.199920654296875, 26.07965087890625, 12.175556182861328, 39.01685333251953, 73.2480239868164, -68.59046173095703, 3.4687938690185547, -5.916595458984375, 41.301910400390625, 91.29728698730469, 47.88277816772461, 113.6352310180664, -3.684408187866211, 69.45161437988281, 6.307861328125, 82.23046875, 3.9602108001708984, 97.72882080078125, 2.375255584716797], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000266.npy"}
|
||||
{"epoch": 0.4021164021164021, "step": 267, "batch_size": 64, "mean": 24.190324783325195, "std": 50.78315353393555, "min": -111.72854614257812, "p10": -30.795170593261712, "median": 23.636430740356445, "p90": 94.6119613647461, "max": 117.94424438476562, "pos_frac": 0.703125, "sample": [74.18782043457031, 1.6743316650390625, 1.9462814331054688, -90.4051513671875, 5.4395599365234375, -19.466320037841797, 76.74012756347656, 108.24061584472656, 70.85256958007812, 1.1088829040527344, -7.252950668334961, 34.80479431152344, 5.233463287353516, 115.56079864501953, 2.4832801818847656, -52.15960693359375, 34.65641784667969, -51.33139419555664, -10.517074584960938, -96.12062072753906, -32.95386505126953, -19.67388153076172, -8.227546691894531, 54.452205657958984, 70.2303466796875, 93.28910064697266, -0.142181396484375, 14.070709228515625, 76.41679382324219, -25.758216857910156, 35.92246627807617, 1.2736015319824219, 95.17890167236328, 60.8435173034668, -21.68846893310547, 27.695911407470703, 39.14720916748047, -7.1457061767578125, 1.8048553466796875, 117.94424438476562, 27.547256469726562, 0.4319000244140625, 86.31097412109375, -1.19207763671875, 91.25724029541016, 64.37345886230469, 103.3949203491211, 25.52611541748047, -111.72854614257812, 99.46097564697266, 33.14599609375, -36.703033447265625, -1.950235366821289, 24.281078338623047, 24.943572998046875, 13.405590057373047, 59.14677810668945, 52.372291564941406, 99.42963409423828, 28.158645629882812, 4.339801788330078, 62.3301887512207, 22.991783142089844, -1.449371337890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000267.npy"}
|
||||
{"epoch": 0.4036281179138322, "step": 268, "batch_size": 64, "mean": 29.83588218688965, "std": 42.127437591552734, "min": -62.23527526855469, "p10": -8.485407257080077, "median": 16.227858543395996, "p90": 94.98853073120117, "max": 124.0283203125, "pos_frac": 0.671875, "sample": [11.282928466796875, 114.7273178100586, 0.07116317749023438, -7.253662109375, 82.80783081054688, 99.73360443115234, 30.710540771484375, -5.847633361816406, -22.65225601196289, 33.7184944152832, 88.34332275390625, -0.1215667724609375, -0.7731342315673828, 10.766899108886719, 18.459121704101562, 16.684585571289062, 43.210121154785156, -15.158950805664062, -0.5968017578125, 95.139892578125, 65.22691345214844, 75.62101745605469, -2.3379459381103516, 6.0022430419921875, -5.958564758300781, 19.77862548828125, 18.59991455078125, 63.75103759765625, 12.92901611328125, -4.96954345703125, 93.2940673828125, 3.902576446533203, 22.03293228149414, 14.54046630859375, -11.483016967773438, 94.6353530883789, -4.287055969238281, -12.06081771850586, -43.449188232421875, 15.77113151550293, 67.53044128417969, 8.435407638549805, -1.8895034790039062, 82.58198547363281, 60.134483337402344, 39.80247497558594, 103.89634704589844, -2.3334503173828125, 12.436279296875, -62.23527526855469, -2.8859710693359375, -9.013298034667969, 52.45521545410156, -0.7324600219726562, 124.0283203125, 63.19273376464844, 24.021652221679688, 31.5341796875, 104.94977569580078, 35.987361907958984, -0.9711437225341797, 110.48072814941406, 49.39081954956055, 3.908344268798828], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000268.npy"}
|
||||
{"epoch": 0.4051398337112623, "step": 269, "batch_size": 64, "mean": 34.92235565185547, "std": 47.532447814941406, "min": -90.74005126953125, "p10": -9.000229835510252, "median": 24.399487495422363, "p90": 103.05923614501954, "max": 128.9991455078125, "pos_frac": 0.75, "sample": [-3.774627685546875, -29.706939697265625, 9.963607788085938, 97.5718994140625, 33.549285888671875, -5.0081634521484375, 27.567825317382812, 97.18525695800781, 14.033065795898438, -6.601043701171875, -9.602272033691406, 27.987533569335938, 53.35210418701172, 35.221702575683594, 108.05352783203125, 93.95610809326172, -0.7047996520996094, 52.13490295410156, 126.61022186279297, -69.16316986083984, 30.841781616210938, -2.05938720703125, 93.56319427490234, 26.37506103515625, 7.5623779296875, 105.55340576171875, -27.38543128967285, 128.9991455078125, 51.980812072753906, 99.7529067993164, 22.423913955688477, 15.88089370727539, -7.595464706420898, 62.80589294433594, -11.498245239257812, 3.3266353607177734, 2.7423973083496094, 38.55442810058594, -0.17766571044921875, 93.38383483886719, 105.3388442993164, 53.51638412475586, 10.527931213378906, 8.064676284790039, 104.47623443603516, -27.98504638671875, 63.797935485839844, 19.224395751953125, -1.4709434509277344, 5.58154296875, 92.88006591796875, 0.1127166748046875, 108.28175354003906, 15.90704345703125, -3.0945968627929688, -90.74005126953125, 9.64019775390625, 68.746826171875, 38.33140563964844, 55.68177795410156, 86.07240295410156, 14.732925415039062, 90.82258605957031, 18.92717170715332], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000269.npy"}
|
||||
{"epoch": 0.40665154950869237, "step": 270, "batch_size": 64, "mean": 27.65694236755371, "std": 49.18150329589844, "min": -100.6243896484375, "p10": -9.665572357177734, "median": 16.768413543701172, "p90": 103.05458068847658, "max": 117.28436279296875, "pos_frac": 0.765625, "sample": [54.35888671875, 109.77980041503906, -7.592826843261719, 117.28436279296875, 48.87812805175781, 112.78865051269531, 7.569942474365234, 35.81034469604492, 73.76644897460938, 20.748214721679688, 117.27686309814453, 79.19563293457031, 17.077476501464844, 66.71856689453125, 3.1757850646972656, -51.00929260253906, 33.96763610839844, 53.418739318847656, 16.088581085205078, 66.35302734375, 33.290924072265625, 69.82624816894531, 11.148674011230469, -48.896175384521484, -3.1308746337890625, 6.237274169921875, 91.57408905029297, -91.61459350585938, 35.242183685302734, 10.450733184814453, 16.4593505859375, 14.229019165039062, 4.9658203125, 43.010047912597656, -5.698089599609375, 4.551200866699219, 106.1020278930664, -8.811477661132812, 13.82900619506836, -100.6243896484375, 110.48611450195312, 70.27167510986328, 91.68772888183594, -6.76738166809082, 9.605213165283203, 33.86418151855469, -2.515289306640625, 4.57806396484375, 103.98594665527344, 44.35682678222656, 100.88139343261719, 0.7514915466308594, -19.276138305664062, 52.10639953613281, 1.6054878234863281, -9.986892700195312, -8.915824890136719, -92.50018310546875, 57.3956298828125, 8.078506469726562, 19.250091552734375, -4.189727783203125, 8.119888305664062, 19.375137329101562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000270.npy"}
|
||||
{"epoch": 0.40816326530612246, "step": 271, "batch_size": 64, "mean": 22.31073570251465, "std": 55.53260803222656, "min": -88.64500427246094, "p10": -31.13250331878662, "median": 3.2657470703125, "p90": 107.03026504516602, "max": 148.53985595703125, "pos_frac": 0.609375, "sample": [28.65227508544922, 49.501678466796875, -8.770240783691406, 1.0086746215820312, 112.2718734741211, 88.35777282714844, 15.40880012512207, 3.3590545654296875, 106.38711547851562, -48.251708984375, 58.68804931640625, -5.186819076538086, 131.09243774414062, -20.285537719726562, 66.8866195678711, 97.80787658691406, 2.5368175506591797, -15.027572631835938, 1.3918437957763672, 118.04472351074219, 13.328193664550781, 2.7277908325195312, 88.89506530761719, -7.351078033447266, -14.33734130859375, 0.30641937255859375, -28.922279357910156, 60.626220703125, -3.8448638916015625, -29.821489334106445, 6.685003280639648, 75.57254028320312, 12.794952392578125, -88.64500427246094, -1.74493408203125, 95.46692657470703, 19.679737091064453, 2.9797134399414062, 83.73192596435547, 107.30590057373047, -2.7128143310546875, -53.20173645019531, 148.53985595703125, -50.77936553955078, -8.150421142578125, 18.282745361328125, -5.07135009765625, -80.70325469970703, -84.69264221191406, 3.1724395751953125, 24.07723617553711, -17.23638153076172, 102.42504119873047, 112.37033081054688, 53.28868103027344, -3.99749755859375, -31.694366455078125, 42.700584411621094, -9.497245788574219, 6.470775604248047, -19.301584243774414, -25.343429565429688, 119.04573059082031, 10.588592529296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000271.npy"}
|
||||
{"epoch": 0.40967498110355255, "step": 272, "batch_size": 64, "mean": 45.035953521728516, "std": 44.173397064208984, "min": -74.28671264648438, "p10": -3.834848785400388, "median": 47.97977256774902, "p90": 109.44387435913087, "max": 121.72161102294922, "pos_frac": 0.828125, "sample": [16.280189514160156, -5.024803161621094, -9.804931640625, 63.94401931762695, 53.786399841308594, 114.4033432006836, 19.712493896484375, 95.33805084228516, 105.35750579833984, 9.398979187011719, 87.01327514648438, 50.33696365356445, 16.324859619140625, -0.3768157958984375, 61.26380157470703, 51.94232940673828, 66.92581939697266, 4.094108581542969, 72.51838684082031, 50.90127944946289, 104.4698715209961, 15.803298950195312, 76.10964965820312, 71.93154907226562, 120.25190734863281, 17.44916534423828, 60.755035400390625, -33.77783966064453, 15.123382568359375, 115.6026840209961, 45.622581481933594, 115.55033874511719, 87.72941589355469, 0.07924079895019531, 62.926963806152344, 20.712627410888672, 24.69084930419922, 0.6313629150390625, 32.198875427246094, -74.28671264648438, 89.77996063232422, 116.03923797607422, 121.72161102294922, 52.52758026123047, 2.2179489135742188, 14.844635009765625, 100.38865661621094, 63.311851501464844, 71.31231689453125, 82.27759552001953, 30.525373458862305, 97.2078857421875, -9.124176025390625, -1.05828857421875, 17.30559539794922, 29.115386962890625, 56.46210479736328, 41.38493347167969, -9.433185577392578, -0.6515407562255859, -8.093889236450195, -0.5327682495117188, 111.19517517089844, 9.667490005493164], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000272.npy"}
|
||||
{"epoch": 0.41118669690098264, "step": 273, "batch_size": 64, "mean": 31.832345962524414, "std": 61.13330078125, "min": -119.16233825683594, "p10": -43.5851448059082, "median": 34.72955513000488, "p90": 106.51011352539064, "max": 166.9696044921875, "pos_frac": 0.765625, "sample": [0.7767524719238281, 53.65001678466797, 107.59979248046875, -44.951026916503906, 78.29876708984375, -19.14916229248047, -76.58334350585938, 81.76341247558594, -92.69206237792969, 47.590850830078125, 61.02435302734375, -40.39808654785156, 54.46415710449219, -2.5347957611083984, 5.638740539550781, 31.787067413330078, 112.9967269897461, 23.170612335205078, 95.04081726074219, 93.2203140258789, 84.6554946899414, 6.405567169189453, -6.0224761962890625, 3.542804718017578, -119.16233825683594, 95.43627166748047, 48.55029296875, 2.1800765991210938, -29.895174026489258, 3.2281112670898438, 69.13442993164062, 166.9696044921875, 103.967529296875, -105.85453796386719, 6.77532958984375, 66.25770568847656, 71.28762817382812, 57.343021392822266, 1.0014114379882812, 86.11811828613281, -81.51416778564453, 37.67204284667969, 83.70674896240234, -21.165977478027344, 45.487857818603516, 142.51393127441406, 119.96839141845703, 72.29814910888672, 11.046768188476562, 78.94252014160156, 1.7398357391357422, 28.793533325195312, 1.3864974975585938, -11.7056884765625, 19.794567108154297, 116.5445327758789, 0.4416542053222656, 39.94013214111328, 29.821666717529297, -8.938514709472656, 68.15022277832031, 113.29779815673828, -84.26028442382812, 50.67512512207031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000273.npy"}
|
||||
{"epoch": 0.4126984126984127, "step": 274, "batch_size": 64, "mean": 35.58800506591797, "std": 58.25914001464844, "min": -111.45734405517578, "p10": -35.880258178710925, "median": 29.367904663085938, "p90": 109.92017364501953, "max": 125.9270248413086, "pos_frac": 0.75, "sample": [49.04591369628906, -44.855865478515625, -8.208219528198242, 4.7211151123046875, -10.7860107421875, -65.77426147460938, 107.92221069335938, 108.55836486816406, 25.158279418945312, 6.37104606628418, 69.30684661865234, 95.65733337402344, -3.1925888061523438, -19.34759521484375, 58.83897399902344, 122.40174865722656, 115.20806884765625, -72.24652862548828, 9.596012115478516, 24.986419677734375, 114.34571838378906, 107.90403747558594, 66.94198608398438, 109.7836685180664, 59.12677764892578, 3.2932376861572266, 109.75164031982422, 109.71817016601562, 42.15784454345703, 8.884033203125, 5.890682220458984, 5.097312927246094, 100.93389129638672, -0.780670166015625, -22.82335090637207, 51.04145812988281, -73.7579574584961, -25.707603454589844, 93.86736297607422, 6.416748046875, -40.239967346191406, 38.968048095703125, 22.446765899658203, 20.441665649414062, -9.63835334777832, 32.56622314453125, 26.169586181640625, -111.45734405517578, 37.874290466308594, 38.9560661315918, 115.07320404052734, 105.77037048339844, 2.8612937927246094, 45.11509704589844, 10.00054931640625, -20.873544692993164, -70.672607421875, 10.480365753173828, 117.80195617675781, 109.97867584228516, 78.511962890625, 40.26905059814453, 125.9270248413086, 105.8554458618164], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000274.npy"}
|
||||
{"epoch": 0.41421012849584277, "step": 275, "batch_size": 64, "mean": 35.19570541381836, "std": 50.51906204223633, "min": -110.5087890625, "p10": -12.281184005737304, "median": 22.62023162841797, "p90": 112.10916442871095, "max": 126.37853240966797, "pos_frac": 0.765625, "sample": [28.571014404296875, 12.822853088378906, -15.51800537109375, 40.77410888671875, 108.99858856201172, 19.815513610839844, 37.72479248046875, -110.5087890625, 115.96797943115234, 95.2154541015625, 5.79443359375, 16.563583374023438, -0.5553817749023438, 113.44226837158203, -1.4181976318359375, 9.84256362915039, 83.87313842773438, 32.88597106933594, 106.18504333496094, -31.38934326171875, -92.661376953125, 117.87445068359375, 15.821197509765625, 7.362480163574219, 56.851036071777344, -0.390380859375, -37.33957290649414, 10.846382141113281, -1.5680427551269531, 22.808120727539062, 22.432342529296875, 126.37853240966797, 38.846092224121094, 52.115989685058594, 115.1486587524414, 54.530731201171875, 52.609832763671875, 14.339216232299805, -6.855804443359375, -2.1338462829589844, 19.288543701171875, 19.227569580078125, 28.797523498535156, 1.6183795928955078, 7.32280158996582, 121.44274139404297, 57.7613525390625, 96.74100494384766, -11.099845886230469, 72.80010986328125, 1.7584114074707031, 6.577980041503906, -4.624689102172852, 14.515106201171875, 114.8460464477539, 100.94046783447266, -12.787471771240234, 62.95386505126953, 70.19558715820312, 94.58097839355469, 49.2601318359375, 45.69366455078125, -30.9687442779541, 89.5799331665039], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000275.npy"}
|
||||
{"epoch": 0.41572184429327286, "step": 276, "batch_size": 64, "mean": 34.61402893066406, "std": 50.316558837890625, "min": -94.69320678710938, "p10": -20.354405212402337, "median": 26.229619026184082, "p90": 103.08207931518555, "max": 125.72773742675781, "pos_frac": 0.78125, "sample": [103.17019653320312, 11.123847961425781, -1.241231918334961, 60.181907653808594, 3.733968734741211, 19.369468688964844, 125.72773742675781, -62.23162078857422, 71.66875457763672, 19.686887741088867, 11.416072845458984, 23.6260929107666, 54.75871276855469, 81.23291778564453, -11.577178955078125, 115.52705383300781, -5.626522064208984, 3.043659210205078, 25.131881713867188, 4.650524139404297, 37.39118576049805, 95.57869720458984, 95.82654571533203, 120.83816528320312, 11.24365234375, -48.188873291015625, 89.5132827758789, -29.85784912109375, 44.742897033691406, 102.87647247314453, 8.260242462158203, 9.272674560546875, -39.709205627441406, -94.69320678710938, 86.03414916992188, -23.46014404296875, 79.52978515625, -7.952018737792969, 60.509765625, 85.94576263427734, -64.7251205444336, 19.208419799804688, 112.01615905761719, 1.05352783203125, -9.14996337890625, 36.80488586425781, 95.99928283691406, 43.12937927246094, 99.9427490234375, 22.292205810546875, -7.410774230957031, -13.107681274414062, 10.873085021972656, 28.637500762939453, 19.768768310546875, 120.97531127929688, 112.22455596923828, 58.35056686401367, 44.70005798339844, 33.750938415527344, 27.327356338500977, 52.60044860839844, 31.214290618896484, 1.7467193603515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000276.npy"}
|
||||
{"epoch": 0.41723356009070295, "step": 277, "batch_size": 64, "mean": 19.997926712036133, "std": 53.664188385009766, "min": -97.66073608398438, "p10": -49.855061721801746, "median": 13.21807861328125, "p90": 96.0373977661133, "max": 125.59475708007812, "pos_frac": 0.65625, "sample": [78.963623046875, -85.07919311523438, 83.34403228759766, -6.8504180908203125, 88.06360626220703, -66.05905151367188, 10.353706359863281, 14.515869140625, 79.96538543701172, 11.9202880859375, -40.180450439453125, -7.250846862792969, 43.831298828125, -1.7265167236328125, -32.49922180175781, 42.563201904296875, 38.406349182128906, -4.266754150390625, 0.9836158752441406, 22.25417709350586, 50.63831329345703, 30.94060516357422, 5.147686004638672, 7.187469482421875, -6.324504852294922, 18.991424560546875, -11.821060180664062, -71.42225646972656, 18.335372924804688, 98.61116027832031, 0.196258544921875, 40.65898132324219, -31.297792434692383, 111.09800720214844, 37.13811111450195, 102.23001098632812, -11.146354675292969, -76.7667465209961, 31.788925170898438, 45.74527359008789, -2.0083160400390625, -54.00132369995117, 101.69998168945312, 67.24295043945312, 30.148269653320312, 5.334049224853516, -9.407035827636719, 81.43184661865234, -94.3602066040039, 125.59475708007812, 59.951087951660156, -29.05047035217285, 101.45980834960938, 45.69139099121094, -6.1537628173828125, 0.7692642211914062, 90.03195190429688, -97.66073608398438, 62.94451904296875, 115.58836364746094, -40.00227355957031, 6.746940612792969, 0.14051246643066406, 56.55419158935547], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000277.npy"}
|
||||
{"epoch": 0.41874527588813304, "step": 278, "batch_size": 64, "mean": 34.29515075683594, "std": 55.15033721923828, "min": -127.47834777832031, "p10": -28.092170906066894, "median": 24.084184646606445, "p90": 113.5318099975586, "max": 126.71005249023438, "pos_frac": 0.75, "sample": [-27.18882179260254, -48.212554931640625, 92.89401245117188, 32.66716003417969, -0.1101226806640625, -5.570438385009766, -97.55085754394531, 57.31328582763672, 123.48175811767578, 126.60282897949219, 41.856109619140625, -2.6371307373046875, 113.64522552490234, 113.26717376708984, 18.33917999267578, -9.061002731323242, 29.82918930053711, 73.74810791015625, 82.73138427734375, 84.77877807617188, 56.515174865722656, 62.20768737792969, 13.66513442993164, 3.9772415161132812, -63.95027160644531, 126.71005249023438, -30.610443115234375, 78.52359008789062, 17.273590087890625, 5.3185882568359375, 8.92938232421875, 47.39344024658203, -3.485595703125, -28.479320526123047, 62.586456298828125, 6.364189147949219, 107.83351135253906, 10.010482788085938, 70.75123596191406, 71.19483184814453, 116.89569854736328, 125.17343139648438, -6.544025421142578, 66.61723327636719, 46.296607971191406, 34.01311492919922, 0.06409454345703125, 4.681833267211914, -33.85871887207031, 5.137725830078125, 51.35786437988281, 73.13116455078125, -127.47834777832031, -2.7144126892089844, 9.208885192871094, 97.259033203125, 3.48876953125, 123.94851684570312, 47.9477653503418, 14.934257507324219, 10.328004837036133, -5.290836334228516, 103.5914535522461, 13.1484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000278.npy"}
|
||||
{"epoch": 0.42025699168556313, "step": 279, "batch_size": 64, "mean": 47.85323715209961, "std": 49.8919792175293, "min": -83.82316589355469, "p10": -3.0792345046997047, "median": 43.79929733276367, "p90": 116.8212158203125, "max": 129.59361267089844, "pos_frac": 0.875, "sample": [-0.6242504119873047, 4.577960968017578, 123.25823211669922, 29.22290802001953, -83.82316589355469, 104.99111938476562, 9.19308853149414, -4.131370544433594, 5.6476898193359375, 78.54541015625, 9.271385192871094, 105.13140869140625, 52.78997039794922, 111.3140869140625, 45.927188873291016, 129.59361267089844, 70.83331298828125, 69.97035217285156, -38.91889190673828, 5.568386077880859, -35.48431396484375, 106.83517456054688, 74.9045639038086, 45.84477996826172, 12.303321838378906, 67.2381820678711, 20.86857032775879, 0.6159477233886719, 19.615562438964844, 76.88655090332031, 23.431503295898438, 91.40176391601562, 126.74738311767578, 38.908653259277344, -9.002227783203125, 107.493896484375, 64.39822387695312, 31.6748046875, -10.664573669433594, 120.78213500976562, 117.23714447021484, 7.654804229736328, 115.85071563720703, 9.169624328613281, 30.403594970703125, 56.24798583984375, 3.8926734924316406, 5.7319793701171875, 41.753814697265625, 122.16614532470703, 77.96690368652344, -34.75965118408203, 83.23976135253906, 115.49383544921875, 1.8295021057128906, 11.641632080078125, 20.854843139648438, 13.827880859375, 90.73143005371094, 84.5919418334961, 91.1169662475586, 65.00466918945312, 120.86659240722656, 6.953987121582031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000279.npy"}
|
||||
{"epoch": 0.4217687074829932, "step": 280, "batch_size": 64, "mean": 47.96574783325195, "std": 49.18162536621094, "min": -29.55572509765625, "p10": -1.6950145721435546, "median": 25.974870681762695, "p90": 120.39842681884765, "max": 155.47976684570312, "pos_frac": 0.84375, "sample": [90.94415283203125, 120.51547241210938, 3.5535640716552734, 87.76864624023438, -7.474723815917969, 118.25628662109375, 1.6482257843017578, -4.9561614990234375, 2.1728572845458984, 70.51963806152344, 24.717269897460938, 3.3023834228515625, 92.24130249023438, 106.82968139648438, 117.34877014160156, 113.46231842041016, 54.098304748535156, 28.968753814697266, 111.51161193847656, 28.55359649658203, 5.983917236328125, 4.252368927001953, -1.7812881469726562, -1.4937095642089844, 120.12532043457031, 83.74598693847656, 8.607791900634766, 77.1272201538086, 92.89797973632812, 155.47976684570312, 25.936542510986328, -0.4035797119140625, 18.785064697265625, 83.39274597167969, 4.891063690185547, 42.05439758300781, 129.96737670898438, -0.3725433349609375, 122.1661605834961, 124.72142791748047, -29.55572509765625, -4.452190399169922, 6.060310363769531, -2.9239940643310547, 74.2650146484375, 15.591175079345703, 56.01868438720703, -4.453409194946289, 16.4317626953125, 8.338445663452148, 122.17472076416016, 39.72859573364258, 15.40256118774414, 108.46720123291016, 6.558675765991211, 26.013198852539062, 16.208297729492188, 10.717239379882812, 17.63816261291504, 20.155609130859375, 8.954643249511719, 110.26416015625, 126.361572265625, 45.77740478515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000280.npy"}
|
||||
{"epoch": 0.42328042328042326, "step": 281, "batch_size": 64, "mean": 43.137847900390625, "std": 56.58744430541992, "min": -76.5511245727539, "p10": -17.632576179504394, "median": 30.31041717529297, "p90": 116.74944305419922, "max": 129.8916473388672, "pos_frac": 0.78125, "sample": [-31.33047866821289, 101.39781188964844, 10.477264404296875, 1.427520751953125, 106.12661743164062, 82.59622192382812, 32.952796936035156, 72.833740234375, 38.20404052734375, 121.93310546875, 44.10743713378906, 120.1363754272461, 86.03446960449219, 68.60875701904297, -1.578948974609375, 4.051637649536133, 7.342081069946289, 117.01359558105469, 0.1579608917236328, 100.76764678955078, 78.09085083007812, 2.6133575439453125, -15.817337036132812, 27.66803741455078, 107.46819305419922, 1.5028190612792969, 10.482337951660156, -13.286754608154297, 39.90934753417969, -76.5511245727539, 6.9463958740234375, 11.352031707763672, 4.9583587646484375, 20.5274715423584, 115.32334899902344, -70.48387908935547, 66.95057678222656, 129.8916473388672, 15.668777465820312, -55.93271255493164, 120.9924087524414, 109.65132141113281, 120.6739730834961, 67.80001831054688, 120.14385223388672, 22.060401916503906, 111.34147644042969, 11.001529693603516, 115.7885971069336, 62.862579345703125, -2.8864898681640625, 5.806121826171875, 116.13308715820312, -62.176063537597656, -0.7586555480957031, 103.1072998046875, 114.8212890625, -15.304931640625, 88.81307983398438, 91.01182556152344, -22.583393096923828, -6.639989852905273, 17.0321102142334, -18.41053581237793], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000281.npy"}
|
||||
{"epoch": 0.42479213907785335, "step": 282, "batch_size": 64, "mean": 39.37797546386719, "std": 62.33873748779297, "min": -107.11201477050781, "p10": -43.22130813598633, "median": 25.12004852294922, "p90": 123.14542465209962, "max": 157.01055908203125, "pos_frac": 0.6875, "sample": [24.22064971923828, -18.258140563964844, -62.61817169189453, -5.3788909912109375, 9.543037414550781, 104.13566589355469, 87.73635864257812, 8.704992294311523, -3.1827964782714844, 132.0009765625, 72.87036895751953, 113.87150573730469, 24.45195960998535, 106.80746459960938, 113.34938049316406, 19.28106689453125, 126.12159729003906, 67.56411743164062, -4.6187744140625, 90.61904907226562, 37.299896240234375, 37.805450439453125, -4.777673721313477, 134.70892333984375, 25.788137435913086, -11.500144958496094, -43.344512939453125, 111.83934020996094, 112.94821166992188, 8.48995590209961, -33.360294342041016, -16.543710708618164, -107.11201477050781, 112.49092864990234, 5.866537094116211, -55.926605224609375, 125.06023406982422, 11.157503128051758, -52.13124084472656, 35.47709655761719, 97.0483169555664, 109.85652160644531, 4.261627197265625, 55.527950286865234, 118.67753601074219, 157.01055908203125, 68.6326904296875, -0.9024543762207031, -2.8711395263671875, 32.658477783203125, -19.026473999023438, -15.129486083984375, -44.36598205566406, 18.37710189819336, -42.93383026123047, 134.6840057373047, 128.88975524902344, 87.5863037109375, -58.99591064453125, 18.85931396484375, 53.8470458984375, 58.20000457763672, 103.30441284179688, 15.536827087402344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000282.npy"}
|
||||
{"epoch": 0.42630385487528344, "step": 283, "batch_size": 64, "mean": 35.94493865966797, "std": 58.09090805053711, "min": -107.302490234375, "p10": -28.83804016113281, "median": 21.509090423583984, "p90": 114.80134429931641, "max": 135.043701171875, "pos_frac": 0.765625, "sample": [131.7797088623047, -69.70344543457031, -26.400344848632812, -35.43486022949219, 91.49162292480469, 74.68952941894531, 4.8836669921875, 7.766746520996094, 86.14100646972656, 1.3939437866210938, -19.67584228515625, 94.65310668945312, 35.910308837890625, 31.583965301513672, 115.4815673828125, 11.299922943115234, 12.1031494140625, 117.33443450927734, -29.882766723632812, 26.65435791015625, 107.84412384033203, 77.87074279785156, 98.59888458251953, 13.821708679199219, 71.80476379394531, 121.10205841064453, 89.46879577636719, -0.42911529541015625, 135.043701171875, 11.975944519042969, 1.6654281616210938, 113.21415710449219, -68.34662628173828, 8.877052307128906, 20.49333953857422, 68.115966796875, -78.59729766845703, 22.52484130859375, 60.61286926269531, 104.6617660522461, 118.48753356933594, -23.69923973083496, 105.43556213378906, 98.48788452148438, 14.80453109741211, 17.326854705810547, 28.392486572265625, -107.302490234375, 5.367063522338867, -57.868560791015625, 90.61906433105469, 1.1607551574707031, 79.55908203125, -2.653057098388672, 56.5867919921875, 86.2218017578125, -8.389968872070312, -11.469951629638672, 8.516382217407227, 3.767425537109375, -18.640472412109375, 126.33152770996094, 15.294677734375, 31.74736976623535], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000283.npy"}
|
||||
{"epoch": 0.42781557067271353, "step": 284, "batch_size": 64, "mean": 38.14597702026367, "std": 53.635963439941406, "min": -113.64566040039062, "p10": -3.4491649627685534, "median": 18.371397018432617, "p90": 117.89802856445313, "max": 136.0371551513672, "pos_frac": 0.796875, "sample": [-11.636672973632812, 6.161430358886719, 43.77024841308594, 22.625701904296875, 131.96612548828125, 70.0888671875, 3.3714351654052734, -18.56536865234375, 136.0371551513672, 6.771690368652344, 6.739376068115234, 19.747692108154297, -0.7306861877441406, -2.2300262451171875, 104.45126342773438, -3.9716529846191406, 8.478422164916992, 67.48887634277344, -0.6866455078125, 123.73117065429688, 16.995101928710938, 35.58445739746094, 12.036666870117188, -36.59367370605469, -18.377159118652344, 15.926445007324219, 4.144035339355469, 25.183853149414062, 47.940242767333984, -1.8722038269042969, 116.60490417480469, 75.56037139892578, 83.12438201904297, 95.23291015625, 9.240291595458984, 11.553581237792969, 100.93965148925781, 118.45222473144531, 126.364013671875, 8.649131774902344, 78.46402740478516, 135.7035675048828, 45.423797607421875, 1.7101478576660156, 1.220102310180664, -0.3608875274658203, 88.46876525878906, 6.8668975830078125, 65.9465103149414, 107.83749389648438, 3.4491214752197266, 58.187007904052734, 100.1741943359375, 7.131927490234375, 11.639801025390625, 35.48870849609375, 132.63768005371094, 56.519325256347656, 58.918888092041016, -113.64566040039062, 85.70503997802734, -98.407958984375, 13.359840393066406, -1.393514633178711], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000284.npy"}
|
||||
{"epoch": 0.4293272864701436, "step": 285, "batch_size": 64, "mean": 31.729633331298828, "std": 55.646060943603516, "min": -110.82909393310547, "p10": -36.1152286529541, "median": 20.021228790283203, "p90": 117.12044830322266, "max": 129.4685821533203, "pos_frac": 0.71875, "sample": [29.55901336669922, 5.489524841308594, -21.3203125, 115.57786560058594, 10.953628540039062, -36.819881439208984, -110.82909393310547, 4.3299560546875, 59.19374465942383, -34.471038818359375, 47.35502624511719, 119.12474060058594, 108.44239807128906, 16.429588317871094, 107.98411560058594, -49.877777099609375, 55.403419494628906, 121.73335266113281, 2.5791358947753906, 6.614795684814453, 37.73554992675781, 2.227100372314453, -47.879207611083984, -41.459102630615234, -0.5806045532226562, -67.192626953125, -4.720550537109375, 22.366806030273438, 32.0318603515625, -42.38053894042969, -16.380950927734375, 129.4685821533203, -12.429328918457031, 112.79337310791016, 62.19834899902344, 72.2142333984375, 45.76886749267578, 8.557754516601562, 66.15769958496094, 103.45621490478516, 75.93562316894531, -3.7053680419921875, 0.27706146240234375, 124.15757751464844, 90.75659942626953, 29.151771545410156, 0.791168212890625, 117.78155517578125, -9.03271484375, -2.867534637451172, 38.639305114746094, 96.89173889160156, 17.67565155029297, 55.84606170654297, 0.4621429443359375, 47.92491149902344, 124.75479888916016, 124.91998291015625, -24.28382110595703, 8.959579467773438, 0.24322509765625, 36.25450134277344, -12.194122314453125, 71.95112609863281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000285.npy"}
|
||||
{"epoch": 0.4308390022675737, "step": 286, "batch_size": 64, "mean": 50.71385955810547, "std": 56.25233459472656, "min": -54.58732986450195, "p10": -3.263705444335937, "median": 38.68081283569336, "p90": 132.25442199707032, "max": 166.349365234375, "pos_frac": 0.859375, "sample": [48.896087646484375, 8.598169326782227, 1.0885028839111328, 66.36394500732422, 7.5455322265625, 118.67063903808594, 8.2276611328125, 1.6760501861572266, 25.65789222717285, -3.5252532958984375, 57.74949645996094, 6.3479461669921875, 36.922943115234375, 105.45455932617188, 117.73591613769531, 139.44137573242188, 109.93525695800781, 14.104646682739258, -19.024147033691406, 11.481239318847656, 85.32820129394531, -23.632606506347656, 43.468135833740234, 127.19319152832031, 3.452007293701172, -53.56346130371094, 73.8193130493164, 46.1519889831543, -34.87767028808594, 61.56706237792969, 117.88884735107422, 79.53218078613281, 121.97674560546875, 134.41622924804688, 74.54249572753906, 147.3577880859375, 35.81192398071289, 109.61430358886719, -54.58732986450195, 4.9499664306640625, 132.89865112304688, 166.349365234375, 22.49591064453125, -2.6534271240234375, 90.45991516113281, 9.937637329101562, 13.36387825012207, 27.506729125976562, 130.751220703125, 125.30165100097656, 133.09344482421875, 40.438682556152344, 24.903797149658203, 12.019292831420898, 0.9079551696777344, 28.250564575195312, 75.1668472290039, 14.22024917602539, -44.19818115234375, 142.17848205566406, 61.01994323730469, -0.11175918579101562, 68.62223815917969, 9.006317138671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000286.npy"}
|
||||
{"epoch": 0.4323507180650038, "step": 287, "batch_size": 64, "mean": 50.73650360107422, "std": 59.130455017089844, "min": -107.26343536376953, "p10": -3.269147682189941, "median": 32.22881317138672, "p90": 131.5609329223633, "max": 147.4062957763672, "pos_frac": 0.828125, "sample": [41.14038848876953, -60.195587158203125, 9.156978607177734, 95.53312683105469, 147.4062957763672, 126.6473388671875, 11.846878051757812, 27.437227249145508, 59.248069763183594, -2.639617919921875, 60.27317810058594, 12.196544647216797, 50.45497131347656, 32.82292175292969, 136.06253051757812, 136.99761962890625, 140.39971923828125, 65.05677032470703, 123.5216064453125, 92.80555725097656, 32.77092742919922, 29.4429931640625, 113.779052734375, 1.5272045135498047, 8.990684509277344, 99.59973907470703, -1.9150810241699219, -29.831459045410156, 112.36193084716797, 1.4195137023925781, 109.8912124633789, 7.7220611572265625, -107.26343536376953, 7.786964416503906, -3.5389461517333984, 97.25659942626953, 23.85100555419922, -1.9699974060058594, 26.37360382080078, 114.16668701171875, 112.15721130371094, -23.99475860595703, 131.66543579101562, 107.47709655761719, -13.115325927734375, 3.636993408203125, 131.3170928955078, 2.959056854248047, 112.01424407958984, 117.61337280273438, 0.2795276641845703, 7.3421478271484375, 93.46514129638672, 4.272794723510742, 2.519397735595703, 31.68669891357422, 134.72840881347656, 9.379173278808594, 79.03256225585938, -3.8185768127441406, -1.5023880004882812, 134.80108642578125, 122.76112365722656, 1.8650054931640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000287.npy"}
|
||||
{"epoch": 0.43386243386243384, "step": 288, "batch_size": 64, "mean": 37.137718200683594, "std": 59.393653869628906, "min": -103.92733764648438, "p10": -27.227784729003904, "median": 28.767470359802246, "p90": 123.91878738403321, "max": 148.91705322265625, "pos_frac": 0.703125, "sample": [30.117626190185547, 3.0785140991210938, -27.631317138671875, -103.92733764648438, 38.820762634277344, 19.467754364013672, 17.585142135620117, -9.963977813720703, 80.94757080078125, 51.11186981201172, 15.569766998291016, 77.31339263916016, 13.060951232910156, 121.89690399169922, -10.088226318359375, 87.00740051269531, -26.286209106445312, 57.788516998291016, -19.35382080078125, 66.16873168945312, -88.7383041381836, 121.48196411132812, 148.91705322265625, 124.78530883789062, -10.430953979492188, 132.59762573242188, 27.417314529418945, 54.08476638793945, 4.817893981933594, 33.10868835449219, 131.97470092773438, -75.66899108886719, 54.192840576171875, 66.74978637695312, 126.23658752441406, 66.0994873046875, 48.03456115722656, -38.439971923828125, 38.45707702636719, -20.402870178222656, -51.13701629638672, -56.06011962890625, 125.0501480102539, -2.009929656982422, -0.8344955444335938, -12.968612670898438, 102.28980255126953, -10.692962646484375, 113.29072570800781, 115.90765380859375, 106.71432495117188, -0.7062530517578125, 87.99932861328125, 10.472564697265625, 0.3907623291015625, -0.21181869506835938, 79.54942321777344, 19.576805114746094, 0.5677776336669922, 128.96400451660156, 21.88429832458496, 75.24497985839844, 22.342788696289062, 73.23124694824219], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000288.npy"}
|
||||
{"epoch": 0.43537414965986393, "step": 289, "batch_size": 64, "mean": 34.13987350463867, "std": 63.109825134277344, "min": -121.37191772460938, "p10": -37.683573150634764, "median": 17.56527328491211, "p90": 123.04625244140625, "max": 156.4635009765625, "pos_frac": 0.703125, "sample": [35.84331512451172, 14.376678466796875, 113.08723449707031, -15.359792709350586, -39.329322814941406, -17.877685546875, -11.868419647216797, 31.781753540039062, 94.64895629882812, 31.688072204589844, 5.11981201171875, 34.17900085449219, 111.6215591430664, 112.38121032714844, 37.67622375488281, 33.540245056152344, 120.77337646484375, -41.380584716796875, -9.846256256103516, 119.89812469482422, 15.195175170898438, 129.42481994628906, 1.4327392578125, -33.84349060058594, 108.7219467163086, 0.9166717529296875, 123.90568542480469, 4.462650299072266, -1.5961265563964844, 129.44862365722656, -2.2240753173828125, 31.13666343688965, -23.64383316040039, -121.37191772460938, 142.77804565429688, -4.523952484130859, 6.427087783813477, 156.4635009765625, 10.399887084960938, 18.55773162841797, 1.4972667694091797, 17.378395080566406, -42.793670654296875, 22.63648223876953, 65.109130859375, -32.324851989746094, 53.49530792236328, -4.164882659912109, -53.43382263183594, -89.00567626953125, 96.99185180664062, 18.424041748046875, 5.01446533203125, 17.752151489257812, 127.2602310180664, 120.50320434570312, 98.43408966064453, 121.04090881347656, 124.69606018066406, 5.1837921142578125, 1.2351264953613281, -52.502784729003906, 116.44615173339844, -6.942394256591797], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000289.npy"}
|
||||
{"epoch": 0.436885865457294, "step": 290, "batch_size": 64, "mean": 42.03026580810547, "std": 64.39334106445312, "min": -111.38381958007812, "p10": -12.873827934265133, "median": 30.642532348632812, "p90": 130.55367736816407, "max": 142.2322998046875, "pos_frac": 0.75, "sample": [86.7108154296875, 29.52770233154297, 112.17533111572266, 142.2322998046875, 51.85130310058594, 9.095739364624023, -14.139490127563477, -7.278314590454102, 4.67071533203125, 140.30307006835938, 56.75843811035156, 25.07967758178711, 30.91510772705078, -85.87369537353516, 39.40122985839844, -3.7411041259765625, -8.043899536132812, -21.33575439453125, -7.627506256103516, -9.920616149902344, 52.96856689453125, 30.369956970214844, 131.14645385742188, 133.59243774414062, 24.44591522216797, 18.67882537841797, 9.890377044677734, -3.2896728515625, 6.987859725952148, -92.42645263671875, 129.03567504882812, 135.23355102539062, 117.5195083618164, 44.65129470825195, 46.58734130859375, 135.49746704101562, 127.33118438720703, 82.99424743652344, 40.742820739746094, -73.3023681640625, 115.86479187011719, 109.54903411865234, 14.202281951904297, -109.07232666015625, -1.3438949584960938, -6.70538330078125, -4.732294082641602, 137.49884033203125, 123.9836654663086, 119.79310607910156, 23.59872817993164, 55.412811279296875, 3.072479248046875, 17.982437133789062, -111.38381958007812, 118.22467041015625, 16.788429260253906, 129.1705322265625, 38.924774169921875, 42.88599395751953, 90.17762756347656, 26.398162841796875, 67.26042175292969, 2.9698257446289062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000290.npy"}
|
||||
{"epoch": 0.4383975812547241, "step": 291, "batch_size": 64, "mean": 45.17587661743164, "std": 61.4099006652832, "min": -116.86376953125, "p10": -20.681122970581054, "median": 20.07052993774414, "p90": 130.30879974365234, "max": 187.7540283203125, "pos_frac": 0.75, "sample": [152.68649291992188, 129.2514190673828, 18.731903076171875, -22.752182006835938, 29.384353637695312, 21.768630981445312, -17.03234100341797, -0.04779052734375, 127.15411376953125, 6.412242889404297, 86.46626281738281, 87.1640853881836, -21.53293228149414, 0.25517845153808594, 122.10249328613281, 118.0541000366211, -24.90937042236328, -14.595603942871094, 18.26483154296875, 113.65386199951172, -3.837921142578125, 13.299697875976562, 35.13768005371094, 47.337425231933594, 61.494625091552734, 110.93344116210938, 117.391357421875, -1.0833396911621094, 133.10206604003906, 4.7027587890625, 64.46565246582031, 137.4303436279297, -1.0945243835449219, 13.618911743164062, -18.995880126953125, 5.597597122192383, 102.45257568359375, -4.7085418701171875, 134.84596252441406, 13.489784240722656, -1.4716110229492188, 130.761962890625, -116.86376953125, 12.235923767089844, 132.51876831054688, 42.87348937988281, 51.2576904296875, 120.9422607421875, 80.75464630126953, 187.7540283203125, -21.403369903564453, 16.193649291992188, -34.621883392333984, 21.409156799316406, -44.32109832763672, 5.932247161865234, 15.738449096679688, 126.5464096069336, 10.457740783691406, 91.15328216552734, 90.52288818359375, 65.92335510253906, 7.2061614990234375, 3.6962661743164062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000291.npy"}
|
||||
{"epoch": 0.4399092970521542, "step": 292, "batch_size": 64, "mean": 36.16666793823242, "std": 64.46073150634766, "min": -119.10917663574219, "p10": -39.80833282470703, "median": 27.126094818115234, "p90": 119.76590270996094, "max": 174.24374389648438, "pos_frac": 0.703125, "sample": [-104.85322570800781, -10.112922668457031, 96.21665954589844, 0.8194427490234375, 132.55340576171875, 68.40308380126953, 39.47025680541992, 174.24374389648438, 103.79437255859375, -23.828018188476562, -41.00514221191406, 4.551698684692383, 8.252962112426758, 9.951408386230469, -58.50019836425781, -1.493011474609375, 71.03125, 37.04835510253906, 83.99078369140625, 16.737106323242188, 36.12571716308594, 99.52925109863281, 89.25674438476562, -44.36334991455078, -13.933685302734375, -3.1744766235351562, -0.7010917663574219, 120.28963470458984, -37.015777587890625, 93.84669494628906, 26.189414978027344, 103.23173522949219, 7.524965286254883, -8.565444946289062, -0.3311595916748047, -110.39522552490234, 98.40925598144531, -119.10917663574219, 0.061588287353515625, 5.399925231933594, -9.969146728515625, 118.54386138916016, 102.06803894042969, -9.351907730102539, 134.30645751953125, 94.24929809570312, 7.230445861816406, 58.93657684326172, 34.77333450317383, 7.3899993896484375, 139.28240966796875, 113.59661102294922, 3.362344741821289, 107.33366394042969, 63.03114700317383, 19.064659118652344, 76.72003173828125, 32.42172622680664, 128.0684356689453, 35.865989685058594, 28.062774658203125, -5.747314453125, -50.611732482910156, 136.491455078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000292.npy"}
|
||||
{"epoch": 0.4414210128495843, "step": 293, "batch_size": 64, "mean": 51.426517486572266, "std": 70.3485107421875, "min": -131.31764221191406, "p10": -23.02245368957519, "median": 36.03325653076172, "p90": 132.32110900878905, "max": 176.42547607421875, "pos_frac": 0.78125, "sample": [4.878688812255859, 27.280136108398438, 9.80844497680664, 130.80516052246094, 92.78984069824219, 28.437633514404297, -24.950443267822266, 16.178634643554688, 102.62028503417969, 4.39923095703125, 131.91676330566406, -8.106901168823242, 133.91244506835938, 101.32782745361328, 108.55877685546875, 4.824287414550781, 121.66989135742188, 78.56333923339844, 115.462890625, 116.0866470336914, 105.63278198242188, -35.507774353027344, -12.193595886230469, -80.59024047851562, 104.09249877929688, 1.3131561279296875, 96.09896850585938, -49.26266860961914, 13.755172729492188, 155.86749267578125, 0.9768638610839844, 144.62269592285156, 151.53726196289062, -96.18135833740234, 111.95407104492188, 5.670871734619141, 127.71143341064453, -131.31764221191406, -7.024463653564453, 10.963294982910156, 29.230361938476562, 131.3394317626953, -73.72795104980469, -10.631393432617188, -6.35894775390625, 2.8227767944335938, 43.92425537109375, 109.1669692993164, 176.42547607421875, 25.950653076171875, 118.03733825683594, 7.27392578125, 60.80620574951172, 7.401119232177734, 125.04779052734375, -18.52381134033203, 125.06657409667969, 22.014450073242188, 87.4318618774414, 132.49440002441406, 42.836151123046875, -5.971759796142578, 111.22744750976562, 133.43130493164062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000293.npy"}
|
||||
{"epoch": 0.4429327286470144, "step": 294, "batch_size": 64, "mean": 34.03075408935547, "std": 55.401512145996094, "min": -103.94632720947266, "p10": -15.917061996459955, "median": 14.211006164550781, "p90": 127.67697143554688, "max": 161.70584106445312, "pos_frac": 0.71875, "sample": [127.15968322753906, 24.7099609375, 44.5936279296875, 18.517120361328125, 118.82612609863281, 14.786521911621094, 132.5038299560547, -29.802947998046875, -34.775390625, 4.857780456542969, 68.79368591308594, -103.94632720947266, -7.817892074584961, 25.002593994140625, 18.0762882232666, 67.69341278076172, 127.89866638183594, 9.582618713378906, 128.83668518066406, 17.950477600097656, -1.632843017578125, 13.635490417480469, 1.8848457336425781, 71.76748657226562, -7.155738830566406, 137.64100646972656, -4.958232879638672, 74.14817810058594, -2.4434165954589844, 161.6844482421875, 112.019775390625, -34.25321578979492, 2.8316287994384766, 161.70584106445312, -1.5306644439697266, 1.411834716796875, -6.825836181640625, -2.6555252075195312, -26.117584228515625, 35.41752624511719, 8.974983215332031, 4.28594970703125, 125.44889068603516, -1.0930442810058594, 4.408010482788086, 60.825950622558594, 20.683897018432617, 42.49722671508789, 6.826507568359375, 133.73831176757812, -18.421634674072266, 52.34170913696289, 97.346435546875, 5.073478698730469, 4.079206466674805, 46.304115295410156, 2.2939453125, -0.7042446136474609, 20.456695556640625, 5.8842620849609375, 85.6730728149414, -24.471210479736328, 45.56752395629883, -10.07305908203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000294.npy"}
|
||||
{"epoch": 0.4444444444444444, "step": 295, "batch_size": 64, "mean": 32.47895812988281, "std": 60.63702392578125, "min": -105.6456298828125, "p10": -35.2160057067871, "median": 14.009769439697266, "p90": 120.48949432373047, "max": 163.05319213867188, "pos_frac": 0.71875, "sample": [128.55056762695312, 13.342971801757812, -4.4247283935546875, -11.004348754882812, 53.99866485595703, 112.20729064941406, 119.49940490722656, 40.75492477416992, 19.585372924804688, 28.904495239257812, 137.31564331054688, -14.761005401611328, 163.05319213867188, 14.676567077636719, 7.811954498291016, 10.647972106933594, 2.5498046875, 6.968269348144531, -25.377334594726562, 88.113525390625, 131.19671630859375, -91.30110168457031, -3.0178871154785156, 8.389982223510742, 73.35165405273438, 19.4827880859375, 6.037553787231445, -12.657413482666016, 120.913818359375, 6.418052673339844, -49.31224060058594, -3.2070693969726562, 137.19638061523438, 117.53903198242188, 14.870025634765625, -71.30484008789062, 113.02803039550781, 28.61178970336914, 48.99856185913086, 133.6349639892578, 57.98707580566406, -105.6456298828125, 10.424118041992188, -69.2205810546875, 93.61093139648438, 50.67150115966797, -41.09954833984375, -8.285392761230469, -39.432579040527344, 75.02882385253906, -9.989347457885742, 3.0103397369384766, 20.8974609375, 8.412612915039062, 6.468740463256836, 6.937114715576172, 89.97561645507812, -11.473388671875, 28.104103088378906, 3.1898345947265625, 87.56970977783203, 97.11688232421875, 108.46263885498047, -5.349815368652344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000295.npy"}
|
||||
{"epoch": 0.4459561602418745, "step": 296, "batch_size": 64, "mean": 40.978172302246094, "std": 65.5432357788086, "min": -112.29135131835938, "p10": -46.41376762390136, "median": 29.474042892456055, "p90": 129.55938415527345, "max": 159.51397705078125, "pos_frac": 0.8125, "sample": [67.1640625, 120.445556640625, 117.35316467285156, 9.807111740112305, 1.1721115112304688, 25.68572998046875, 2.9676342010498047, 150.39376831054688, 1.7126808166503906, 103.6890869140625, 26.356792449951172, 12.466777801513672, -61.7352294921875, 134.1294403076172, 1.160888671875, 55.275146484375, 125.63839721679688, 79.31118774414062, 13.106399536132812, 45.41770935058594, 1.1468963623046875, 39.65866470336914, 0.856475830078125, -73.91351318359375, -34.067657470703125, -21.111846923828125, 1.403778076171875, 48.56464385986328, 17.829139709472656, 20.36083984375, 13.725753784179688, -76.246826171875, 41.6976318359375, -14.26348876953125, 159.51397705078125, 58.81196594238281, 4.355987548828125, -50.73109817504883, 113.48810577392578, -77.90534973144531, 49.07756805419922, 88.8739013671875, 135.82205200195312, -112.29135131835938, 119.15083312988281, 32.59129333496094, 93.39152526855469, 95.13325500488281, 100.11343383789062, 79.95328521728516, 123.21360778808594, 21.032943725585938, 131.23980712890625, 15.656728744506836, 155.5679931640625, -10.0897216796875, 8.980194091796875, 136.78311157226562, 3.4685516357421875, -36.339996337890625, -68.42605590820312, 100.73167419433594, 88.86520385742188, 65.41069793701172], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000296.npy"}
|
||||
{"epoch": 0.4474678760393046, "step": 297, "batch_size": 64, "mean": 44.568824768066406, "std": 56.482269287109375, "min": -98.47271728515625, "p10": -11.629810333251953, "median": 34.52745246887207, "p90": 128.52895660400392, "max": 140.70343017578125, "pos_frac": 0.78125, "sample": [117.81605529785156, -39.33808898925781, 9.149009704589844, 99.36022186279297, 39.455902099609375, 21.349517822265625, 12.209342956542969, 126.10507202148438, -3.046234130859375, -98.47271728515625, 101.93807983398438, -1.366546630859375, 24.105670928955078, 43.73318099975586, 2.821870803833008, 132.84825134277344, 17.941390991210938, 67.10049438476562, 13.54733657836914, 6.3449859619140625, 42.02354431152344, 10.318435668945312, -43.73606872558594, -66.06455993652344, -14.420951843261719, 129.56776428222656, 34.327247619628906, 8.24945068359375, 106.96063232421875, 5.816856384277344, 123.44351959228516, 134.03965759277344, -6.1258544921875, -8.614656448364258, 14.934085845947266, 23.01790428161621, 9.44537353515625, -10.76861572265625, 105.47784423828125, 63.972923278808594, 49.166351318359375, 34.727657318115234, 102.98139953613281, 55.73976135253906, 27.770034790039062, 91.89160919189453, 37.163124084472656, 0.2864351272583008, -13.844879150390625, 130.0832061767578, 2.415088653564453, 108.63900756835938, 122.76045227050781, 96.89134216308594, -1.9486351013183594, 68.64237976074219, 135.58485412597656, 71.75741577148438, -0.0147705078125, 140.70343017578125, -11.998893737792969, 34.9475212097168, 136.94338989257812, 75.6500244140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000297.npy"}
|
||||
{"epoch": 0.4489795918367347, "step": 298, "batch_size": 64, "mean": 33.41916275024414, "std": 66.70957946777344, "min": -124.39756774902344, "p10": -53.34180984497069, "median": 13.864654541015625, "p90": 126.92649688720704, "max": 148.37872314453125, "pos_frac": 0.734375, "sample": [32.347633361816406, -88.45394134521484, 148.37872314453125, 15.055450439453125, 135.75357055664062, 7.74444580078125, 101.63687133789062, 0.5901870727539062, -60.23760223388672, 109.95279693603516, 136.10435485839844, 122.4871826171875, -59.06732177734375, 34.344940185546875, 9.559669494628906, 55.956668853759766, 90.81463623046875, -43.24158477783203, 98.33222961425781, 90.15798950195312, 2.215167999267578, 0.6101493835449219, 107.58538055419922, 37.105194091796875, 28.356857299804688, 7.691375732421875, -89.52532196044922, 11.055303573608398, 82.3668441772461, 126.7144546508789, 1.946218490600586, 135.84413146972656, -8.312484741210938, 11.145164489746094, 31.319843292236328, 99.93878173828125, 6.131315231323242, 127.94414520263672, 4.686197280883789, 1.5467910766601562, -3.9209976196289062, -2.198209762573242, -0.0170440673828125, 17.95172119140625, 19.029926300048828, 146.70794677734375, 125.40240478515625, -0.136444091796875, -18.568988800048828, 6.088714599609375, 45.32963943481445, 12.673858642578125, -43.70110321044922, -57.473541259765625, -7.9751434326171875, 111.17484283447266, -88.43516540527344, 90.72117614746094, -18.629425048828125, -124.39756774902344, 127.01737213134766, 18.839649200439453, 0.8365402221679688, 117.92377471923828], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000298.npy"}
|
||||
{"epoch": 0.4504913076341648, "step": 299, "batch_size": 64, "mean": 29.639911651611328, "std": 69.04396057128906, "min": -116.7811508178711, "p10": -64.6180061340332, "median": 20.854446411132812, "p90": 118.77997131347657, "max": 165.19830322265625, "pos_frac": 0.65625, "sample": [117.35675048828125, 91.58572387695312, -14.30000114440918, -17.597023010253906, 8.896211624145508, 106.53875732421875, 24.5111083984375, 65.81855773925781, 165.19830322265625, -0.5404338836669922, -57.06440734863281, 79.06858825683594, -116.7811508178711, 2.8670272827148438, -97.17584228515625, -20.9052734375, 21.829452514648438, 123.35603332519531, 57.64186096191406, -53.160743713378906, 129.1156005859375, -28.867046356201172, 40.44114303588867, 73.92649841308594, -69.82473754882812, -99.15779876708984, -3.28192138671875, 0.3061866760253906, 140.0020294189453, -1.8935317993164062, 106.52999877929688, -2.3229446411132812, 56.94007110595703, -95.99630737304688, 8.187774658203125, 6.2259521484375, 57.416290283203125, 119.38992309570312, 15.753364562988281, 19.95301055908203, 58.36346435546875, -52.72179412841797, 112.76502227783203, 111.97940063476562, 138.64710998535156, -67.85526275634766, 12.345129013061523, 97.57122039794922, 90.17819213867188, 84.36761474609375, 0.6298084259033203, 32.7178955078125, 89.16490173339844, -86.94883728027344, 7.923736572265625, 88.74382019042969, -20.795013427734375, 109.25474548339844, -0.7316303253173828, 21.755882263183594, -0.9938411712646484, 37.956214904785156, -57.04990768432617, 129.69947814941406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000299.npy"}
|
||||
{"epoch": 0.4520030234315949, "step": 300, "batch_size": 64, "mean": 37.48223876953125, "std": 65.56111145019531, "min": -103.37577819824219, "p10": -54.72380065917968, "median": 33.60799026489258, "p90": 130.2342269897461, "max": 150.06288146972656, "pos_frac": 0.734375, "sample": [34.208595275878906, 112.12739562988281, 8.772598266601562, 77.1297607421875, -2.493122100830078, 33.00738525390625, 103.3108139038086, 8.561630249023438, 0.31534767150878906, -90.03401184082031, 139.74131774902344, 23.598304748535156, 95.93476867675781, 140.0480194091797, 136.86119079589844, 61.43310546875, 132.37538146972656, -57.8651123046875, -26.887916564941406, 103.17167663574219, 2.1181507110595703, 94.68372344970703, 41.37983703613281, -14.493148803710938, -75.0755615234375, 85.76493835449219, 115.08348083496094, -15.535469055175781, 36.827613830566406, 19.72604751586914, 4.612102508544922, -103.37577819824219, 91.87858581542969, -1.1353492736816406, -91.80990600585938, 48.90102767944336, 34.361595153808594, 140.35385131835938, 3.9093780517578125, 129.62786865234375, 65.69064331054688, 38.79068374633789, 4.348785400390625, 150.06288146972656, 100.8827133178711, -47.394073486328125, 0.3453559875488281, 106.39628601074219, -7.314056396484375, 69.66847229003906, 71.50985717773438, 12.152790069580078, 26.40130615234375, 42.16631317138672, -10.092613220214844, -74.39375305175781, 26.169723510742188, 120.05569458007812, 76.84014892578125, -75.5914077758789, 13.988014221191406, -0.33048248291015625, 130.4940948486328, -23.10413360595703], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000300.npy"}
|
||||
{"epoch": 0.45351473922902497, "step": 301, "batch_size": 64, "mean": 33.403724670410156, "std": 63.543479919433594, "min": -128.35452270507812, "p10": -30.76671333312988, "median": 26.837244987487793, "p90": 124.08845367431641, "max": 173.48037719726562, "pos_frac": 0.75, "sample": [110.15837860107422, 13.577873229980469, 47.71392059326172, 34.10780715942383, 39.32298278808594, -32.376953125, 106.29305267333984, 134.67135620117188, 100.64656066894531, 12.358924865722656, 38.00493621826172, 29.72971534729004, 21.543014526367188, -10.13692855834961, -112.9777603149414, 18.178329467773438, 0.190399169921875, 85.07684326171875, 90.6087646484375, 4.950611114501953, -53.44001007080078, 52.849857330322266, 127.62789916992188, -6.374595642089844, 52.21673583984375, 2.7374839782714844, -4.847883224487305, 127.15200805664062, 88.70132446289062, -25.079078674316406, -17.568103790283203, 37.89856719970703, 64.07524108886719, 121.14071655273438, -55.54682159423828, 0.07866668701171875, -26.56695556640625, 173.48037719726562, -63.163787841796875, 23.944774627685547, -24.522329330444336, 37.80479431152344, 31.063308715820312, -94.82958984375, 4.268497467041016, 92.11988067626953, 112.31060791015625, 52.33599853515625, 19.501144409179688, 124.15978240966797, 11.276945114135742, 72.96656799316406, 74.82857513427734, -27.00948715209961, -23.846603393554688, 6.008304595947266, 125.90029907226562, 123.9220199584961, 35.55131530761719, 5.692863464355469, 12.240760803222656, -128.35452270507812, 136.82659912109375, 6.664665222167969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000301.npy"}
|
||||
{"epoch": 0.455026455026455, "step": 302, "batch_size": 64, "mean": 25.149852752685547, "std": 60.448612213134766, "min": -133.66587829589844, "p10": -44.015618515014644, "median": 19.425729751586914, "p90": 107.51338043212891, "max": 160.4093017578125, "pos_frac": 0.71875, "sample": [66.5072021484375, 148.72683715820312, 2.5381622314453125, 160.4093017578125, 59.85400390625, 25.610788345336914, 106.50212097167969, -15.541328430175781, 135.36660766601562, 15.637420654296875, 40.284759521484375, 36.232337951660156, 5.155866622924805, 7.732639312744141, 81.39654541015625, 121.35552978515625, 3.4290008544921875, 11.293388366699219, 11.237556457519531, 12.510101318359375, 22.291637420654297, -90.7529525756836, -93.05110168457031, 30.001205444335938, 19.448318481445312, -9.197031021118164, 40.07086181640625, 54.59412384033203, -4.204532623291016, -5.5662384033203125, -20.268165588378906, 59.31794738769531, 66.86367797851562, 3.9666194915771484, -52.38695526123047, 116.51573181152344, 135.4599609375, -36.09735107421875, 12.763763427734375, 37.80036544799805, 55.02015686035156, 91.12812805175781, 1.2427330017089844, 48.20127868652344, 44.6827392578125, 55.49188232421875, -123.23240661621094, 1.117462158203125, -25.733535766601562, 47.758506774902344, 93.15984344482422, 55.49330139160156, -5.943687438964844, -0.45304107666015625, 19.403141021728516, -45.893924713134766, -39.632904052734375, 107.94677734375, -11.515899658203125, -61.8738899230957, -133.66587829589844, 20.5113525390625, 82.37858581542969, 10.191144943237305], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000302.npy"}
|
||||
{"epoch": 0.4565381708238851, "step": 303, "batch_size": 64, "mean": 46.34803009033203, "std": 68.51551055908203, "min": -105.48854064941406, "p10": -54.279592132568354, "median": 54.120697021484375, "p90": 129.91275482177736, "max": 164.42523193359375, "pos_frac": 0.6875, "sample": [-10.850906372070312, 70.66986846923828, 87.52505493164062, 24.223695755004883, 136.92417907714844, 108.06234741210938, 95.61308288574219, -57.311134338378906, 112.5584487915039, 58.0386962890625, 123.16354370117188, -43.055137634277344, 34.76974868774414, -5.818941116333008, -87.60769653320312, -74.59968566894531, -61.897674560546875, 125.36441040039062, -105.48854064941406, -26.283447265625, -0.052211761474609375, 17.19227409362793, 164.42523193359375, 78.62969970703125, -47.20599365234375, 93.39682006835938, 94.23245239257812, -69.4048843383789, -3.8283615112304688, 27.204559326171875, 67.3643569946289, -2.7870635986328125, 7.667152404785156, 53.47282409667969, 17.157333374023438, -13.98574447631836, 130.43817138671875, 15.669258117675781, 95.48243713378906, 100.73243713378906, 18.94239044189453, 117.64234924316406, 63.342979431152344, 137.3435821533203, -3.6202011108398438, 109.83409118652344, 139.32676696777344, 42.6829833984375, 54.76856994628906, 126.22225189208984, -16.342391967773438, -7.530107498168945, -59.7183952331543, 47.29315185546875, 134.0175323486328, 107.99324035644531, 19.419450759887695, 82.87612915039062, 74.81390380859375, 118.79901123046875, 128.68678283691406, -37.05253601074219, 91.63604736328125, 145.09555053710938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000303.npy"}
|
||||
{"epoch": 0.4580498866213152, "step": 304, "batch_size": 64, "mean": 46.881046295166016, "std": 52.87440490722656, "min": -118.35153198242188, "p10": -7.606964874267576, "median": 44.38036346435547, "p90": 119.2342399597168, "max": 149.86436462402344, "pos_frac": 0.8125, "sample": [128.1758270263672, -1.4642791748046875, 72.86581420898438, 134.1909942626953, -8.593978881835938, 64.04017639160156, 36.39087677001953, 65.22725677490234, -30.265241622924805, 19.17862319946289, 9.010772705078125, 101.39459228515625, 96.867431640625, 41.38929748535156, 108.84768676757812, 51.48082733154297, -2.8731117248535156, 40.18729782104492, 88.86387634277344, -118.35153198242188, 18.639404296875, 115.50718688964844, 45.34136962890625, 26.46246337890625, -5.303932189941406, 9.578857421875, 5.665948867797852, 55.52706527709961, 39.77586364746094, 116.63877868652344, 49.711936950683594, 4.082830429077148, 148.50631713867188, 45.599037170410156, 14.932846069335938, -51.379425048828125, 87.26517486572266, 78.1838607788086, 26.12816619873047, 65.04348754882812, 1.715188980102539, 1.7285900115966797, 149.86436462402344, 53.17060852050781, 1.2904586791992188, 11.638275146484375, 58.040653228759766, 69.92292022705078, 120.3465805053711, 135.28451538085938, -19.26068115234375, -2.6722564697265625, 93.91567993164062, -0.8510017395019531, 100.7321548461914, -8.938053131103516, 49.826541900634766, 130.36087036132812, 86.27407836914062, 27.505569458007812, 28.498428344726562, -21.76456069946289, 43.41935729980469, 97.86836242675781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000304.npy"}
|
||||
{"epoch": 0.4595616024187453, "step": 305, "batch_size": 64, "mean": 33.715187072753906, "std": 66.1142349243164, "min": -134.13946533203125, "p10": -57.00418548583984, "median": 30.632932662963867, "p90": 119.14811477661134, "max": 148.38917541503906, "pos_frac": 0.75, "sample": [118.44876098632812, 15.292045593261719, 14.338516235351562, -70.6619644165039, -14.757522583007812, 0.5930213928222656, 144.92401123046875, -65.57147979736328, 33.42338562011719, 128.8287353515625, 32.27775192260742, 142.37530517578125, -34.987762451171875, 109.927978515625, -102.44392395019531, -23.31044578552246, 18.487503051757812, 5.430698394775391, -68.23443603515625, 71.56582641601562, 15.131366729736328, 17.225322723388672, 16.53839874267578, 52.630340576171875, 42.98005676269531, 69.00444030761719, 74.31808471679688, 2.1283416748046875, 51.51324462890625, -134.13946533203125, 2.1026611328125, 44.98485565185547, 117.48112487792969, 101.34320831298828, -19.585205078125, 34.595069885253906, 148.38917541503906, -24.437606811523438, 19.904142379760742, 58.00750732421875, 15.387588500976562, 8.299064636230469, 65.48075103759766, 138.26675415039062, 104.6454086303711, -4.721488952636719, 16.107879638671875, -57.91838073730469, 133.4117431640625, 70.57528686523438, 101.7425308227539, 31.814476013183594, 29.45138931274414, 105.49414825439453, 60.04090118408203, 103.53274536132812, -52.512481689453125, 67.88621520996094, -93.70323181152344, 119.44783782958984, -17.19222068786621, 17.07671546936035, 103.96821594238281, -54.871063232421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000305.npy"}
|
||||
{"epoch": 0.46107331821617537, "step": 306, "batch_size": 64, "mean": 36.52022933959961, "std": 52.002838134765625, "min": -109.53096008300781, "p10": -11.617061996459956, "median": 25.222904205322266, "p90": 110.15457611083986, "max": 162.48220825195312, "pos_frac": 0.78125, "sample": [132.0533905029297, 80.9509048461914, 23.721342086791992, 131.93966674804688, 12.130950927734375, 97.13858795166016, 10.636695861816406, 12.85916519165039, 27.97873878479004, 64.22712707519531, 116.4789047241211, 95.50030517578125, -44.341941833496094, 19.256412506103516, -13.383197784423828, -109.53096008300781, 111.85704040527344, 36.684295654296875, 51.98227310180664, -1.0469474792480469, -0.5131759643554688, 96.19681549072266, 104.75878143310547, 9.390523910522461, 8.084699630737305, 7.5177001953125, -20.06293296813965, 105.1866226196289, 5.701316833496094, 55.82530212402344, -30.063751220703125, 106.18215942382812, -7.4960784912109375, 7.496797561645508, 67.84424591064453, 37.400596618652344, 118.89497375488281, 152.5643768310547, 10.308395385742188, 1.3906478881835938, 41.84254455566406, 6.267923355102539, 30.896839141845703, 41.812255859375, -45.76280975341797, 12.638252258300781, -2.357576370239258, 49.188690185546875, 8.13046646118164, 37.32855224609375, 74.47779083251953, 58.8865966796875, 0.7512969970703125, -0.9938812255859375, 31.755706787109375, 15.877300262451172, 24.489051818847656, -5.586540222167969, 36.50511932373047, 162.48220825195312, -36.18525695800781, 25.956756591796875, 76.26829528808594, -1.0756607055664062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000306.npy"}
|
||||
{"epoch": 0.46258503401360546, "step": 307, "batch_size": 64, "mean": 17.255882263183594, "std": 65.49858856201172, "min": -114.41555786132812, "p10": -55.3151439666748, "median": 5.839519500732422, "p90": 114.81538009643556, "max": 141.74684143066406, "pos_frac": 0.5625, "sample": [103.57755279541016, -47.76818084716797, 8.918174743652344, -28.783226013183594, -106.9375, 63.69328308105469, 141.74684143066406, -37.098777770996094, 132.85105895996094, 98.34602355957031, -43.03535461425781, 23.427162170410156, 11.732540130615234, 130.30982971191406, -114.41555786132812, 41.85304260253906, 37.577178955078125, 1.0188751220703125, 80.56575775146484, -38.34471893310547, 48.549285888671875, -88.08414459228516, -2.909454345703125, -74.99700927734375, 13.619888305664062, -72.0633773803711, -42.29125213623047, 43.954689025878906, 58.72200012207031, -1.1014366149902344, -19.72692108154297, 16.829374313354492, -0.07221603393554688, -55.63969039916992, -54.55786895751953, 71.86666870117188, -48.41508483886719, -4.349555969238281, 112.87677764892578, -14.125869750976562, 2.534841537475586, -13.9273681640625, 0.27617645263671875, 129.44969177246094, 124.17996215820312, -34.553009033203125, 28.905776977539062, 115.64620971679688, 83.19055938720703, 42.42555236816406, -83.48516845703125, 34.66124725341797, 105.6661148071289, -1.4259471893310547, 61.744903564453125, 52.88256072998047, 2.7608642578125, 128.76214599609375, -51.256813049316406, -42.574012756347656, -51.70772933959961, 28.194442749023438, 96.05451202392578, -1.3479156494140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000307.npy"}
|
||||
{"epoch": 0.46409674981103555, "step": 308, "batch_size": 64, "mean": 34.75708770751953, "std": 60.92500305175781, "min": -133.74813842773438, "p10": -40.375798797607416, "median": 22.86959457397461, "p90": 120.05169754028321, "max": 146.88552856445312, "pos_frac": 0.75, "sample": [130.15957641601562, 65.7634048461914, 56.86888122558594, -37.878299713134766, 112.00582885742188, 86.33784484863281, -21.299034118652344, 35.10768127441406, 48.53861999511719, -6.426719665527344, 123.91014862060547, 116.51750183105469, 121.56635284423828, 15.117210388183594, 6.723297119140625, -58.03702926635742, 70.20430755615234, 113.636962890625, 61.13446807861328, 23.625598907470703, 72.19989013671875, -16.057994842529297, 132.36776733398438, 50.592002868652344, 16.44489288330078, 50.632484436035156, -3.324819564819336, 64.72701263427734, 67.35014343261719, -84.60951232910156, 146.88552856445312, 5.141319274902344, 22.113590240478516, -41.4461555480957, 53.40013122558594, 3.0998764038085938, 95.1409683227539, -1.7838058471679688, 1.28131103515625, -133.74813842773438, 9.77178955078125, 2.6005210876464844, 75.67341613769531, 135.05599975585938, 7.4522552490234375, 30.921875, 58.39488983154297, -21.71881103515625, 84.37019348144531, 7.269893646240234, 112.463134765625, 14.936676025390625, -14.334564208984375, 65.6428451538086, 114.55108642578125, 19.49182891845703, 128.82550048828125, -53.97797393798828, -57.45365905761719, 7.737947463989258, 8.746862411499023, -85.83238220214844, 11.587783813476562, -1.7063884735107422], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000308.npy"}
|
||||
{"epoch": 0.4656084656084656, "step": 309, "batch_size": 64, "mean": 35.24372100830078, "std": 54.974212646484375, "min": -100.11761474609375, "p10": -16.74728851318359, "median": 30.826970100402832, "p90": 105.30524215698244, "max": 153.3257293701172, "pos_frac": 0.765625, "sample": [89.73455047607422, 145.97560119628906, 6.850009918212891, 107.40791320800781, 53.977996826171875, 1.6539077758789062, 84.93798828125, -11.217132568359375, 67.86813354492188, 11.311317443847656, 38.424346923828125, -1.3262939453125, -49.0071907043457, -12.034887313842773, 18.259414672851562, -90.91343688964844, 37.18646240234375, 98.18419647216797, -41.82323455810547, -6.52592658996582, 3.9532508850097656, 58.93863296508789, 91.51644897460938, 0.6283245086669922, 74.96322631835938, 88.99356079101562, 0.6778793334960938, 26.504911422729492, 21.965538024902344, 3.0720672607421875, 11.986082077026367, 17.531875610351562, -14.345962524414062, 119.45500183105469, 83.18061828613281, 45.135433197021484, 38.35581970214844, 0.6889686584472656, -9.734460830688477, 58.65912628173828, -10.082199096679688, 136.80911254882812, 78.25584411621094, 7.551055908203125, 153.3257293701172, -100.11761474609375, 100.39900970458984, 49.77830123901367, 26.99471092224121, 47.91154479980469, -13.977432250976562, 4.7306060791015625, 43.828216552734375, 58.67652893066406, 131.7099609375, 34.65922927856445, 47.6468505859375, 91.8060302734375, -42.76405334472656, -50.043212890625, 9.1148681640625, -17.77642822265625, 58.38854217529297, 137.7228546142578], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000309.npy"}
|
||||
{"epoch": 0.4671201814058957, "step": 310, "batch_size": 64, "mean": 21.467906951904297, "std": 56.839813232421875, "min": -77.23418426513672, "p10": -43.245114135742185, "median": 10.893230438232422, "p90": 100.75163955688478, "max": 165.75579833984375, "pos_frac": 0.609375, "sample": [102.03131103515625, -2.4784622192382812, 20.111019134521484, 46.17255401611328, 28.693527221679688, -63.86437225341797, -17.6544189453125, 79.42057800292969, 97.76573944091797, -2.7832088470458984, -39.25651550292969, -33.14332580566406, 50.330039978027344, 128.38323974609375, 22.8359375, -4.503536224365234, 0.7082328796386719, 64.89749145507812, 146.8871612548828, -25.803688049316406, 5.193561553955078, -38.59872817993164, -77.23418426513672, 165.75579833984375, -28.829444885253906, -33.1818733215332, 48.97467803955078, -22.628875732421875, 19.5635986328125, 97.19135284423828, -49.47114562988281, 63.28193664550781, -4.9790802001953125, 94.75738525390625, -44.95451354980469, 46.686187744140625, 1.2379684448242188, 66.0171890258789, 7.483055114746094, 54.6690673828125, 5.584220886230469, 14.30340576171875, -67.31280517578125, -31.50426483154297, 50.949424743652344, 36.96736145019531, -4.8377685546875, 58.27763366699219, -1.0763702392578125, -8.471832275390625, 14.887397766113281, 119.80032348632812, 110.00769805908203, -68.72187805175781, 0.9329814910888672, -36.62226104736328, 0.643157958984375, 125.57200622558594, 30.436803817749023, -17.738677978515625, -63.57466125488281, 95.07251739501953, 17.88960075378418, 22.798683166503906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000310.npy"}
|
||||
{"epoch": 0.46863189720332576, "step": 311, "batch_size": 64, "mean": 36.216758728027344, "std": 58.4365348815918, "min": -146.31214904785156, "p10": -23.04516983032226, "median": 26.29584503173828, "p90": 116.09138793945317, "max": 241.62771606445312, "pos_frac": 0.75, "sample": [106.19438171386719, 3.15020751953125, 16.84088897705078, -7.806327819824219, -28.806137084960938, -26.584197998046875, 41.53004837036133, 120.61756134033203, -2.057065963745117, 79.74673461914062, 8.015762329101562, -146.31214904785156, 3.554546356201172, 71.98735046386719, 97.53109741210938, -1.1313648223876953, 88.16435241699219, 48.72461700439453, 126.25875854492188, 41.9879150390625, 4.703643798828125, 61.58349609375, 0.9200267791748047, 37.75347900390625, 90.74356842041016, 10.16671371459961, -28.348796844482422, -3.4471092224121094, 17.697586059570312, 29.44561767578125, -28.895614624023438, 144.4986572265625, 161.7807159423828, 40.96881103515625, 7.395809173583984, 55.97917175292969, 3.8406753540039062, 66.98484802246094, 22.456390380859375, 56.41546630859375, 241.62771606445312, 23.202804565429688, -0.73931884765625, 120.33296203613281, -44.44508361816406, 13.365234375, 17.107330322265625, -8.592155456542969, 44.73637390136719, 15.819412231445312, 138.55894470214844, 59.27203369140625, 58.7642822265625, -8.76258659362793, 31.747787475585938, 82.75182342529297, 1.3497810363769531, -45.997718811035156, -14.787437438964844, 29.388885498046875, -0.6935348510742188, 33.137916564941406, 79.56283569335938, 56.914154052734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000311.npy"}
|
||||
{"epoch": 0.47014361300075586, "step": 312, "batch_size": 64, "mean": 44.8417854309082, "std": 51.921627044677734, "min": -84.41460418701172, "p10": -12.136172866821287, "median": 37.39008712768555, "p90": 119.52647171020509, "max": 145.8786163330078, "pos_frac": 0.875, "sample": [29.267375946044922, 3.34033203125, -18.625762939453125, 11.4742431640625, 27.601234436035156, 4.397941589355469, 49.60513687133789, 0.23531723022460938, 26.005035400390625, 19.20124053955078, 138.576171875, 85.95208740234375, 86.51827239990234, 84.2886734008789, 2.5900192260742188, 78.98303985595703, 7.546562194824219, 1.287069320678711, 9.081747055053711, 77.62567138671875, 132.45681762695312, 15.606025695800781, 15.734546661376953, 16.071449279785156, 40.97480010986328, 118.87395477294922, 11.375297546386719, -9.438735961914062, 29.583053588867188, 70.48743438720703, 137.06788635253906, -57.935882568359375, 68.06107330322266, 56.54881286621094, -84.41460418701172, 41.0916862487793, 101.1268539428711, 17.190528869628906, 101.18966674804688, 145.8786163330078, 141.9498291015625, 1.035736083984375, 29.111312866210938, 62.810638427734375, 106.301513671875, 35.35447692871094, 39.425697326660156, 119.80612182617188, 56.409393310546875, -13.292217254638672, -17.815017700195312, 80.89972686767578, 128.6449432373047, 79.55519104003906, 28.991943359375, 83.94851684570312, 75.91413116455078, 44.90931701660156, -75.787109375, 82.27598571777344, 71.63788604736328, -20.07677459716797, 26.62091064453125, 8.761398315429688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000312.npy"}
|
||||
{"epoch": 0.47165532879818595, "step": 313, "batch_size": 64, "mean": 23.659717559814453, "std": 55.492855072021484, "min": -87.21684265136719, "p10": -42.682349014282224, "median": 12.050809860229492, "p90": 100.15139694213867, "max": 169.33563232421875, "pos_frac": 0.6875, "sample": [2.394266128540039, 24.27788543701172, 52.808021545410156, 10.027416229248047, 14.074203491210938, 44.29480743408203, 3.2512474060058594, 30.93158531188965, 67.5131607055664, -67.36741638183594, 1.9225082397460938, 41.640052795410156, 24.63323211669922, 33.44956970214844, 5.876701354980469, -6.483001708984375, 2.9086227416992188, -38.8159294128418, -12.067489624023438, -2.4496307373046875, 49.971961975097656, 85.91509246826172, 4.475465774536133, 148.8935089111328, -87.21684265136719, -36.47723388671875, -7.118499755859375, 19.277923583984375, 98.91386413574219, 33.57263946533203, 140.40614318847656, 100.54508972167969, -64.36552429199219, -2.1175098419189453, 114.07783508300781, 32.047637939453125, 2.3635902404785156, 8.26784896850586, -3.5929641723632812, 135.97149658203125, 31.46767807006836, 23.106998443603516, 50.03521728515625, -7.648918151855469, 169.33563232421875, 0.058868408203125, -8.531723022460938, -17.663307189941406, 25.212081909179688, 0.6930007934570312, 99.15579986572266, 33.89128112792969, 45.94708251953125, -2.0022811889648438, -1.5586585998535156, -79.14002990722656, 74.38106536865234, -80.69422912597656, 114.65754699707031, -44.339385986328125, -47.75864791870117, 24.9110107421875, 4.84173583984375, 99.23278045654297], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000313.npy"}
|
||||
{"epoch": 0.47316704459561604, "step": 314, "batch_size": 64, "mean": 47.22651672363281, "std": 59.1960563659668, "min": -64.43258666992188, "p10": -8.196665382385252, "median": 38.10506057739258, "p90": 135.33892974853515, "max": 170.31100463867188, "pos_frac": 0.75, "sample": [27.194229125976562, 6.765033721923828, 21.66100311279297, 138.55528259277344, 48.83274459838867, 85.12049102783203, -2.74945068359375, 106.9010238647461, -63.444053649902344, 42.976356506347656, 2.7621383666992188, -5.131366729736328, 159.29409790039062, 42.99461364746094, -5.188934326171875, 170.31100463867188, 51.12751388549805, 39.31059265136719, 122.6408920288086, 8.884300231933594, 24.9744873046875, -4.241302490234375, 120.24327087402344, 133.42750549316406, 122.23724365234375, -64.43258666992188, 27.28527069091797, 141.80897521972656, 6.2494659423828125, -1.162933349609375, 93.02597045898438, 51.8681640625, 27.401493072509766, -4.9758148193359375, 1.981985092163086, 46.50410079956055, 77.22821044921875, -24.202743530273438, 15.269380569458008, 89.1302490234375, -9.08540153503418, -0.4448699951171875, 136.15811157226562, 71.68734741210938, 118.80191040039062, 13.605010986328125, 82.53950500488281, 120.23574829101562, -6.122947692871094, 140.1337890625, 48.45091247558594, 126.54220581054688, 84.51611328125, 11.491378784179688, 89.49800109863281, 19.447982788085938, -20.066268920898438, 36.89952850341797, 13.320709228515625, -1.996246337890625, 44.35358428955078, -59.488128662109375, -53.532962799072266, 137.11422729492188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000314.npy"}
|
||||
{"epoch": 0.47467876039304613, "step": 315, "batch_size": 64, "mean": 35.78544616699219, "std": 54.003509521484375, "min": -102.59077453613281, "p10": -29.186462402343746, "median": 36.82982635498047, "p90": 106.92722244262696, "max": 144.92825317382812, "pos_frac": 0.71875, "sample": [-12.491950988769531, 81.28187561035156, 107.46170806884766, -4.44218635559082, -0.4125232696533203, 37.845436096191406, -102.59077453613281, -1.345998764038086, -69.747314453125, 35.81421661376953, -68.89147186279297, 32.815330505371094, -6.518756866455078, 131.33782958984375, -1.8828544616699219, 48.08306121826172, 27.8392333984375, -19.222938537597656, 11.849658966064453, 0.7925510406494141, 44.721168518066406, 72.67311096191406, 1.6166934967041016, 48.7327880859375, 26.071014404296875, 54.48978042602539, 59.59783935546875, -25.475082397460938, 39.5321159362793, 28.018972396850586, 41.30018615722656, 59.6038932800293, 102.29132843017578, 14.283767700195312, 29.55718231201172, 92.7117919921875, 102.00599670410156, -30.777053833007812, -43.175018310546875, 53.79945373535156, 106.9648666381836, -3.9796295166015625, 97.75885009765625, 118.03502655029297, -21.315292358398438, 67.11918640136719, 56.27399444580078, 144.92825317382812, -41.63138198852539, 25.985580444335938, 56.109283447265625, 5.218147277832031, 106.83938598632812, 1.3396377563476562, 80.72747802734375, 10.920242309570312, 100.51940155029297, 60.01239013671875, 121.87506103515625, -51.371826171875, 72.64997100830078, -5.881170272827148, 131.64077758789062, 50.376220703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000315.npy"}
|
||||
{"epoch": 0.47619047619047616, "step": 316, "batch_size": 64, "mean": 47.18065643310547, "std": 55.13800048828125, "min": -54.251556396484375, "p10": -16.759472656249997, "median": 45.306222915649414, "p90": 129.11682739257813, "max": 149.84548950195312, "pos_frac": 0.734375, "sample": [-3.673431396484375, 22.679931640625, 69.92915344238281, -27.49144744873047, -31.717811584472656, -10.101211547851562, 53.750030517578125, 41.091949462890625, 28.169334411621094, 122.07454681396484, 137.40933227539062, 94.853271484375, 81.422607421875, 94.37516784667969, 23.59918212890625, -43.99095153808594, -54.251556396484375, 19.06481170654297, -12.048158645629883, 100.23290252685547, 14.599031448364258, 131.1629638671875, -6.036106109619141, 5.970115661621094, 22.185043334960938, 43.09602737426758, 53.885894775390625, 8.009048461914062, -9.739856719970703, 26.030311584472656, 128.43043518066406, 93.81422424316406, 92.35476684570312, 149.1272430419922, -8.793573379516602, 62.46533203125, -17.599578857421875, 98.39307403564453, 67.44876861572266, 88.80464172363281, 125.22067260742188, 59.585182189941406, 46.91507339477539, -5.755130767822266, 129.41099548339844, 5.775430679321289, 1.3571128845214844, 149.84548950195312, -12.431678771972656, 55.423194885253906, -34.0308837890625, -29.70564842224121, 9.541641235351562, -14.799224853515625, 60.923309326171875, 43.69737243652344, -1.20343017578125, 130.37460327148438, 101.57914733886719, 101.92040252685547, 59.83263397216797, 52.74932861328125, 134.5305938720703, 99.82012176513672], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000316.npy"}
|
||||
{"epoch": 0.47770219198790626, "step": 317, "batch_size": 64, "mean": 49.06956100463867, "std": 57.65219497680664, "min": -106.07009887695312, "p10": -12.654214477539062, "median": 44.96432685852051, "p90": 136.60079803466797, "max": 156.28512573242188, "pos_frac": 0.8125, "sample": [79.2249526977539, 82.76341247558594, 58.308414459228516, 96.672119140625, 46.807891845703125, 156.28512573242188, -21.618865966796875, 52.84697723388672, -37.51853942871094, 36.08305358886719, 3.6838951110839844, 44.498172760009766, -12.578346252441406, 11.424064636230469, 47.8768310546875, 39.11533737182617, 94.970947265625, 38.784393310546875, 87.47171783447266, 144.15785217285156, -49.65574645996094, -5.031364440917969, 37.34324645996094, -54.88285827636719, 37.507110595703125, 38.939453125, 112.4117431640625, 77.34163665771484, 80.01851654052734, -106.07009887695312, 136.58470153808594, 11.468582153320312, 65.58956909179688, 143.9967498779297, 136.60769653320312, -62.32933044433594, 57.45391845703125, 1.8685798645019531, 76.69023132324219, 145.64501953125, 18.167041778564453, 47.59820556640625, 147.70846557617188, -6.405998229980469, 26.14122772216797, 7.276676177978516, 21.855514526367188, 123.6649169921875, 13.502243041992188, -1.9424285888671875, 73.31803894042969, 27.061481475830078, 45.43048095703125, 139.88992309570312, -7.345451354980469, 55.09901428222656, -12.686729431152344, 61.121917724609375, 127.37976837158203, 104.88491821289062, 1.1672134399414062, 119.89385986328125, 36.65406799316406, 40.2608642578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000317.npy"}
|
||||
{"epoch": 0.47921390778533635, "step": 318, "batch_size": 64, "mean": 34.17611312866211, "std": 71.8602523803711, "min": -118.42784881591797, "p10": -57.703932952880855, "median": 30.794424057006836, "p90": 143.7298812866211, "max": 182.735595703125, "pos_frac": 0.671875, "sample": [33.34090805053711, 116.00753021240234, 5.9703521728515625, -87.86776733398438, 126.87259674072266, 123.346923828125, 39.30291748046875, -36.061370849609375, 64.9198226928711, 143.36367797851562, 20.682910919189453, -52.10387420654297, 64.44277954101562, 55.505767822265625, 130.48974609375, -2.664398193359375, 24.259368896484375, -45.199058532714844, 42.03956604003906, 3.4640026092529297, -16.40563201904297, 60.32080841064453, 144.08706665039062, -13.487861633300781, 7.611961364746094, 58.49103927612305, 148.2135467529297, -94.73252868652344, 113.89205932617188, 89.27883911132812, -26.08130645751953, 28.247940063476562, -48.273597717285156, 26.680561065673828, 2.1325912475585938, 182.735595703125, 167.85768127441406, -44.13756561279297, 143.88682556152344, 7.497718811035156, -60.10395812988281, 47.60023498535156, 61.02550506591797, -118.42784881591797, -1.381927490234375, 60.05381774902344, 21.559776306152344, 148.97291564941406, -1.0025558471679688, 90.24577331542969, 0.6113872528076172, 147.83779907226562, 46.16380310058594, 83.75102233886719, -81.66879272460938, -24.826141357421875, 68.33662414550781, -77.40901184082031, 52.80195617675781, -3.933868408203125, 79.20781707763672, -69.74723052978516, -28.16027069091797, 37.83618927001953], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000318.npy"}
|
||||
{"epoch": 0.48072562358276644, "step": 319, "batch_size": 64, "mean": 47.89008331298828, "std": 65.82162475585938, "min": -88.8147201538086, "p10": -33.85972442626952, "median": 50.24041748046875, "p90": 141.13989562988283, "max": 190.4395751953125, "pos_frac": 0.75, "sample": [52.925132751464844, 148.8020782470703, 20.18280029296875, 146.00888061523438, 24.535184860229492, 119.60073852539062, 2.575946807861328, 50.909454345703125, 48.25836181640625, 51.24462890625, 134.172607421875, 123.76692962646484, -4.301998138427734, 0.8776168823242188, 16.904190063476562, 62.1956787109375, 11.183326721191406, 158.47213745117188, 4.747245788574219, 84.2537841796875, 4.775318145751953, -25.545120239257812, 52.121185302734375, 70.81741333007812, 172.2425994873047, -8.307846069335938, 138.31417846679688, 69.61246490478516, 141.78335571289062, 137.80662536621094, -37.423126220703125, -62.68754577636719, 54.961090087890625, 190.4395751953125, -7.736358642578125, 72.56071472167969, -45.87532043457031, 99.54164123535156, 68.1841049194336, -58.602840423583984, 139.63848876953125, 49.571380615234375, -18.393882751464844, -2.3708267211914062, 11.740013122558594, 14.20728874206543, -74.63203430175781, -6.4373931884765625, -55.385581970214844, 102.83309936523438, -9.413742065429688, 39.35498046875, 30.1971435546875, -20.280258178710938, 37.70136260986328, 104.21206665039062, 63.947288513183594, 73.35594177246094, 76.03128051757812, 13.695981979370117, 90.45626831054688, -88.8147201538086, 145.00762939453125, 64.44691467285156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000319.npy"}
|
||||
{"epoch": 0.48223733938019653, "step": 320, "batch_size": 64, "mean": 39.52253723144531, "std": 66.62867736816406, "min": -131.54820251464844, "p10": -28.594595527648917, "median": 9.512622833251953, "p90": 128.54203186035159, "max": 220.0931396484375, "pos_frac": 0.671875, "sample": [103.02229309082031, -47.76602554321289, 0.8324699401855469, 48.09295654296875, 11.346473693847656, 43.339134216308594, 131.90573120117188, 3.175323486328125, 90.69481658935547, -2.3284168243408203, -38.47651672363281, 125.1287841796875, -0.17903709411621094, -0.6333999633789062, -50.00530242919922, 123.67608642578125, 68.93758392333984, 4.130439758300781, -19.852203369140625, 103.77774047851562, -20.159223556518555, 2.1303367614746094, -131.54820251464844, 0.7805576324462891, 9.438919067382812, 220.0931396484375, 151.919189453125, 122.29181671142578, 115.24354553222656, -11.764083862304688, 51.989158630371094, -18.027381896972656, 9.586326599121094, 7.652801513671875, -0.7141895294189453, 130.00485229492188, 7.8141632080078125, -32.209754943847656, -6.219882965087891, -11.960468292236328, 49.63327407836914, 151.9725799560547, 7.868476867675781, 63.04829406738281, 7.1714630126953125, 64.23997497558594, 154.98348999023438, 56.97718811035156, -7.667510986328125, -39.162689208984375, 2.760822296142578, -62.647666931152344, 156.17391967773438, -5.311248779296875, -5.244976043701172, 56.701812744140625, 16.02178955078125, 105.23585510253906, 99.7298583984375, 71.78388977050781, 73.92291259765625, -7.91937255859375, 115.95120239257812, 108.05848693847656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000320.npy"}
|
||||
{"epoch": 0.4837490551776266, "step": 321, "batch_size": 64, "mean": 44.25755310058594, "std": 65.70555877685547, "min": -111.31869506835938, "p10": -52.35037765502929, "median": 37.72460174560547, "p90": 131.5710662841797, "max": 146.0404815673828, "pos_frac": 0.78125, "sample": [113.52162170410156, 91.86654663085938, 2.768634796142578, 111.76737213134766, 113.63726806640625, -33.343788146972656, 146.0404815673828, 87.69175720214844, 133.19003295898438, 43.430397033691406, 144.67823791503906, 37.00724411010742, 50.95210647583008, 74.13985443115234, -111.31869506835938, 7.924465179443359, 24.26386260986328, 72.12708282470703, 36.720245361328125, 128.60809326171875, 37.82398986816406, 22.276214599609375, -58.533714294433594, -0.4930572509765625, 52.49169158935547, -78.52360534667969, -78.55833435058594, -75.63607788085938, 67.40892028808594, 29.304855346679688, 34.42732238769531, 55.86666488647461, 111.7677001953125, 76.41371154785156, 4.267860412597656, 99.31106567382812, 69.44642639160156, -1.3777103424072266, 31.195301055908203, 128.85357666015625, 2.7701950073242188, -1.6771354675292969, 31.381969451904297, 104.71845245361328, -0.8164825439453125, -96.90435791015625, 128.71807861328125, 145.46218872070312, 130.02313232421875, 66.95613098144531, 8.129653930664062, 3.712686538696289, 37.625213623046875, 132.2935028076172, 132.23446655273438, -11.551933288574219, 31.169151306152344, 69.26731872558594, 144.16815185546875, -44.09272003173828, 14.074485778808594, 0.356536865234375, 56.948646545410156, -55.889373779296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000321.npy"}
|
||||
{"epoch": 0.4852607709750567, "step": 322, "batch_size": 64, "mean": 52.944183349609375, "std": 60.764747619628906, "min": -106.28208923339844, "p10": -11.260013580322266, "median": 45.0853271484375, "p90": 137.9376693725586, "max": 150.37905883789062, "pos_frac": 0.859375, "sample": [-15.897260665893555, 76.60225677490234, 141.21234130859375, 19.841781616210938, 107.4521484375, 11.027732849121094, 150.37905883789062, 106.19461822509766, 135.0933074951172, 29.72498893737793, 42.062591552734375, 52.599090576171875, 133.51251220703125, 145.79676818847656, 62.761322021484375, 2.4183197021484375, 19.248565673828125, 53.80943298339844, 4.324134826660156, 14.54473876953125, 84.27100372314453, 4.060966491699219, -83.63509368896484, 134.71629333496094, 124.20018005371094, 10.575305938720703, 32.711761474609375, 30.67154884338379, 0.3938331604003906, -8.336593627929688, -46.56398010253906, 103.95938110351562, 57.33982849121094, 131.78759765625, 107.093994140625, 63.89491271972656, 13.983894348144531, 144.170166015625, 7.93768310546875, -11.34503173828125, 74.95124816894531, 50.29228210449219, 1.468057632446289, 137.0955352783203, 40.000099182128906, 137.08505249023438, 138.298583984375, 83.02828979492188, -24.057647705078125, -106.28208923339844, 40.41731643676758, 14.631698608398438, 28.775257110595703, 52.48616027832031, 141.20460510253906, 32.90754699707031, 48.108062744140625, 35.77134704589844, 74.99102783203125, -52.38218307495117, 143.6941680908203, -11.061637878417969, 91.79155731201172, 20.617250442504883], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000322.npy"}
|
||||
{"epoch": 0.48677248677248675, "step": 323, "batch_size": 64, "mean": 34.65920639038086, "std": 73.5648193359375, "min": -129.0900115966797, "p10": -44.23508148193359, "median": 11.432048797607422, "p90": 143.87883758544922, "max": 177.05331420898438, "pos_frac": 0.640625, "sample": [-1.5988998413085938, -95.29885864257812, 72.19021606445312, 40.96446990966797, -11.504051208496094, 119.70956420898438, 23.766494750976562, 13.317852020263672, 7.764808654785156, 137.78427124023438, 39.802978515625, -22.075483322143555, 141.67254638671875, 6.032127380371094, 129.6641845703125, -38.468875885009766, -56.1123046875, 88.20068359375, 112.66439056396484, 3.0932464599609375, 58.93943786621094, 144.21339416503906, 5.052490234375, 11.440055847167969, 155.99810791015625, -5.025596618652344, -45.46403503417969, 79.90052032470703, 0.7877407073974609, -25.74444580078125, 177.05331420898438, -41.367523193359375, 11.424041748046875, -70.15461730957031, 147.5341033935547, 12.929450988769531, -84.79559326171875, 140.65191650390625, 72.59591674804688, -0.34372520446777344, 9.587089538574219, 67.46163177490234, -5.85064697265625, -7.711250305175781, 159.0106658935547, 159.81321716308594, 143.09820556640625, -129.0900115966797, 145.25503540039062, 2.3598556518554688, -123.5713882446289, -2.0665359497070312, 99.77053833007812, 14.049728393554688, -17.42926025390625, 81.81623077392578, 84.48544311523438, 57.87342834472656, 8.958145141601562, -3.817241668701172, 39.69650650024414, -3.0149002075195312, -1.9100189208984375, -17.779617309570312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000323.npy"}
|
||||
{"epoch": 0.48828420256991684, "step": 324, "batch_size": 64, "mean": 49.47395324707031, "std": 60.19646453857422, "min": -102.75109100341797, "p10": -8.916389656066894, "median": 35.818294525146484, "p90": 134.01270294189453, "max": 171.37435913085938, "pos_frac": 0.828125, "sample": [79.06590270996094, 43.8226318359375, 28.295120239257812, 9.749778747558594, -73.30532836914062, 151.0709686279297, 60.451927185058594, 62.21247863769531, -23.165328979492188, 0.4501953125, -0.7421646118164062, 123.03262329101562, 78.75306701660156, 50.259342193603516, 107.23502349853516, 19.401348114013672, 134.49594116210938, -29.975311279296875, 87.98152923583984, 2.58416748046875, 44.96800231933594, 30.91162109375, 1.4310417175292969, 85.36685943603516, 32.648902893066406, 13.690452575683594, -4.411018371582031, 31.650230407714844, 171.37435913085938, 11.506595611572266, 17.955535888671875, -102.75109100341797, 53.96405029296875, 130.30101013183594, 21.831954956054688, 63.222591400146484, 27.684364318847656, -9.49567985534668, 87.33216857910156, 98.40277099609375, 38.98768615722656, -38.99845886230469, 170.25399780273438, 18.693153381347656, 147.86932373046875, 24.985668182373047, 1.1573562622070312, 132.88514709472656, 61.120662689208984, 129.18365478515625, 155.90557861328125, 0.4935417175292969, 3.925323486328125, 110.73661804199219, 140.03965759277344, -2.6346588134765625, 45.84300994873047, -7.5647125244140625, -11.726463317871094, 50.951656341552734, 124.54872131347656, 122.90365600585938, 25.2550048828125, 2.2592411041259766], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000324.npy"}
|
||||
{"epoch": 0.4897959183673469, "step": 325, "batch_size": 64, "mean": 44.60791015625, "std": 70.3342514038086, "min": -177.84791564941406, "p10": -35.6388328552246, "median": 42.98495864868164, "p90": 130.91165466308595, "max": 187.34457397460938, "pos_frac": 0.75, "sample": [35.472965240478516, 127.48670959472656, 2.244495391845703, 114.25502014160156, -39.47309112548828, 88.43934631347656, 89.37855529785156, 58.82585906982422, 63.00129699707031, 61.409385681152344, 66.59007263183594, 120.23729705810547, -8.52537727355957, -26.692230224609375, 36.96110534667969, -7.953922271728516, 131.9720001220703, 100.01638793945312, 119.48265838623047, -111.43576049804688, 155.62820434570312, -24.21721649169922, 123.49689483642578, 1.647146224975586, -0.5915451049804688, 160.13876342773438, 17.51291275024414, 128.43751525878906, -53.334171295166016, 56.94122314453125, -79.78251647949219, 44.82692337036133, 101.82836151123047, 41.33050537109375, -69.55335235595703, 51.463592529296875, -56.10879135131836, 117.45425415039062, 39.974998474121094, 145.83843994140625, 63.56282043457031, 20.208173751831055, 11.879730224609375, 187.34457397460938, -177.84791564941406, 44.63941192626953, 38.36224365234375, 20.585662841796875, 111.73170471191406, 97.16615295410156, 146.91925048828125, -7.87908935546875, 37.41520690917969, -11.07940673828125, 10.677101135253906, -14.704124450683594, 47.41139221191406, 54.34565353393555, 151.2759552001953, 49.3524169921875, 14.492172241210938, 34.495079040527344, 18.64422607421875, -18.71710205078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000325.npy"}
|
||||
{"epoch": 0.491307634164777, "step": 326, "batch_size": 64, "mean": 19.879615783691406, "std": 68.08990478515625, "min": -110.77981567382812, "p10": -75.50988006591797, "median": 9.533332824707031, "p90": 110.36482315063478, "max": 153.04664611816406, "pos_frac": 0.578125, "sample": [-13.490776062011719, -6.645900726318359, 6.799457550048828, 26.467205047607422, 34.898101806640625, -103.07096862792969, 153.04664611816406, 105.70477294921875, -5.966094970703125, 80.1830062866211, -47.93000793457031, 43.9208869934082, 21.497478485107422, 3.57452392578125, 60.00519943237305, 104.6202163696289, 7.007802963256836, 82.77883911132812, -6.8821563720703125, 37.63493347167969, 123.18614196777344, -2.2154083251953125, 24.30621337890625, 75.97964477539062, -80.0971908569336, -38.817718505859375, -110.77981567382812, -13.961090087890625, -27.651519775390625, -6.273704528808594, 148.57925415039062, -103.12090301513672, 30.22620391845703, -6.300312042236328, 45.139808654785156, 7.853721618652344, 4.6719970703125, -31.647003173828125, -52.82581329345703, -17.080902099609375, 100.80175018310547, 139.67242431640625, -78.27392578125, -5.885154724121094, 119.34974670410156, 27.744232177734375, 21.17374038696289, -103.198486328125, 101.32013702392578, 139.78688049316406, -3.508892059326172, -4.014814376831055, -68.18626403808594, 111.29505157470703, -2.7286376953125, 11.212944030761719, -104.38739013671875, 73.10446166992188, 65.73117065429688, 17.225006103515625, 106.86784362792969, -69.06044006347656, 14.734954833984375, 108.19429016113281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000326.npy"}
|
||||
{"epoch": 0.4928193499622071, "step": 327, "batch_size": 64, "mean": 35.64569091796875, "std": 71.77677917480469, "min": -114.15055847167969, "p10": -66.31155471801758, "median": 28.003013610839844, "p90": 136.43516082763676, "max": 175.7239990234375, "pos_frac": 0.6875, "sample": [49.85962677001953, -63.73925018310547, -24.08263397216797, 127.57022094726562, 46.56494140625, 32.0826416015625, -84.94461822509766, 68.5473861694336, -92.9422378540039, 79.53678894042969, 152.41757202148438, 24.782352447509766, -87.28427124023438, 118.29754638671875, 31.69420623779297, 140.5382537841797, 127.13583374023438, 12.30105972290039, 41.43736267089844, -67.41397094726562, 0.21354103088378906, 9.130191802978516, 60.259605407714844, 168.20858764648438, 140.2344207763672, 18.574722290039062, 25.577377319335938, 91.23130798339844, 77.96123504638672, -1.16595458984375, 18.040679931640625, 4.429248809814453, -88.08938598632812, 112.21308898925781, 104.00332641601562, -6.410163879394531, 44.18522644042969, 33.64015197753906, -5.700469970703125, 1.9879875183105469, 149.64273071289062, 20.671329498291016, -114.15055847167969, -0.8907012939453125, 102.4590835571289, 175.7239990234375, -39.81584167480469, 118.46780395507812, 142.97662353515625, 37.14423370361328, 84.8111801147461, -4.459573745727539, 21.547069549560547, 7.466739654541016, -41.70695114135742, -21.145639419555664, 119.7935562133789, 114.0151596069336, 30.42864990234375, -4.715511322021484, -13.879127502441406, -99.70130920410156, 62.850372314453125, -7.0928497314453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000327.npy"}
|
||||
{"epoch": 0.4943310657596372, "step": 328, "batch_size": 64, "mean": 43.420433044433594, "std": 64.16773986816406, "min": -119.3494644165039, "p10": -20.92937545776367, "median": 23.948633193969727, "p90": 133.16045989990235, "max": 174.28060913085938, "pos_frac": 0.75, "sample": [102.05836486816406, 3.824373245239258, 6.187341690063477, 35.49928283691406, 127.32867431640625, 4.411413192749023, -7.222385406494141, 99.91738891601562, 74.81609344482422, 18.136314392089844, 13.249580383300781, 118.87213134765625, 133.21839904785156, 12.97372055053711, 58.1512451171875, -51.52204895019531, 142.78732299804688, 17.89174461364746, 28.440811157226562, -5.353702545166016, 27.953262329101562, -10.0841064453125, 4.863813400268555, 28.543392181396484, -21.586502075195312, -33.41039276123047, 161.1593017578125, 16.328330993652344, -75.40684509277344, 33.741966247558594, -119.3494644165039, 125.83844757080078, 97.34208679199219, 2.353302001953125, 134.76658630371094, -19.396080017089844, 20.8388671875, -8.255126953125, 174.28060913085938, 25.466064453125, 125.21746826171875, 145.68357849121094, -3.4080429077148438, -31.56053352355957, 14.516616821289062, 120.2608413696289, -3.2793025970458984, 11.192855834960938, -6.2745208740234375, 22.431201934814453, 77.4255142211914, -48.51716613769531, 86.42801666259766, 25.793060302734375, 13.237747192382812, 81.39483642578125, 106.56385040283203, -1.4932613372802734, 133.0252685546875, 36.63451385498047, 103.4280014038086, 0.6696434020996094, 149.1649627685547, 120.71904754638672], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000328.npy"}
|
||||
{"epoch": 0.4958427815570673, "step": 329, "batch_size": 64, "mean": 44.84990310668945, "std": 64.9861068725586, "min": -79.9104232788086, "p10": -27.434095001220697, "median": 30.683728218078613, "p90": 146.81427001953125, "max": 180.67709350585938, "pos_frac": 0.734375, "sample": [16.405576705932617, 18.001422882080078, 55.77159118652344, 44.2423095703125, -60.06062316894531, 28.72637176513672, 180.67709350585938, -14.980789184570312, 89.98281860351562, 31.636207580566406, 109.86935424804688, 51.962066650390625, -9.766973495483398, 102.1326904296875, 1.1488208770751953, 22.097557067871094, 38.468353271484375, 120.53564453125, -9.79876708984375, 33.877288818359375, 0.61810302734375, 29.73124885559082, 151.1182098388672, 52.246368408203125, -79.9104232788086, 147.88055419921875, 154.2360382080078, 118.84952545166016, 43.8526611328125, 33.390480041503906, 150.40542602539062, 120.12898254394531, -35.271522521972656, 103.45501708984375, 1.512258529663086, 7.6156158447265625, 139.2108154296875, 1.3813304901123047, 145.17428588867188, 83.65391540527344, 162.3338623046875, 124.49859619140625, 5.857433319091797, -62.17057800292969, -5.104095458984375, 133.3150634765625, 2.1218605041503906, 26.430084228515625, 14.188827514648438, 36.541873931884766, -23.35824203491211, 43.427825927734375, -0.2023162841796875, -29.180889129638672, 147.51712036132812, 93.68170166015625, -14.238664627075195, 79.69567108154297, -12.489120483398438, -51.19439697265625, 25.670639038085938, -0.4999542236328125, -0.3083209991455078, -46.34727478027344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000329.npy"}
|
||||
{"epoch": 0.4973544973544973, "step": 330, "batch_size": 64, "mean": 48.92347717285156, "std": 79.25326538085938, "min": -149.01187133789062, "p10": -58.8508918762207, "median": 54.97176742553711, "p90": 145.30751953125, "max": 173.5733642578125, "pos_frac": 0.734375, "sample": [1.8088912963867188, -24.40923309326172, -15.359312057495117, -131.34547424316406, 54.388450622558594, -149.01187133789062, 118.98359680175781, 23.315696716308594, -28.550437927246094, 137.127197265625, 135.40487670898438, 63.57240295410156, -91.17073059082031, -1.3058242797851562, 20.575366973876953, 144.75564575195312, 156.35791015625, -72.06969451904297, 173.5733642578125, 146.17172241210938, 95.47401428222656, 0.35720062255859375, 120.83663940429688, 2.580911636352539, 137.4235382080078, 122.59577941894531, -0.5019454956054688, 11.580757141113281, -55.83051300048828, 6.4262542724609375, 65.68788146972656, 120.02881622314453, -6.0446929931640625, 56.28852462768555, -68.52847290039062, 134.8079833984375, 100.22856903076172, 150.22601318359375, 88.09367370605469, 18.26775360107422, 7.671211242675781, 102.17988586425781, 62.54156494140625, 106.19417572021484, 137.84243774414062, 133.20982360839844, 1.8574504852294922, 61.33208084106445, -3.9136962890625, 24.941146850585938, -0.956573486328125, 20.018096923828125, 145.54403686523438, 131.97784423828125, -33.73066711425781, 24.960250854492188, 55.555084228515625, -60.14533996582031, 155.20330810546875, -93.7376708984375, 109.36495971679688, 128.70281982421875, 2.4920120239257812, 149.18685913085938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000330.npy"}
|
||||
{"epoch": 0.4988662131519274, "step": 331, "batch_size": 64, "mean": 59.86686706542969, "std": 73.44036865234375, "min": -151.8413543701172, "p10": -11.28609657287597, "median": 71.17495727539062, "p90": 148.15316009521484, "max": 218.5478057861328, "pos_frac": 0.8125, "sample": [154.98097229003906, 9.345718383789062, 23.838871002197266, 101.1202392578125, 121.2125244140625, 156.20565795898438, 96.46516418457031, 85.51350402832031, 106.00506591796875, 4.7824249267578125, -4.204212188720703, 166.4769744873047, 142.87953186035156, 145.90985107421875, 121.24226379394531, 9.754463195800781, 159.45742797851562, 38.57249450683594, 36.670921325683594, 149.1145782470703, 105.50776672363281, 3.57159423828125, 9.929489135742188, 7.04168701171875, -22.223590850830078, 140.69580078125, 97.33184051513672, 218.5478057861328, 21.541152954101562, 111.57388305664062, 86.57171630859375, 55.12583541870117, 144.7687530517578, 83.33856201171875, 0.2136688232421875, -1.1878890991210938, 68.09903717041016, -94.98617553710938, -81.08383178710938, 74.2508773803711, -67.95378112792969, 157.51864624023438, 35.541412353515625, -2.536346435546875, 30.0589542388916, -151.8413543701172, 84.2622299194336, -0.039093017578125, -14.321189880371094, 24.810625076293945, 21.39429473876953, 96.624755859375, 91.72650146484375, 34.9990234375, 52.046958923339844, 5.925880432128906, -0.8603086471557617, 133.96749877929688, 88.09304809570312, -87.85578918457031, 144.1114959716797, 82.35678100585938, 76.35528564453125, 143.12155151367188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000331.npy"}
|
||||
{"epoch": 0.5003779289493575, "step": 332, "batch_size": 64, "mean": 49.65996551513672, "std": 66.72908782958984, "min": -96.75276184082031, "p10": -4.722867012023926, "median": 26.077957153320312, "p90": 147.5967987060547, "max": 179.15301513671875, "pos_frac": 0.78125, "sample": [-53.62648391723633, -2.5827407836914062, 4.694648742675781, -1.2331008911132812, 15.790117263793945, 120.07485961914062, 14.14410400390625, 31.00650978088379, 23.166610717773438, 37.15902328491211, 0.7907638549804688, 179.15301513671875, 111.93501281738281, 78.46665954589844, 146.24217224121094, 22.522354125976562, 15.803314208984375, 0.10871124267578125, 96.25233459472656, 45.01832580566406, 83.65921783447266, 11.061874389648438, -96.75276184082031, 144.40904235839844, 114.72300720214844, 116.02872467041016, -64.67548370361328, 16.794410705566406, 4.5193023681640625, -4.782447814941406, 42.412986755371094, 148.17735290527344, 21.815948486328125, -3.2487030029296875, -2.10601806640625, 28.957046508789062, -2.928579330444336, 168.02496337890625, -79.3561019897461, 8.797208786010742, 103.48443603515625, 131.4069366455078, 104.4375991821289, -4.583845138549805, 24.974044799804688, 3.6340255737304688, 105.09123229980469, 121.10704040527344, 119.68727111816406, 60.085296630859375, 164.22816467285156, 7.785125732421875, 67.52784729003906, -1.60211181640625, 28.418373107910156, -18.739456176757812, 151.86865234375, 5.638904571533203, 19.422500610351562, 152.1380615234375, 177.119873046875, 118.82014465332031, -31.311573028564453, 27.181869506835938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000332.npy"}
|
||||
{"epoch": 0.5018896447467877, "step": 333, "batch_size": 64, "mean": 52.69178009033203, "std": 66.81466674804688, "min": -108.35968780517578, "p10": -13.51795978546142, "median": 47.002681732177734, "p90": 152.48191375732424, "max": 193.49844360351562, "pos_frac": 0.78125, "sample": [13.950111389160156, -16.273344039916992, 10.533905029296875, 81.32247924804688, 18.58319091796875, -0.6412086486816406, 29.645885467529297, 61.37034606933594, 5.615024566650391, -47.85498046875, 51.282249450683594, 77.44168090820312, 96.74337005615234, 117.06529998779297, 136.97610473632812, 25.035003662109375, 52.7381591796875, 152.97300720214844, 176.46990966796875, 158.1572723388672, -7.0887298583984375, 72.698486328125, 52.63037872314453, 37.910491943359375, 52.85999298095703, -56.74597930908203, -28.675647735595703, 48.526092529296875, 1.1535568237304688, 45.479270935058594, 153.2975616455078, 146.23191833496094, -20.171646118164062, 58.18639373779297, 103.62413024902344, 28.301631927490234, 172.1357421875, 4.297843933105469, 9.532184600830078, 24.698638916015625, -0.9511432647705078, 55.13189697265625, -108.35968780517578, 3.889371871948242, 59.187339782714844, -73.06730651855469, 193.49844360351562, 32.09900665283203, -4.864936828613281, 28.660789489746094, -4.612091064453125, -3.873779296875, 101.72112274169922, 151.33602905273438, 6.678266525268555, -1.1455764770507812, 89.92040252685547, 8.732612609863281, 141.906005859375, 61.005615234375, 179.05023193359375, 88.1335220336914, 137.30459594726562, 130.87710571289062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000333.npy"}
|
||||
{"epoch": 0.5034013605442177, "step": 334, "batch_size": 64, "mean": 38.0953254699707, "std": 76.85939025878906, "min": -133.00387573242188, "p10": -59.42869873046874, "median": 22.846996307373047, "p90": 147.33215484619143, "max": 188.5601806640625, "pos_frac": 0.6875, "sample": [-65.14391326904297, -133.00387573242188, 60.209251403808594, 82.3730239868164, 70.92793273925781, -16.399261474609375, -115.69287109375, 120.61937713623047, 21.797752380371094, -127.23607635498047, 5.2492828369140625, 188.5601806640625, -10.444555282592773, 85.46868896484375, 2.9039840698242188, 96.17493438720703, -22.964996337890625, -19.781137466430664, 33.14520263671875, 96.03517150878906, 87.18942260742188, 160.84078979492188, 24.795555114746094, 83.47598266601562, 87.71693420410156, -65.13983154296875, 23.896240234375, 0.35521507263183594, 130.62429809570312, 6.947574615478516, -2.706392288208008, 0.85675048828125, 155.79348754882812, -104.921142578125, 90.79449462890625, 149.33644104003906, 57.682861328125, 20.690750122070312, 26.206756591796875, -3.2056808471679688, 6.022163391113281, -23.65325164794922, -8.346969604492188, -14.803054809570312, 151.23605346679688, -0.4299468994140625, 0.5358657836914062, 8.304779052734375, -46.10272216796875, 108.29290771484375, 122.55326080322266, 57.992225646972656, -68.09867858886719, 101.46952819824219, -43.332679748535156, 21.4732666015625, 39.27134704589844, 155.9081573486328, -2.901214599609375, 17.3280086517334, 142.65548706054688, 104.04737091064453, 183.39215087890625, 141.25814819335938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000334.npy"}
|
||||
{"epoch": 0.5049130763416477, "step": 335, "batch_size": 64, "mean": 43.97538375854492, "std": 73.06713104248047, "min": -116.17575073242188, "p10": -40.06815948486328, "median": 29.949918746948242, "p90": 152.032275390625, "max": 182.54904174804688, "pos_frac": 0.75, "sample": [101.42842102050781, 71.83092498779297, -33.924537658691406, 176.15957641601562, 29.211456298828125, 92.45156860351562, 35.68141174316406, 1.4838504791259766, 182.54904174804688, 103.2912826538086, 160.51358032226562, 143.40325927734375, 30.270858764648438, 66.64848327636719, 155.10092163085938, 148.58090209960938, 7.107917785644531, 24.44432258605957, 29.807472229003906, 152.99928283691406, -43.72079849243164, 5.2758331298828125, 51.32190704345703, 160.83837890625, -44.61622619628906, 3.07733154296875, 93.19902038574219, -32.99974060058594, -24.57657814025879, 70.9844741821289, 140.1142120361328, 7.901023864746094, 5.4843597412109375, 33.675331115722656, 134.84417724609375, -41.62592697143555, 152.05862426757812, -4.070217132568359, 22.611000061035156, 151.97079467773438, 7.910179138183594, -6.45768928527832, 37.09068298339844, 25.210594177246094, 68.95439910888672, 100.36573028564453, -15.765480041503906, -105.94429016113281, 30.092365264892578, 1.025125503540039, 52.83638000488281, -36.43336868286133, -8.497123718261719, -116.17575073242188, -112.63340759277344, -46.827415466308594, 137.76087951660156, 108.60838317871094, 1.6452789306640625, 82.73709106445312, -6.2699737548828125, 8.980369567871094, 13.351509094238281, 72.07308959960938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000335.npy"}
|
||||
{"epoch": 0.5064247921390779, "step": 336, "batch_size": 64, "mean": 44.56208801269531, "std": 74.8921127319336, "min": -102.9402847290039, "p10": -42.52296142578125, "median": 27.31346035003662, "p90": 152.43649139404297, "max": 222.07095336914062, "pos_frac": 0.734375, "sample": [-57.100982666015625, -1.006011962890625, 141.54556274414062, 36.745819091796875, 222.07095336914062, 150.08705139160156, 69.89785766601562, -1.8910446166992188, 153.23394775390625, 1.063455581665039, 2.9214344024658203, -5.669382095336914, 156.57521057128906, -7.3436126708984375, 136.47401428222656, 9.38254165649414, 7.146575927734375, 96.98837280273438, 169.14666748046875, -94.77875518798828, -39.54405212402344, 57.82106018066406, 167.85037231445312, 62.81102752685547, -5.2438507080078125, 122.77511596679688, 39.89101028442383, -102.9402847290039, -30.40191650390625, -58.366432189941406, 169.08204650878906, -71.21542358398438, 56.95423126220703, 27.72698974609375, 13.05307388305664, 9.336713790893555, 1.0378570556640625, -2.6931533813476562, 60.705711364746094, 140.15789794921875, 5.179328918457031, -17.30449676513672, 64.40467834472656, 6.9951324462890625, 134.5712127685547, 32.5556640625, 65.71055603027344, 0.019378662109375, 26.899930953979492, 108.52224731445312, 7.220184326171875, 1.89697265625, 81.94373321533203, 150.5757598876953, 217.1254425048828, 61.58275604248047, -57.413970947265625, 6.874053955078125, 74.01631927490234, -18.73712158203125, 33.799530029296875, 86.1690673828125, 18.879531860351562, -43.79963684082031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000336.npy"}
|
||||
{"epoch": 0.5079365079365079, "step": 337, "batch_size": 64, "mean": 35.575401306152344, "std": 67.21504211425781, "min": -134.653076171875, "p10": -36.01326599121092, "median": 30.03717803955078, "p90": 129.94898223876956, "max": 179.3150634765625, "pos_frac": 0.703125, "sample": [8.032524108886719, -75.63048553466797, -134.653076171875, -22.884613037109375, 38.03050231933594, 114.59344482421875, -5.365999221801758, 123.51789855957031, 38.05877685546875, -7.296379089355469, 139.21502685546875, 17.99156951904297, 44.739463806152344, -120.24134826660156, -41.63983154296875, -14.038078308105469, 31.492752075195312, 91.80563354492188, 36.82117462158203, -17.786117553710938, 7.960990905761719, 1.1735095977783203, -11.010574340820312, -63.55056381225586, 111.12513732910156, 86.16002655029297, -9.731719970703125, 36.838836669921875, 47.94691467285156, 165.8656768798828, 22.221214294433594, 4.334861755371094, 140.3975830078125, -4.60968017578125, 120.89071655273438, 81.29205322265625, 131.4999542236328, -6.8757476806640625, 0.8557853698730469, 78.88104248046875, 147.00640869140625, 9.106803894042969, 137.4484405517578, 20.065948486328125, 69.95431518554688, -20.816909790039062, 76.02985382080078, 11.734695434570312, 41.789581298828125, 28.58160400390625, 76.43598937988281, 179.3150634765625, -0.8810806274414062, -16.778594970703125, 32.97431945800781, 96.11351013183594, 7.601770401000977, -42.33654022216797, 75.41790771484375, 78.34801483154297, -100.04179382324219, 18.7261962890625, 126.33004760742188, 38.27138137817383], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000337.npy"}
|
||||
{"epoch": 0.509448223733938, "step": 338, "batch_size": 64, "mean": 37.98067855834961, "std": 71.07381439208984, "min": -149.0956268310547, "p10": -27.820951461791992, "median": 20.669565200805664, "p90": 143.07212677001957, "max": 168.91796875, "pos_frac": 0.765625, "sample": [-19.567596435546875, 21.74545669555664, 151.55303955078125, 132.11863708496094, 135.789794921875, 122.82099151611328, -100.0429458618164, 118.34606170654297, 19.593673706054688, 145.7284698486328, 162.052978515625, 10.048652648925781, 1.9940948486328125, 24.871368408203125, 2.6729049682617188, -81.25697326660156, 7.370086669921875, -103.58134460449219, -5.8422698974609375, 2.3088760375976562, -20.813133239746094, 136.87399291992188, 27.98642349243164, 4.099882125854492, 14.276161193847656, 86.6287841796875, -149.0956268310547, 92.49445343017578, 80.82473754882812, 68.79573822021484, 4.607124328613281, 71.787353515625, 15.72854995727539, 25.199474334716797, 83.5386734008789, 67.1129150390625, 3.4285049438476562, 14.897552490234375, 168.91796875, 10.516586303710938, 15.053031921386719, 111.72012329101562, -16.078697204589844, 101.66853332519531, 2.872722625732422, 26.498687744140625, -47.36518096923828, -28.225723266601562, 44.295875549316406, -26.876483917236328, 28.465606689453125, 155.17149353027344, 3.5688514709472656, 3.082366943359375, 69.04881286621094, 72.27757263183594, -12.183334350585938, 150.96414184570312, 129.11444091796875, -75.31132507324219, -23.226686477661133, 41.4619140625, 149.78379821777344, -1.5472908020019531], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000338.npy"}
|
||||
{"epoch": 0.5109599395313681, "step": 339, "batch_size": 64, "mean": 41.83207702636719, "std": 66.67794036865234, "min": -135.58230590820312, "p10": -25.607203674316395, "median": 38.26523399353027, "p90": 143.15965728759767, "max": 172.39569091796875, "pos_frac": 0.765625, "sample": [45.64805603027344, 20.388328552246094, 15.653419494628906, -11.326629638671875, -13.422401428222656, 141.4561309814453, 2.4641189575195312, 47.98894500732422, 10.099170684814453, -80.24691009521484, 61.624847412109375, -7.3198394775390625, 7.053688049316406, 38.755680084228516, 46.90340042114258, 13.479881286621094, -82.50421142578125, 101.35446166992188, 136.8324737548828, 5.772905349731445, 143.88973999023438, 36.14616394042969, 74.32286071777344, 52.845558166503906, 38.148475646972656, 148.36856079101562, 0.7307205200195312, 30.33932113647461, -30.829261779785156, 52.74656677246094, 59.3756217956543, 107.74313354492188, 160.65174865722656, 158.73580932617188, 67.61052703857422, 0.4664115905761719, 77.61136627197266, -44.99333190917969, 85.39675903320312, 68.73936462402344, 156.23165893554688, 29.996627807617188, -8.927145004272461, -8.806724548339844, 38.527587890625, -2.4274444580078125, 20.9810791015625, 80.60629272460938, -104.4037094116211, -36.770477294921875, 10.411062240600586, 4.502758026123047, 109.9637451171875, 121.83058166503906, -135.58230590820312, 172.39569091796875, -8.128570556640625, 46.55122375488281, 3.1274566650390625, 95.38375854492188, 97.86904907226562, 38.38199234008789, -0.7050800323486328, 167.5423126220703], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000339.npy"}
|
||||
{"epoch": 0.5124716553287982, "step": 340, "batch_size": 64, "mean": 42.899810791015625, "std": 79.68955993652344, "min": -149.07522583007812, "p10": -50.40822219848632, "median": 27.964234352111816, "p90": 153.3026916503906, "max": 171.6899871826172, "pos_frac": 0.625, "sample": [72.18156433105469, 149.0195770263672, 20.118717193603516, 65.22967529296875, -4.4157257080078125, 160.1438751220703, 2.3960514068603516, -3.6696720123291016, -44.22637176513672, 161.14694213867188, -4.417854309082031, 101.85853576660156, 54.225677490234375, 125.97840881347656, 161.58792114257812, 103.14912414550781, 28.467714309692383, -68.95365905761719, 47.09466552734375, 88.31304931640625, -30.555513381958008, -7.410707473754883, -93.24436950683594, 91.05193328857422, -12.159942626953125, 171.6899871826172, -8.774948120117188, 142.60107421875, -15.112541198730469, 153.26812744140625, -115.29563903808594, 133.8809356689453, 2.3321533203125, 27.46075439453125, 145.12985229492188, 58.37923812866211, 137.0439910888672, -0.37774085998535156, -25.820114135742188, 146.84310913085938, 116.48121643066406, -31.62030029296875, -1.7108383178710938, 155.26844787597656, 54.59963607788086, -3.364114761352539, -60.163970947265625, -85.29035949707031, 1.8336105346679688, 153.3175048828125, -10.2264404296875, 57.39686965942383, 89.71332550048828, -13.601861953735352, -149.07522583007812, -53.057586669921875, 152.94117736816406, 19.649742126464844, 37.84375762939453, 168.33102416992188, 30.768970489501953, 5.185874938964844, -18.99903106689453, 13.208549499511719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000340.npy"}
|
||||
{"epoch": 0.5139833711262283, "step": 341, "batch_size": 64, "mean": 50.25506591796875, "std": 74.42554473876953, "min": -109.46499633789062, "p10": -38.01768798828124, "median": 31.993234634399414, "p90": 154.23641967773438, "max": 172.8846435546875, "pos_frac": 0.75, "sample": [-56.7553825378418, 59.429847717285156, 151.8572998046875, 78.99362182617188, 116.1137466430664, 86.02484130859375, 121.36556243896484, 172.8846435546875, 154.4652099609375, -27.03131103515625, 141.08961486816406, 29.778045654296875, -106.27410125732422, -4.332332611083984, 7.326456069946289, 4.160099029541016, 118.09003448486328, 3.5572128295898438, 130.13424682617188, 157.13819885253906, 123.69082641601562, 153.70257568359375, 155.24888610839844, 75.26264953613281, -69.14950561523438, 145.4058837890625, 107.85771179199219, 18.601848602294922, -22.975669860839844, -20.105850219726562, 34.20842361450195, 9.704971313476562, -20.314849853515625, 4.0568695068359375, 21.968399047851562, 145.2431640625, 53.52573776245117, 26.484373092651367, 85.99637603759766, -48.870269775390625, -26.192970275878906, -68.17605590820312, 75.40682220458984, 161.3236083984375, 3.1835708618164062, 155.3240203857422, 27.243438720703125, 75.70622253417969, 93.5611801147461, 161.401611328125, 60.217926025390625, 8.54547119140625, 130.61187744140625, -42.72613525390625, 2.5622406005859375, 115.86210632324219, 76.08264923095703, 24.138671875, -1.3162250518798828, -21.95550537109375, -22.94780731201172, 14.73883056640625, 5.635374069213867, -109.46499633789062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000341.npy"}
|
||||
{"epoch": 0.5154950869236583, "step": 342, "batch_size": 64, "mean": 49.900508880615234, "std": 73.42061614990234, "min": -122.01913452148438, "p10": -33.69090805053711, "median": 33.694217681884766, "p90": 157.18788146972656, "max": 208.9805145263672, "pos_frac": 0.71875, "sample": [24.704566955566406, 34.919464111328125, -0.29650115966796875, -4.832664489746094, 2.977926254272461, 157.4290313720703, 30.082416534423828, 125.608642578125, -62.291290283203125, 11.03485107421875, -4.805391311645508, 37.43072509765625, -2.355070114135742, 152.14291381835938, -2.1404647827148438, -52.15943908691406, 3.2215404510498047, -122.01913452148438, -1.971456527709961, -39.19762420654297, 23.050548553466797, 155.1439666748047, 102.77268981933594, 156.6251983642578, -34.104774475097656, 50.86658477783203, 25.841796875, 81.3468017578125, 121.85220336914062, 71.72972106933594, 50.57918167114258, 55.808406829833984, 33.139862060546875, 131.7713165283203, 49.57476806640625, -6.539735794067383, 128.1553497314453, 12.292068481445312, -32.7252197265625, 149.9949951171875, 15.227066040039062, 159.9904327392578, 61.505462646484375, 78.32171630859375, -44.09834289550781, 162.83253479003906, 2.5617141723632812, -2.5270462036132812, 208.9805145263672, 170.73326110839844, 110.99925994873047, -90.82701873779297, 15.603141784667969, 153.8129119873047, -14.87850570678711, 159.24501037597656, 63.02718734741211, -3.1511764526367188, 7.853843688964844, 62.72871780395508, 75.10137939453125, 34.248573303222656, 7.722991943359375, 183.96026611328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000342.npy"}
|
||||
{"epoch": 0.5170068027210885, "step": 343, "batch_size": 64, "mean": 34.25056838989258, "std": 67.61906433105469, "min": -146.59033203125, "p10": -19.893262481689447, "median": 23.845191955566406, "p90": 122.56351089477539, "max": 220.11123657226562, "pos_frac": 0.71875, "sample": [77.94206237792969, 8.036689758300781, -14.102256774902344, 0.8688507080078125, -5.2281646728515625, 71.99012756347656, -10.505340576171875, -0.7114086151123047, 4.319122314453125, -3.7833938598632812, -133.39129638671875, 26.457550048828125, 36.55897521972656, 83.6540298461914, 220.11123657226562, -5.766063690185547, 141.10128784179688, 151.80465698242188, 122.17215728759766, 109.54022216796875, 27.295257568359375, 3.7192745208740234, 58.48565673828125, 122.56368255615234, 10.968257904052734, 49.234439849853516, 22.043304443359375, -40.12744140625, 43.16931915283203, -1.383565902709961, -86.00125885009766, 39.45808410644531, 100.30325317382812, 13.751846313476562, 20.811309814453125, -4.110191345214844, 84.25485229492188, 122.5631103515625, -135.1893310546875, 146.6226348876953, -22.3751220703125, 56.10572814941406, 38.21623992919922, 164.69775390625, 71.76617431640625, 11.844863891601562, 50.5689697265625, 31.509273529052734, 5.4896392822265625, 125.63833618164062, 102.09932708740234, 8.715843200683594, -3.9925918579101562, 85.11502838134766, 53.66175079345703, 25.647079467773438, -3.320892333984375, 15.033172607421875, 49.04803466796875, 20.61022186279297, -146.59033203125, -5.7382659912109375, -27.461254119873047, 6.245820999145508], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000343.npy"}
|
||||
{"epoch": 0.5185185185185185, "step": 344, "batch_size": 64, "mean": 51.47527313232422, "std": 70.54987335205078, "min": -101.8991470336914, "p10": -34.41031723022461, "median": 48.861562728881836, "p90": 147.93878631591798, "max": 232.93008422851562, "pos_frac": 0.765625, "sample": [-54.45109558105469, 135.72662353515625, 22.412681579589844, 42.99642562866211, 157.87884521484375, -1.7589855194091797, 1.0741539001464844, 31.884075164794922, 17.232131958007812, 71.20600891113281, 16.310882568359375, 11.63055419921875, 23.29932975769043, 109.5196533203125, 85.64588928222656, 64.91606140136719, 86.85174560546875, 232.93008422851562, -9.988143920898438, 83.7073974609375, -49.650123596191406, -37.41346740722656, -31.13610076904297, 166.54006958007812, 1.8489151000976562, -10.724403381347656, -55.17189025878906, 3.1731491088867188, 174.81675720214844, 81.51390075683594, -35.81355285644531, 163.6966552734375, -6.691780090332031, 95.19912719726562, 54.72669982910156, 149.4243621826172, 143.31033325195312, 14.534187316894531, 97.97378540039062, -101.8991470336914, 0.5997505187988281, 9.683467864990234, -11.484779357910156, 3.3962535858154297, -11.068885803222656, -2.947824478149414, 153.6865234375, 140.0970916748047, 64.55708312988281, 74.31858825683594, 110.2671127319336, 55.511322021484375, 98.31916046142578, 4.618047714233398, 144.47244262695312, 18.05620002746582, 54.804840087890625, 91.5967788696289, 14.556869506835938, 116.4135971069336, -87.37726593017578, 97.4583740234375, 114.29043579101562, 93.31059265136719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000344.npy"}
|
||||
{"epoch": 0.5200302343159486, "step": 345, "batch_size": 64, "mean": 43.597835540771484, "std": 69.9998779296875, "min": -116.65618133544922, "p10": -24.661605072021484, "median": 36.390045166015625, "p90": 139.73633117675783, "max": 192.886474609375, "pos_frac": 0.75, "sample": [53.44805908203125, 11.620059967041016, 164.2578125, -87.42930603027344, 6.267810821533203, 13.498031616210938, -4.938865661621094, -54.04188537597656, 41.50678253173828, -16.0400390625, 136.5146484375, 99.85334777832031, 101.8361587524414, 101.61014556884766, -70.14924621582031, 102.94010162353516, -80.7728042602539, 31.676471710205078, 34.772705078125, 118.19955444335938, 192.886474609375, 9.647192001342773, 102.68516540527344, 38.689697265625, 110.12287139892578, -12.373123168945312, 9.561149597167969, 38.01616668701172, -3.2414627075195312, 53.33446502685547, 85.73340606689453, 173.14865112304688, 102.77641296386719, 54.183692932128906, 38.00738525390625, -116.65618133544922, 157.27557373046875, 170.32608032226562, 72.58537292480469, -19.174327850341797, 140.0897979736328, 38.374839782714844, -23.337051391601562, 138.9115753173828, 29.096099853515625, -16.69301986694336, 26.51789093017578, -25.229270935058594, 21.015823364257812, 21.316200256347656, 18.41265106201172, 12.036819458007812, -12.923164367675781, 27.48797607421875, 5.271064758300781, 78.7315673828125, 0.9882411956787109, -104.08030700683594, -10.868892669677734, 42.141014099121094, 65.09180450439453, 80.60968017578125, 122.80682373046875, 152.32904052734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000345.npy"}
|
||||
{"epoch": 0.5215419501133787, "step": 346, "batch_size": 64, "mean": 51.55143737792969, "std": 73.3689193725586, "min": -157.70046997070312, "p10": -37.07147712707519, "median": 45.313541412353516, "p90": 151.9592315673828, "max": 177.10635375976562, "pos_frac": 0.78125, "sample": [94.73284149169922, 116.50938415527344, 97.97320556640625, 87.63251495361328, 100.90280151367188, 45.02278137207031, 16.075897216796875, 76.29060363769531, 147.02963256835938, 165.45457458496094, 151.84039306640625, 5.462196350097656, 68.4725341796875, 138.66510009765625, -31.935367584228516, 156.2690887451172, -94.79344177246094, 50.10414505004883, -39.272666931152344, 60.79140853881836, 83.19894409179688, 80.17349243164062, 47.23921203613281, 45.60430145263672, 146.69122314453125, 177.10635375976562, -157.70046997070312, 13.860130310058594, 8.177120208740234, -78.66244506835938, 7.590484619140625, -60.249961853027344, 138.2046356201172, -1.4638748168945312, 43.20595169067383, 30.73619842529297, 7.135261535644531, -8.598655700683594, 117.78456115722656, 123.88151550292969, 168.44635009765625, 108.77706909179688, -10.463638305664062, 159.46360778808594, 21.287147521972656, 26.485023498535156, 103.3264389038086, 66.43067932128906, -49.019264221191406, 79.84364318847656, 35.655731201171875, 13.71142578125, 25.163328170776367, 0.6144332885742188, -67.9762954711914, 11.509231567382812, 15.414337158203125, 102.48124694824219, -18.035255432128906, 171.55923461914062, -8.494503021240234, -0.6403331756591797, 14.600723266601562, 152.01016235351562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000346.npy"}
|
||||
{"epoch": 0.5230536659108088, "step": 347, "batch_size": 64, "mean": 40.040870666503906, "std": 69.73436737060547, "min": -119.75094604492188, "p10": -37.564408874511706, "median": 19.478099822998047, "p90": 148.04926300048828, "max": 176.85174560546875, "pos_frac": 0.6875, "sample": [38.133697509765625, -5.577136993408203, 31.475723266601562, 64.69346618652344, -90.87043762207031, 17.786346435546875, 85.85476684570312, 67.75579071044922, 102.8834457397461, 141.35061645507812, 3.95855712890625, 144.7181854248047, 12.459991455078125, -72.61083221435547, -42.19159698486328, 170.75506591796875, 14.093948364257812, 41.4761848449707, -18.478546142578125, 6.946708679199219, 19.009414672851562, 3.763669967651367, 109.51106262207031, -2.05938720703125, -42.1416015625, 15.961650848388672, 80.4422836303711, 119.31976318359375, 153.93917846679688, -54.779449462890625, 34.45923614501953, -25.349836349487305, -25.959815979003906, 129.50006103515625, -5.560281753540039, 59.277645111083984, -4.930919647216797, 118.26829528808594, -12.9722900390625, 37.228599548339844, 41.873085021972656, 19.94678497314453, -26.884292602539062, 91.01556396484375, 3.3413867950439453, 163.95376586914062, -45.192665100097656, -4.937583923339844, 176.85174560546875, 166.0996856689453, 161.03659057617188, 133.95443725585938, 7.7451324462890625, 33.04045104980469, -119.75094604492188, 59.78429412841797, 59.740013122558594, 4.517730712890625, -6.184421539306641, 0.36469459533691406, -14.889896392822266, -3.8952178955078125, 149.47686767578125, 90.06736755371094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000347.npy"}
|
||||
{"epoch": 0.5245653817082389, "step": 348, "batch_size": 64, "mean": 48.25403594970703, "std": 78.28681945800781, "min": -116.52203369140625, "p10": -36.485365295410155, "median": 27.47051239013672, "p90": 162.68508453369142, "max": 228.73699951171875, "pos_frac": 0.6875, "sample": [149.477783203125, 152.71307373046875, 11.922500610351562, 1.6965675354003906, 167.05130004882812, 50.65795135498047, -34.9688720703125, -12.624534606933594, -116.52203369140625, 4.994728088378906, 175.75018310546875, -57.29631042480469, 27.258834838867188, 69.32695770263672, 133.36778259277344, 137.7655487060547, 24.03265380859375, 176.25144958496094, 33.3027458190918, 172.93275451660156, 7.858501434326172, 120.98942565917969, 137.86474609375, 20.825241088867188, -3.0140533447265625, 147.70584106445312, -4.9903717041015625, 143.34788513183594, -51.80848693847656, -4.8685302734375, 75.95928955078125, -0.9845962524414062, 2.641336441040039, 228.73699951171875, 27.68218994140625, -77.50283813476562, 35.10085678100586, -74.59822082519531, -28.516128540039062, 85.33256530761719, 172.9835662841797, 155.7518310546875, 50.938690185546875, 0.15556716918945312, 65.23187255859375, -15.281257629394531, -36.31214904785156, -42.95916748046875, -15.379814147949219, 76.22616577148438, -16.399394989013672, 165.65647888183594, -14.470052719116211, 66.0440902709961, 85.3666763305664, 8.744239807128906, 87.0994873046875, 39.09954833984375, -36.559600830078125, 1.629659652709961, -17.352127075195312, 85.59197998046875, 153.0654296875, 14.533916473388672], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000348.npy"}
|
||||
{"epoch": 0.5260770975056689, "step": 349, "batch_size": 64, "mean": 48.701942443847656, "std": 72.60408782958984, "min": -100.87206268310547, "p10": -26.307231140136718, "median": 32.88196563720703, "p90": 157.92523498535155, "max": 181.8115997314453, "pos_frac": 0.6875, "sample": [-64.13951110839844, 28.61297607421875, 154.65386962890625, 33.094810485839844, 42.24925231933594, -2.0141372680664062, 68.25336456298828, 157.95872497558594, 62.52385711669922, 6.9753265380859375, -1.2565174102783203, 32.66912078857422, 144.73751831054688, 13.913192749023438, 26.21143341064453, 93.91024780273438, 104.61640167236328, -78.24002838134766, -100.87206268310547, 157.8470916748047, 36.8182373046875, 115.16316223144531, -27.21942138671875, -0.1566162109375, 136.50228881835938, 25.8780517578125, 2.3312301635742188, 63.83656692504883, -9.574951171875, 103.45648193359375, 117.93266296386719, 133.11602783203125, 2.2445602416992188, 18.696271896362305, -6.5788726806640625, 150.32891845703125, 33.63159942626953, 175.8329620361328, -85.95507049560547, 49.727596282958984, 174.84295654296875, 11.510780334472656, 7.041038513183594, 37.63519287109375, 84.46588134765625, 160.64395141601562, 162.97216796875, -2.3259315490722656, -19.014572143554688, -2.1163997650146484, 93.55436706542969, -22.543838500976562, -5.597740173339844, 4.016876220703125, 51.30912780761719, 179.34634399414062, -24.178787231445312, -30.404556274414062, -39.256202697753906, 121.9283218383789, -1.2974987030029297, -7.268363952636719, 82.13317108154297, 181.8115997314453], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000349.npy"}
|
||||
{"epoch": 0.527588813303099, "step": 350, "batch_size": 64, "mean": 61.641326904296875, "std": 71.0028305053711, "min": -91.94775390625, "p10": -17.818466377258297, "median": 59.45893669128418, "p90": 154.85601501464845, "max": 206.2367706298828, "pos_frac": 0.796875, "sample": [131.10888671875, -13.214012145996094, 77.16041564941406, -10.970687866210938, 86.62071990966797, 23.601655960083008, 153.85440063476562, 84.95240783691406, 80.50181579589844, 150.69711303710938, 9.663497924804688, -47.14404296875, 58.634002685546875, 101.3204345703125, 44.862518310546875, 77.37409973144531, 130.53335571289062, 24.146839141845703, -7.657779693603516, -3.3776302337646484, 59.671966552734375, 81.23056030273438, -88.2646255493164, 120.91073608398438, 67.05001068115234, 5.0240936279296875, -91.94775390625, 206.2367706298828, -15.018838882446289, 35.119266510009766, 127.86296844482422, -61.83721923828125, 152.05825805664062, 8.960319519042969, 129.11671447753906, 166.80128479003906, 72.99845886230469, 135.8084716796875, 161.80758666992188, 191.96554565429688, 64.36078643798828, 171.09375, 155.2852783203125, -32.31212615966797, 42.63731002807617, 4.420894622802734, 7.136314392089844, 21.52501106262207, -19.018306732177734, 72.43805694580078, 82.93955993652344, 59.245906829833984, 1.1667194366455078, 39.73858642578125, 44.680274963378906, -63.779266357421875, 44.63348388671875, 37.559593200683594, -6.446380615234375, 46.696380615234375, 160.95738220214844, 136.1407928466797, 105.5232925415039, 150.19903564453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000350.npy"}
|
||||
{"epoch": 0.5291005291005291, "step": 351, "batch_size": 64, "mean": 47.013553619384766, "std": 79.08891296386719, "min": -157.91653442382812, "p10": -43.60661392211914, "median": 36.494266510009766, "p90": 149.4046295166016, "max": 160.66390991210938, "pos_frac": 0.75, "sample": [-2.0092105865478516, 125.43920135498047, 154.09432983398438, 119.85871124267578, -0.6129417419433594, 139.5033416748047, -12.261608123779297, 34.27001953125, 159.60433959960938, 38.06599426269531, 21.46710205078125, 152.99032592773438, 9.90882682800293, -26.95281982421875, 23.21063995361328, -6.941535949707031, 13.832183837890625, 6.79150390625, 112.85367584228516, 90.06843566894531, 71.39488983154297, -105.74079132080078, 152.6669921875, 13.180831909179688, 141.79244995117188, 157.11209106445312, 84.20330810546875, 98.29776000976562, -157.91653442382812, 58.80204772949219, 138.29623413085938, -81.80362701416016, 96.55165100097656, -6.11798095703125, 123.15336608886719, -49.28382110595703, -20.95897674560547, 122.26641845703125, 34.92253875732422, 32.95711135864258, 86.5905532836914, 160.66390991210938, 23.716552734375, -45.12932586669922, 67.05735778808594, 21.035568237304688, 83.22358703613281, 64.22004699707031, 3.8686370849609375, 114.31906127929688, -40.053619384765625, 4.055320739746094, 117.33815002441406, 92.49575805664062, 153.8520965576172, 23.95184326171875, 3.37677001953125, 102.68733215332031, -157.15834045410156, 11.59663200378418, -140.47593688964844, 117.71055603027344, 105.13078308105469, -22.162322998046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000351.npy"}
|
||||
{"epoch": 0.5306122448979592, "step": 352, "batch_size": 64, "mean": 51.82670593261719, "std": 73.00234985351562, "min": -118.14393615722656, "p10": -19.342455863952637, "median": 39.935895919799805, "p90": 152.52888946533204, "max": 232.56417846679688, "pos_frac": 0.75, "sample": [148.63250732421875, 183.2591094970703, 150.37091064453125, 9.320083618164062, 140.65792846679688, -118.14393615722656, -7.774784088134766, -1.3852348327636719, 52.54191207885742, 78.05656433105469, 70.89012145996094, 122.5872802734375, -94.77365112304688, -2.4319801330566406, 81.52130126953125, 19.20654296875, -28.126617431640625, -5.94488525390625, -57.221595764160156, 19.228286743164062, 97.08378601074219, 67.38066101074219, 24.845672607421875, 159.16038513183594, 152.9281463623047, 76.429443359375, 37.19309997558594, -112.88365936279297, 21.041309356689453, -19.438159942626953, 38.760250091552734, -1.6103763580322266, 11.96630859375, 66.97303771972656, 54.011505126953125, 99.03207397460938, 33.996559143066406, 156.79238891601562, 162.36492919921875, 48.72303009033203, 151.5972900390625, 1.1692581176757812, 232.56417846679688, 26.246782302856445, 101.85416412353516, 144.0159149169922, -9.279380798339844, 13.499847412109375, -19.1191463470459, 44.37083053588867, 1.73846435546875, 70.00582885742188, 27.21868896484375, 132.51571655273438, 135.6998291015625, -1.9502410888671875, -24.927207946777344, 156.5416717529297, -6.068889617919922, 9.163192749023438, 0.31381988525390625, 109.84416198730469, 43.56269836425781, 41.111541748046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000352.npy"}
|
||||
{"epoch": 0.5321239606953893, "step": 353, "batch_size": 64, "mean": 50.21971130371094, "std": 71.05633544921875, "min": -160.13302612304688, "p10": -8.929261016845702, "median": 33.613603591918945, "p90": 148.59515991210938, "max": 168.533203125, "pos_frac": 0.75, "sample": [80.64974212646484, 165.22225952148438, -46.62590408325195, 36.91633224487305, -90.74238586425781, 24.725627899169922, 94.98596954345703, -8.152671813964844, 149.98460388183594, 21.607131958007812, -1.5300788879394531, 34.21765899658203, 93.60969543457031, -5.3421630859375, 147.20498657226562, 13.135698318481445, 35.53590393066406, 137.39129638671875, 121.66673278808594, 149.19094848632812, 98.79277038574219, 166.42642211914062, 1.8016529083251953, 88.07239532470703, -11.600013732910156, -2.770862579345703, 162.17416381835938, 17.165489196777344, 133.93655395507812, 80.97718811035156, 89.02102661132812, -0.2784080505371094, 80.51655578613281, 0.5893898010253906, -160.13302612304688, 145.74102783203125, -67.37053680419922, -49.96530532836914, 145.65663146972656, 140.82208251953125, 65.9013671875, 151.45053100585938, 168.533203125, 0.4971923828125, -9.2620849609375, 33.00954818725586, 124.16062927246094, 12.989923477172852, -7.075565338134766, 16.169052124023438, 37.978363037109375, 27.671005249023438, -4.7442779541015625, 37.18019104003906, -0.6686859130859375, 13.189956665039062, 3.2169952392578125, 17.790786743164062, -6.045005798339844, 13.919807434082031, 102.62835693359375, 2.9849624633789062, 144.20486450195312, 55.153831481933594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000353.npy"}
|
||||
{"epoch": 0.5336356764928194, "step": 354, "batch_size": 64, "mean": 56.37581253051758, "std": 77.43402099609375, "min": -106.18875122070312, "p10": -60.977625274658195, "median": 50.6053581237793, "p90": 156.7613525390625, "max": 184.7646484375, "pos_frac": 0.75, "sample": [170.350830078125, 41.80266189575195, -21.726322174072266, 129.7884063720703, -63.520362854003906, 50.39848327636719, 18.36847686767578, 117.7657241821289, 129.8140106201172, 14.571117401123047, 116.8205337524414, 158.16741943359375, 96.89530181884766, 57.688209533691406, -4.600624084472656, 1.3466644287109375, 155.50003051757812, -31.39352035522461, 140.77931213378906, 184.7646484375, 89.48866271972656, 154.03274536132812, 50.09635925292969, 70.14654541015625, -84.0732421875, -2.2694644927978516, -106.18875122070312, 136.86813354492188, 41.07868957519531, 50.812232971191406, 28.097991943359375, 112.44285583496094, 159.1830596923828, 53.1817626953125, 46.92424011230469, 20.443321228027344, 83.62989807128906, -80.12442779541016, 157.10137939453125, 12.70233154296875, 110.02948760986328, 147.53199768066406, -75.8266372680664, -73.43161010742188, 164.88162231445312, -65.85186767578125, 176.45204162597656, 124.4874267578125, 10.813653945922852, -24.21259307861328, 38.96643829345703, 58.22784423828125, 113.3636474609375, 155.96795654296875, 52.69692611694336, 23.456905364990234, -7.812004089355469, 14.176006317138672, -16.444984436035156, 106.49736022949219, 45.40797424316406, 145.27081298828125, -55.04457092285156, -18.707256317138672], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000354.npy"}
|
||||
{"epoch": 0.5351473922902494, "step": 355, "batch_size": 64, "mean": 58.43730926513672, "std": 82.01102447509766, "min": -140.97149658203125, "p10": -53.98408432006835, "median": 58.79323768615723, "p90": 160.20060882568362, "max": 187.0718994140625, "pos_frac": 0.71875, "sample": [103.22471618652344, 170.336669921875, -57.62327575683594, 4.38275146484375, 173.6402587890625, 58.52471923828125, 52.25017547607422, -9.304143905639648, -62.54419708251953, 74.59461212158203, 63.24371337890625, 27.057830810546875, 88.39163208007812, -60.3031005859375, 150.01199340820312, 105.74993896484375, -2.472423553466797, 59.0617561340332, -77.21505737304688, 127.09156036376953, -57.249603271484375, 64.14640808105469, 25.81109619140625, 161.50767517089844, 146.0130615234375, 46.43572998046875, 111.3062515258789, 32.31900405883789, 123.06603240966797, 187.0718994140625, -46.364540100097656, 30.45220184326172, -4.709316253662109, 74.71249389648438, 9.862991333007812, -140.97149658203125, 100.14802551269531, -8.346847534179688, 60.261470794677734, 153.89158630371094, -11.007164001464844, 18.937530517578125, 143.05316162109375, -9.05072021484375, -2.9849376678466797, 176.91696166992188, 156.51116943359375, 9.569561004638672, 157.11575317382812, -139.26075744628906, 152.31019592285156, 149.2233123779297, 165.0826873779297, -1.9226646423339844, 157.15078735351562, -36.836631774902344, -6.254066467285156, 29.074161529541016, 174.1138916015625, 4.01605224609375, 132.32464599609375, 30.515138626098633, 99.66170501708984, 134.26376342773438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000355.npy"}
|
||||
{"epoch": 0.5366591080876795, "step": 356, "batch_size": 64, "mean": 47.24530792236328, "std": 89.5131607055664, "min": -164.16677856445312, "p10": -74.37100753784179, "median": 44.74515914916992, "p90": 158.96756896972656, "max": 192.74464416503906, "pos_frac": 0.703125, "sample": [154.3944091796875, 169.6034698486328, 45.631561279296875, -83.53553771972656, 143.799560546875, 14.056999206542969, 0.7923965454101562, 192.74464416503906, 96.78907775878906, 1.754190444946289, -122.27461242675781, -146.10281372070312, 100.89866638183594, -93.1440658569336, 157.94583129882812, 170.57040405273438, 11.420318603515625, 18.464038848876953, 148.232177734375, -1.0943164825439453, 163.52337646484375, -164.16677856445312, 159.40545654296875, 25.859405517578125, 2.9884033203125, 166.4349822998047, -71.81340026855469, -48.80690002441406, 69.40861511230469, 43.85875701904297, 84.59619903564453, 0.30658721923828125, -8.196676254272461, 3.1421890258789062, -75.46712493896484, 125.17576599121094, 135.87026977539062, -66.40936279296875, 62.0443115234375, 132.2491455078125, -6.639881134033203, 4.5343170166015625, 156.5281524658203, -16.262435913085938, 59.83709716796875, 94.84610748291016, 102.48747253417969, 10.962257385253906, -53.064735412597656, 84.0916748046875, -2.059703826904297, 97.68753051757812, 153.33636474609375, 168.92138671875, -9.007377624511719, 117.95214080810547, 45.68912124633789, -21.86261749267578, 156.0521240234375, -36.19721984863281, -80.32699584960938, 152.46917724609375, 34.704627990722656, 88.07159423828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000356.npy"}
|
||||
{"epoch": 0.5381708238851096, "step": 357, "batch_size": 64, "mean": 41.719749450683594, "std": 75.56707000732422, "min": -152.13877868652344, "p10": -44.91146736145019, "median": 42.04222869873047, "p90": 146.63075256347656, "max": 180.7438201904297, "pos_frac": 0.734375, "sample": [142.09178161621094, -5.583524703979492, 139.34869384765625, -28.647109985351562, -143.8173828125, 152.39883422851562, 45.302215576171875, 64.4779052734375, 25.78253936767578, 25.31744384765625, -16.015350341796875, 51.82141876220703, -39.92253875732422, 11.77444076538086, 161.6575927734375, 81.07388305664062, 144.07566833496094, 77.91775512695312, 86.47696685791016, 3.9766502380371094, 98.46781158447266, 57.35710906982422, 59.0067253112793, 23.49345588684082, 23.366500854492188, 88.90186309814453, -5.117216110229492, 4.988525390625, -62.4521484375, 50.30221176147461, -52.59474182128906, 24.537221908569336, -24.428058624267578, 98.92095184326172, 61.206260681152344, 160.38388061523438, 147.060546875, 42.40998840332031, 99.95356750488281, 158.84396362304688, -19.55651092529297, 61.896087646484375, 10.76788330078125, 1.9797592163085938, 163.75213623046875, 45.949676513671875, 2.2878036499023438, -26.000076293945312, -47.04957962036133, 25.6944580078125, 42.77274703979492, 180.7438201904297, 93.3521957397461, 13.649982452392578, 113.3722915649414, 41.674468994140625, -0.33260154724121094, 0.4513702392578125, 139.008544921875, -119.28208923339844, -82.12080383300781, -152.13877868652344, 145.62789916992188, -0.5532169342041016], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000357.npy"}
|
||||
{"epoch": 0.5396825396825397, "step": 358, "batch_size": 64, "mean": 57.08154296875, "std": 79.71619415283203, "min": -123.05760192871094, "p10": -42.38678894042968, "median": 36.565311431884766, "p90": 162.12142639160157, "max": 242.99761962890625, "pos_frac": 0.78125, "sample": [130.59432983398438, 95.27711486816406, 99.41813659667969, 162.73663330078125, -52.63975524902344, 170.07321166992188, 153.43911743164062, -34.990928649902344, -7.917499542236328, 23.508331298828125, -10.171443939208984, 126.24166870117188, 47.53247833251953, 112.220703125, 150.14950561523438, 46.156646728515625, -56.1405029296875, 5.1395416259765625, 112.43896484375, 22.032012939453125, 17.303009033203125, 26.748729705810547, -37.066368103027344, 105.13553619384766, 152.74169921875, 7.714134216308594, 32.66188049316406, 16.627975463867188, -29.04254150390625, 21.93704605102539, -86.19197082519531, 37.45307159423828, -68.33062744140625, 4.364187240600586, -29.393043518066406, 48.46440124511719, 101.53215026855469, 35.62823486328125, 21.746116638183594, 0.13082313537597656, 166.0738525390625, 204.05038452148438, 242.99761962890625, 8.134769439697266, 193.23605346679688, 160.68594360351562, 91.05611419677734, -46.32882308959961, 35.67755126953125, 56.3099365234375, -1.2690010070800781, -44.666969299316406, -123.05760192871094, 146.09140014648438, 8.703536987304688, 146.73129272460938, 30.672122955322266, 92.43324279785156, 17.67291259765625, 99.63090515136719, 123.17938232421875, 129.835693359375, 55.0406494140625, 185.06497192382812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000358.npy"}
|
||||
{"epoch": 0.5411942554799698, "step": 359, "batch_size": 64, "mean": 29.22472381591797, "std": 71.66439056396484, "min": -118.70895385742188, "p10": -57.185736846923824, "median": 23.707551956176758, "p90": 138.67734832763674, "max": 183.21009826660156, "pos_frac": 0.65625, "sample": [-45.80265808105469, 88.95730590820312, -21.743026733398438, 8.295074462890625, 55.93010711669922, 47.731529235839844, -26.151512145996094, 20.504562377929688, -51.291961669921875, -57.75335693359375, 183.21009826660156, 61.307891845703125, -6.9595947265625, 46.0552978515625, -2.8308486938476562, 72.36927795410156, 150.84613037109375, -55.861289978027344, -5.810577392578125, -113.40721130371094, 23.477275848388672, 29.239521026611328, 25.278289794921875, -49.85374450683594, 14.265228271484375, 141.62619018554688, 66.56361389160156, 22.231365203857422, 57.945770263671875, -107.24918365478516, 168.50823974609375, 180.376953125, -42.23529052734375, 124.80282592773438, 32.239471435546875, 23.937828063964844, 2.8989334106445312, 103.45384216308594, 30.751449584960938, 14.297401428222656, -65.54358673095703, 107.58124542236328, 139.22543334960938, 69.99029541015625, 4.506340026855469, 58.419464111328125, -45.02832794189453, -82.0782470703125, -59.05158996582031, 66.08206176757812, 40.16569900512695, -4.588788986206055, -118.70895385742188, -12.091028213500977, 82.11223602294922, -0.7185440063476562, 88.46244812011719, 10.294088363647461, 69.25347900390625, 137.3984832763672, 26.188617706298828, 158.1397247314453, 7.451129913330078, -17.230438232421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000359.npy"}
|
||||
{"epoch": 0.5427059712773998, "step": 360, "batch_size": 64, "mean": 48.450965881347656, "std": 73.20993041992188, "min": -113.99016571044922, "p10": -26.65893669128418, "median": 29.400880813598633, "p90": 153.82484283447266, "max": 196.69244384765625, "pos_frac": 0.671875, "sample": [-26.037322998046875, 153.07933044433594, 17.655120849609375, 7.37127685546875, 66.14032745361328, 104.87540435791016, -2.589996337890625, 35.45505142211914, 34.47574996948242, 165.865234375, -0.14356040954589844, -71.18914794921875, -8.927276611328125, -62.268863677978516, 141.75704956054688, -59.927093505859375, 157.21636962890625, -26.925342559814453, 0.029636383056640625, 142.58152770996094, 29.488117218017578, -3.3701248168945312, -50.6314697265625, 11.3828125, -6.1644287109375, -3.9962005615234375, 122.70481872558594, 3.3727645874023438, 100.6386947631836, -8.097457885742188, 36.719703674316406, 13.445426940917969, 93.0640869140625, -28.953216552734375, 29.313644409179688, 154.14434814453125, -13.026458740234375, 90.79706573486328, 154.25192260742188, 15.811599731445312, 184.1541748046875, -6.622894287109375, 143.03384399414062, -10.179677963256836, 196.69244384765625, 18.075706481933594, 152.12075805664062, 12.229118347167969, 55.39216232299805, 134.2939910888672, 128.01492309570312, 53.864013671875, 167.69052124023438, -15.784852981567383, -8.003446578979492, 74.55203247070312, -12.31611442565918, -113.99016571044922, 53.51507568359375, 119.81773376464844, 33.15521240234375, 7.2614288330078125, 107.7104263305664, 116.79609680175781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000360.npy"}
|
||||
{"epoch": 0.54421768707483, "step": 361, "batch_size": 64, "mean": 58.25619888305664, "std": 71.76156616210938, "min": -98.31936645507812, "p10": -23.020615768432616, "median": 57.18436622619629, "p90": 151.2000289916992, "max": 180.38699340820312, "pos_frac": 0.765625, "sample": [80.31684112548828, 114.52576446533203, 30.496719360351562, -8.662017822265625, 160.63035583496094, 65.39197540283203, -49.424869537353516, -98.31936645507812, 61.20425033569336, 42.46864700317383, 118.98335266113281, -40.94321823120117, -2.687347412109375, 150.3013916015625, 100.53558349609375, 93.21085357666016, -23.15097427368164, 3.048625946044922, 52.57147216796875, 10.800308227539062, 80.23818969726562, 53.16448211669922, 13.858451843261719, 159.3787841796875, 142.61138916015625, 141.52789306640625, 142.2233428955078, -91.87513732910156, 144.44622802734375, 126.64703369140625, 26.870624542236328, -5.793182373046875, 122.98399353027344, 180.38699340820312, 141.72654724121094, 77.05245971679688, 149.25027465820312, 99.87744140625, -61.02937316894531, -6.083320617675781, -1.5266742706298828, 5.0457763671875, 161.83619689941406, 151.5851593017578, 8.275218963623047, 159.4598846435547, 143.31239318847656, 9.971794128417969, -53.84259796142578, -22.341583251953125, 68.96524047851562, 15.889785766601562, 79.74996948242188, 13.655330657958984, 163.65914916992188, 26.94542121887207, 131.86825561523438, 3.1090316772460938, 12.696090698242188, 66.19515991210938, 67.19698333740234, 42.384178161621094, -1.7383613586425781, -22.716445922851562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000361.npy"}
|
||||
{"epoch": 0.54572940287226, "step": 362, "batch_size": 64, "mean": 59.16782760620117, "std": 89.11280822753906, "min": -157.84326171875, "p10": -40.534899139404295, "median": 42.42874526977539, "p90": 158.66717071533205, "max": 328.91949462890625, "pos_frac": 0.734375, "sample": [-50.39122009277344, 2.9944286346435547, -5.8060302734375, 1.4453067779541016, -5.928733825683594, -157.84326171875, 25.899818420410156, 81.01425170898438, 328.91949462890625, 149.27418518066406, 21.09734344482422, -14.560649871826172, 22.604393005371094, -11.498870849609375, -1.6084747314453125, -3.773090362548828, 87.82673645019531, 74.81892395019531, 96.38548278808594, 163.7552490234375, -1.3883342742919922, 31.285797119140625, 23.03389549255371, 114.78939819335938, 143.55145263671875, 0.31414794921875, 138.10260009765625, -6.091461181640625, 3.559490203857422, 124.72137451171875, 224.37567138671875, 3.1884632110595703, -28.050148010253906, 159.0364990234375, 31.401771545410156, 196.88677978515625, 147.75784301757812, 24.2791748046875, 75.71916961669922, -41.517852783203125, 1.2057228088378906, 139.85140991210938, 156.04901123046875, 157.80540466308594, 25.748062133789062, 53.455718994140625, 123.47944641113281, 183.19273376464844, 128.86077880859375, 123.22010803222656, -38.24134063720703, 20.326690673828125, 141.11557006835938, -117.80589294433594, 71.48252868652344, 83.49915313720703, 150.66419982910156, -98.68771362304688, -54.03992462158203, 105.83283996582031, 185.05712890625, -80.6741943359375, 86.54965209960938, 69.21279907226562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000362.npy"}
|
||||
{"epoch": 0.54724111866969, "step": 363, "batch_size": 64, "mean": 56.934486389160156, "std": 78.00630187988281, "min": -150.12677001953125, "p10": -14.058094215393066, "median": 40.890342712402344, "p90": 165.8816177368164, "max": 259.6091003417969, "pos_frac": 0.84375, "sample": [97.43787384033203, -115.77436065673828, 165.91494750976562, 61.76575469970703, 89.84124755859375, 35.88031768798828, 12.145545959472656, 177.69956970214844, 19.046401977539062, -40.39606857299805, 0.6369171142578125, 54.14105224609375, 259.6091003417969, 194.19711303710938, 11.757698059082031, 180.85650634765625, 36.27829360961914, 83.3999252319336, 25.489227294921875, 136.7407684326172, 13.68014907836914, 118.63584899902344, 12.668581008911133, 158.05970764160156, 156.10308837890625, 41.28279113769531, -0.638824462890625, 21.0001220703125, -12.7822265625, 29.35205841064453, -14.604894638061523, -150.12677001953125, 81.50640106201172, -104.2177734375, 74.33918762207031, 71.85022735595703, 40.497894287109375, 112.37140655517578, 28.960317611694336, -1.8379173278808594, 7.414310455322266, 10.332145690917969, 36.68800354003906, 165.80384826660156, 3.4284191131591797, 178.80648803710938, 111.42140197753906, 49.306304931640625, 156.0084228515625, 9.526626586914062, 132.74575805664062, 0.25939178466796875, 135.05294799804688, 90.9163818359375, -62.15348815917969, 57.48651123046875, 173.2918243408203, -23.177043914794922, 71.76676940917969, 30.073184967041016, 0.5843486785888672, 86.74508666992188, 47.38749694824219, 11.324758529663086], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000363.npy"}
|
||||
{"epoch": 0.5487528344671202, "step": 364, "batch_size": 64, "mean": 63.377872467041016, "std": 93.54195404052734, "min": -158.52316284179688, "p10": -33.601382446289065, "median": 58.90422058105469, "p90": 184.69189453125003, "max": 237.38392639160156, "pos_frac": 0.734375, "sample": [190.37628173828125, -4.727745056152344, 21.170204162597656, 170.81063842773438, 179.54037475585938, 13.749473571777344, 79.7734603881836, 132.72311401367188, -19.491844177246094, 191.80194091796875, 207.95521545410156, 126.5985336303711, -26.110103607177734, -54.78112030029297, 93.51535034179688, 159.1945037841797, 55.72199249267578, -13.114376068115234, 237.38392639160156, 0.73919677734375, 186.89968872070312, 66.5209732055664, 132.9232635498047, 161.60865783691406, -125.06904602050781, -65.18526458740234, 177.45803833007812, 2.2442569732666016, 0.5386905670166016, 137.7826385498047, 160.22216796875, 61.901527404785156, -5.1741485595703125, 81.41716766357422, 20.513870239257812, 3.2578887939453125, 124.19284057617188, 189.13705444335938, -33.534507751464844, 22.78203773498535, 127.53276062011719, 78.5454330444336, -33.630043029785156, -99.49882507324219, 207.857666015625, 11.193485260009766, -6.18212890625, 155.57135009765625, 3.983123779296875, 136.28526306152344, 8.030487060546875, 142.6707305908203, -127.47331237792969, 55.90691375732422, -158.52316284179688, 11.72021484375, 76.69927215576172, 122.87055969238281, -2.032430648803711, 11.825714111328125, -1.5976448059082031, -29.248985290527344, 159.5654754638672, 160.8451690673828], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000364.npy"}
|
||||
{"epoch": 0.5502645502645502, "step": 365, "batch_size": 64, "mean": 60.169734954833984, "std": 69.13088989257812, "min": -101.10848999023438, "p10": -5.951942634582517, "median": 54.184335708618164, "p90": 152.32860717773437, "max": 250.14178466796875, "pos_frac": 0.796875, "sample": [47.805328369140625, 14.738611221313477, 86.99050903320312, 0.5549163818359375, 62.355796813964844, 2.0646705627441406, 152.84619140625, 117.7943344116211, 133.39076232910156, 72.89739990234375, 155.20220947265625, 70.9516830444336, -101.10848999023438, -1.7064361572265625, 19.136077880859375, 149.5598602294922, -1.536773681640625, -0.17525863647460938, -11.962379455566406, 0.622894287109375, 65.5352783203125, 79.06067657470703, 132.32485961914062, 6.382402420043945, -0.7071247100830078, -89.86430358886719, 67.67578887939453, 165.24166870117188, 33.18573760986328, 98.053955078125, 11.789749145507812, 4.109598159790039, 151.03729248046875, 63.024654388427734, 141.0305633544922, 88.04843139648438, 151.12091064453125, -3.550802230834961, 47.95915603637695, 47.918052673339844, 161.234375, 77.58489990234375, 45.69648742675781, -19.1920166015625, 96.89472961425781, 150.1339111328125, 5.24949836730957, -10.209035873413086, 250.14178466796875, 60.409515380859375, 79.9546127319336, 3.2048091888427734, 139.23443603515625, 16.322601318359375, -1.0512886047363281, 45.78302001953125, 6.309349060058594, -31.926342010498047, 89.74107360839844, 5.9476318359375, 166.60887145996094, 157.75958251953125, -6.9810028076171875, 132.2131805419922], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000365.npy"}
|
||||
{"epoch": 0.5517762660619804, "step": 366, "batch_size": 64, "mean": 49.40788650512695, "std": 91.15361022949219, "min": -166.23692321777344, "p10": -69.75525360107422, "median": 41.92514991760254, "p90": 162.31890411376955, "max": 217.20603942871094, "pos_frac": 0.71875, "sample": [1.8418197631835938, 189.16580200195312, 92.50450897216797, 71.42701721191406, -8.139453887939453, 148.76959228515625, -62.79826354980469, -38.5809326171875, -49.52264404296875, -8.131576538085938, -68.23127746582031, 110.79145050048828, 17.514556884765625, 93.56988525390625, 59.755035400390625, 39.75164794921875, -89.97183227539062, 59.9588623046875, 2.1616268157958984, 110.40026092529297, 69.44253540039062, 46.497276306152344, 73.21148681640625, -12.802326202392578, 23.05348014831543, 131.50392150878906, 34.64485549926758, 158.06417846679688, -166.23692321777344, -11.256975173950195, 188.24740600585938, 5.708992004394531, -2.688985824584961, 217.20603942871094, -95.47815704345703, 1.7094783782958984, -70.40838623046875, -97.83294677734375, 16.602458953857422, 44.09865188598633, 2.0458145141601562, -29.088382720947266, 2.5958995819091797, 36.691925048828125, -108.82778930664062, 12.253860473632812, 11.207197189331055, 209.7231903076172, 178.18360900878906, 73.06546020507812, 52.2681884765625, 152.16761779785156, 147.94497680664062, -108.66758728027344, 160.7199249267578, -39.80944061279297, 151.15399169921875, 157.4676971435547, 172.27500915527344, 144.47706604003906, 163.00418090820312, 108.1637191772461, 157.52877807617188, 130.03746032714844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000366.npy"}
|
||||
{"epoch": 0.5532879818594104, "step": 367, "batch_size": 64, "mean": 50.503501892089844, "std": 77.6278305053711, "min": -166.26531982421875, "p10": -45.031318855285626, "median": 52.868736267089844, "p90": 151.22531127929688, "max": 173.43223571777344, "pos_frac": 0.796875, "sample": [1.6658134460449219, 171.042724609375, 82.78514862060547, -8.061553955078125, 92.30313110351562, -0.8419342041015625, 140.44271850585938, 148.935791015625, 97.12457275390625, 2.45159912109375, 29.866897583007812, 25.353683471679688, 8.928474426269531, 25.610031127929688, 154.88644409179688, 9.965518951416016, 63.67413330078125, -3.9714279174804688, -52.499305725097656, 152.13681030273438, 10.674209594726562, 83.70030212402344, -11.989725112915039, -56.60557556152344, 70.79827880859375, -159.3897705078125, -58.63365173339844, 87.11638641357422, 8.209430694580078, 27.955413818359375, 98.651123046875, 137.8165740966797, 121.74799346923828, 98.77825927734375, 116.69530487060547, 3.4399642944335938, 70.56803894042969, -66.73846435546875, 21.107275009155273, 153.5443572998047, 22.645004272460938, -166.26531982421875, 10.960290908813477, 173.43223571777344, 111.55782318115234, -23.43426513671875, 80.98342895507812, 16.824871063232422, 131.82960510253906, 24.875545501708984, 27.928848266601562, 66.48958587646484, -129.4183349609375, 146.69873046875, 78.8768310546875, 159.43496704101562, 42.06333923339844, -27.606016159057617, 108.40634155273438, 154.2744903564453, 23.74737548828125, 77.69624328613281, 71.87904357910156, 149.09848022460938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000367.npy"}
|
||||
{"epoch": 0.5547996976568406, "step": 368, "batch_size": 64, "mean": 46.731998443603516, "std": 75.27347564697266, "min": -160.51605224609375, "p10": -46.86295394897461, "median": 50.92617988586426, "p90": 148.1503662109375, "max": 172.3332977294922, "pos_frac": 0.734375, "sample": [-160.51605224609375, 132.8187255859375, 8.316627502441406, 29.29094886779785, 66.41810607910156, 100.86991119384766, 145.2413330078125, 69.6767807006836, -21.33722686767578, -13.853408813476562, 162.76185607910156, 1.1230888366699219, 26.5269775390625, 37.34294128417969, 163.55035400390625, 132.61416625976562, -47.24553680419922, -65.40736389160156, -73.56547546386719, 92.71349334716797, 67.26219177246094, 23.780553817749023, 55.408546447753906, -32.968605041503906, 160.86239624023438, 18.471118927001953, -29.126693725585938, 149.3970947265625, 172.3332977294922, 47.231563568115234, -20.45783042907715, 162.7738037109375, 85.71746063232422, 1.65570068359375, 25.075908660888672, 92.4036865234375, 56.363990783691406, 89.38516235351562, 19.793540954589844, 94.41355895996094, 66.67271423339844, -14.540105819702148, 125.7633056640625, 24.9447021484375, -29.214630126953125, -15.989418029785156, 46.21664810180664, -20.185546875, 135.97067260742188, -45.97026062011719, 58.96260070800781, 131.8031463623047, 151.5595703125, 135.58328247070312, 11.58213996887207, 95.64735412597656, 54.62079620361328, -59.06910705566406, -96.9416275024414, 96.0479736328125, 117.43771362304688, 27.809968948364258, -94.51734924316406, 59.536712646484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000368.npy"}
|
||||
{"epoch": 0.5563114134542706, "step": 369, "batch_size": 64, "mean": 40.28659439086914, "std": 76.40142822265625, "min": -146.84571838378906, "p10": -44.5909412384033, "median": 32.571468353271484, "p90": 157.4821014404297, "max": 220.01095581054688, "pos_frac": 0.6875, "sample": [23.418724060058594, -94.69881439208984, 41.377586364746094, 88.86962890625, -106.87379455566406, 51.605621337890625, 43.39081954956055, -0.678466796875, 131.08653259277344, 17.171337127685547, 16.700111389160156, 127.61203002929688, 137.708251953125, 101.83666229248047, 158.3159637451172, 5.3995513916015625, 78.40465545654297, 46.08390808105469, 15.694908142089844, 34.80099105834961, -8.19329833984375, -146.84571838378906, 57.60612487792969, -73.86557006835938, 45.39781188964844, 95.62077331542969, -25.174579620361328, 69.53413391113281, 51.14432907104492, -20.320714950561523, 131.71527099609375, -23.845550537109375, 58.269020080566406, 163.86712646484375, 84.99991607666016, 142.5492401123047, 172.22801208496094, 4.2935943603515625, 24.820892333984375, -82.31868743896484, -13.52606201171875, 23.300559997558594, -12.71236801147461, 59.64936065673828, -7.099788665771484, 31.40302276611328, 220.01095581054688, 3.5500335693359375, 157.51492309570312, -17.193939208984375, -11.410812377929688, 33.73991394042969, -0.7769050598144531, 162.30661010742188, 1.8741455078125, -6.124143600463867, -0.7310810089111328, 169.2913818359375, 23.76349639892578, 43.545684814453125, -52.91223907470703, 75.68048095703125, -100.91522216796875, 157.405517578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000369.npy"}
|
||||
{"epoch": 0.5578231292517006, "step": 370, "batch_size": 64, "mean": 58.222923278808594, "std": 86.28752136230469, "min": -188.0511016845703, "p10": -32.17846069335936, "median": 56.80152130126953, "p90": 171.9585922241211, "max": 199.67578125, "pos_frac": 0.765625, "sample": [71.63267517089844, 170.66757202148438, 121.05870819091797, 57.66333770751953, 199.67578125, 5.841300964355469, 176.3742218017578, 2.31689453125, 165.21234130859375, 6.870723724365234, 18.248031616210938, 62.52581787109375, 1.7918853759765625, 157.35772705078125, 187.63780212402344, 95.246337890625, 56.19535827636719, -99.05286407470703, -17.624343872070312, 62.929412841796875, 57.407684326171875, 102.29454040527344, 88.21015930175781, 86.70680236816406, 168.9753875732422, 101.770263671875, 86.74967956542969, 28.206493377685547, -7.2091827392578125, -3.507537841796875, -2.5424652099609375, -11.968498229980469, -8.219001770019531, -79.97802734375, 19.765625, 71.61983489990234, 190.26025390625, 41.591766357421875, 160.06655883789062, 151.19700622558594, 184.21383666992188, -188.0511016845703, 21.064393997192383, 81.59284973144531, 15.21296501159668, 172.5118865966797, 107.10655212402344, 129.41128540039062, 151.2178955078125, 174.60821533203125, -163.1873779296875, 0.6637115478515625, 156.1987762451172, -2.8523426055908203, -38.41593933105469, 22.881118774414062, 30.668975830078125, -2.5895843505859375, -71.6306381225586, 2.794981002807617, 43.13904571533203, 154.96701049804688, -46.945194244384766, 47.71992492675781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000370.npy"}
|
||||
{"epoch": 0.5593348450491308, "step": 371, "batch_size": 64, "mean": 51.01673126220703, "std": 79.52940368652344, "min": -92.2214126586914, "p10": -59.17540435791015, "median": 37.93863296508789, "p90": 158.7400314331055, "max": 206.22265625, "pos_frac": 0.71875, "sample": [153.96597290039062, -89.3636703491211, 179.15179443359375, 33.237709045410156, 76.08822631835938, 6.06817626953125, 129.98605346679688, 14.983743667602539, -19.102859497070312, 11.282064437866211, -17.64935302734375, 124.95350646972656, -71.75303649902344, 173.82460021972656, -0.1479644775390625, 206.22265625, -3.038137435913086, 114.03904724121094, 21.652694702148438, 160.7860565185547, 44.90214538574219, -85.70814514160156, -4.864351272583008, 1.62200927734375, 32.98619079589844, 140.78208923339844, 126.4344253540039, 197.55313110351562, 127.3570556640625, 86.24742889404297, 130.5389404296875, 176.16336059570312, -61.461997985839844, 5.227325439453125, 92.77403259277344, 24.180343627929688, -10.307296752929688, -92.2214126586914, -19.681289672851562, 171.0697479248047, -78.37371826171875, 22.30374526977539, 7.367712020874023, 79.06536865234375, 61.57557678222656, -39.812110900878906, 116.35298919677734, 81.62113189697266, -29.89349365234375, 42.639556884765625, 117.30899047851562, -12.900199890136719, 5.555198669433594, 96.42745971679688, 117.82652282714844, 0.7856292724609375, 130.35733032226562, 21.170188903808594, 67.92970275878906, 66.41632843017578, 145.1014862060547, 85.4229507446289, -53.84001922607422, -74.11872863769531], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000371.npy"}
|
||||
{"epoch": 0.5608465608465608, "step": 372, "batch_size": 64, "mean": 30.66883087158203, "std": 94.01616668701172, "min": -164.73355102539062, "p10": -97.25783462524413, "median": 10.514084815979004, "p90": 166.0564407348633, "max": 188.5548095703125, "pos_frac": 0.65625, "sample": [-32.32136535644531, 16.311511993408203, 163.07077026367188, 97.27444458007812, 77.9664306640625, 90.05236053466797, 174.9997100830078, -34.76427459716797, 147.66177368164062, 127.2963638305664, 187.736328125, -6.914886474609375, 104.29396057128906, 183.23678588867188, 40.55773162841797, 124.4356689453125, 128.87896728515625, -155.3612823486328, 9.675010681152344, 19.601661682128906, 6.884563446044922, -10.681516647338867, 30.277191162109375, 162.82138061523438, 7.986377716064453, -17.495155334472656, -92.75601196289062, 2.2642974853515625, 172.15281677246094, -99.18718719482422, 98.23909759521484, 30.875076293945312, -50.45100021362305, -2.712932586669922, -107.48643493652344, 10.602209091186523, -164.73355102539062, 9.847129821777344, 134.94097900390625, 30.053335189819336, 188.5548095703125, 67.928955078125, 134.70556640625, 55.81617736816406, -68.48294067382812, 80.40869140625, -160.50791931152344, 2.1693801879882812, -1.3428897857666016, 6.486305236816406, 7.98675537109375, -145.92514038085938, 101.97616577148438, 3.8202590942382812, -48.46088409423828, -55.975990295410156, -124.95890808105469, 49.832969665527344, 174.109130859375, 167.3360137939453, -82.94342041015625, -10.97723388671875, -4.30499267578125, 10.425960540771484], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000372.npy"}
|
||||
{"epoch": 0.562358276643991, "step": 373, "batch_size": 64, "mean": 42.52513122558594, "std": 92.29227447509766, "min": -164.19577026367188, "p10": -89.5217071533203, "median": 44.41099548339844, "p90": 164.9387985229492, "max": 200.14955139160156, "pos_frac": 0.6875, "sample": [-134.42141723632812, 154.66473388671875, 135.51475524902344, 146.83819580078125, 151.8010711669922, -135.0531005859375, -47.433509826660156, 131.08274841308594, 115.06026458740234, 44.869171142578125, 132.46092224121094, -121.37386322021484, 15.97198486328125, -38.35688781738281, -101.31045532226562, -16.59738540649414, 66.284423828125, 88.50174713134766, -164.19577026367188, -53.160797119140625, 72.4351806640625, -16.15929412841797, 91.2657241821289, -74.35773468017578, 165.34654235839844, 153.10365295410156, -22.348648071289062, -6.8166961669921875, 79.9424057006836, 95.5782699584961, 167.17306518554688, 1.285430908203125, 42.65463638305664, 163.98739624023438, -88.0802001953125, 1.9114799499511719, -1.6434326171875, 80.43405151367188, 2.1509628295898438, 151.77842712402344, 105.28187561035156, -94.17376708984375, -90.13949584960938, -26.26559829711914, 51.331634521484375, 132.19049072265625, -63.80036926269531, 169.5111541748047, 16.343589782714844, 48.899810791015625, -6.63677978515625, 170.17982482910156, 2.888416290283203, 198.28794860839844, 45.61195373535156, 31.57101058959961, 74.69084167480469, 62.36381530761719, 19.90068817138672, 200.14955139160156, 43.95281982421875, 0.08218002319335938, 32.36100769042969, 166.23794555664062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000373.npy"}
|
||||
{"epoch": 0.563869992441421, "step": 374, "batch_size": 64, "mean": 70.90689849853516, "std": 85.58552551269531, "min": -177.06617736816406, "p10": -8.753428649902341, "median": 65.10309600830078, "p90": 178.37656555175784, "max": 255.82626342773438, "pos_frac": 0.8125, "sample": [255.82626342773438, 161.4576873779297, 152.55274963378906, -9.486953735351562, 30.871692657470703, 143.54159545898438, 183.63282775878906, 180.95669555664062, 172.35626220703125, 55.650794982910156, 76.40189361572266, 5.5180511474609375, 108.09725952148438, 223.99685668945312, 93.60350799560547, 28.227432250976562, 150.201416015625, 3.6461181640625, 21.556045532226562, 66.51651763916016, 23.95368194580078, 142.72068786621094, 45.936466217041016, -114.092529296875, 99.58433532714844, 95.85210418701172, 55.7553596496582, 148.14434814453125, -6.557533264160156, 115.09397888183594, 65.96324157714844, 74.78582763671875, 0.28131866455078125, -2.4321670532226562, 87.29576110839844, 64.24295043945312, -0.7073974609375, -177.06617736816406, 214.57730102539062, 104.44993591308594, 29.130367279052734, -118.20099639892578, 36.17212677001953, 197.39707946777344, 194.38226318359375, 55.106468200683594, -57.12298583984375, -7.0418701171875, 54.006744384765625, 43.332496643066406, 140.60691833496094, 140.97119140625, 168.25144958496094, 71.65811157226562, 4.226490020751953, 3.004781723022461, 60.80150604248047, 106.45065307617188, 151.574462890625, 158.4941864013672, -56.79884338378906, -28.434951782226562, 47.94024658203125, -0.7729949951171875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000374.npy"}
|
||||
{"epoch": 0.5653817082388511, "step": 375, "batch_size": 64, "mean": 50.14338684082031, "std": 86.98062896728516, "min": -165.0586700439453, "p10": -52.461466979980464, "median": 48.73200225830078, "p90": 162.56351470947266, "max": 232.71701049804688, "pos_frac": 0.703125, "sample": [-53.665870666503906, -89.1604232788086, 49.58544158935547, 123.93888854980469, 104.70933532714844, 23.38624382019043, 184.22467041015625, -5.643898010253906, 15.845458984375, 104.23683166503906, -9.801383972167969, 125.1505126953125, -42.63771057128906, 156.45701599121094, 5.613567352294922, -2.7975730895996094, 48.1585693359375, -109.75717163085938, 117.2968521118164, -4.170318603515625, 63.81245422363281, 170.54302978515625, 164.03424072265625, -80.95492553710938, 133.18975830078125, 164.929931640625, 3.2219314575195312, -43.283111572265625, 79.6273422241211, 152.55764770507812, 9.369056701660156, 1.6669063568115234, 9.923295974731445, -49.65119171142578, 65.94314575195312, 79.23548889160156, 88.5521011352539, 31.798294067382812, 171.16575622558594, 97.0689926147461, 130.4443359375, 49.30543518066406, 134.428955078125, -165.0586700439453, -27.761367797851562, 29.410507202148438, 184.68527221679688, 4.2193756103515625, 232.71701049804688, 101.13673400878906, 155.96461486816406, 72.51878356933594, -26.085304260253906, 144.46424865722656, -37.03146743774414, 12.960044860839844, 104.34581756591797, -100.65281677246094, -45.74329376220703, 159.13182067871094, -7.055320739746094, 114.49117279052734, 14.273811340332031, -79.65188598632812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000375.npy"}
|
||||
{"epoch": 0.5668934240362812, "step": 376, "batch_size": 64, "mean": 61.93609619140625, "std": 90.99775695800781, "min": -151.67276000976562, "p10": -57.45163993835449, "median": 54.865549087524414, "p90": 171.8001724243164, "max": 278.3945007324219, "pos_frac": 0.671875, "sample": [42.028656005859375, -1.12957763671875, -2.496980667114258, 31.789199829101562, -4.337806701660156, -12.562908172607422, -5.5440826416015625, 136.7381134033203, -10.25738525390625, 61.01897430419922, 101.90115356445312, 92.51641845703125, -151.67276000976562, -2.6552276611328125, -71.17070007324219, 14.607139587402344, 185.243408203125, 72.191162109375, 1.3759803771972656, 278.3945007324219, 49.05436706542969, 187.9766845703125, 3.51531982421875, 156.44810485839844, 201.9864501953125, -19.016998291015625, 145.40591430664062, 150.71641540527344, 153.14895629882812, 103.46715545654297, 138.5232391357422, 55.66481018066406, 163.160888671875, 39.36134338378906, 95.82003021240234, 176.2707061767578, 166.05319213867188, -53.11837387084961, 170.65330505371094, -73.7933349609375, 65.64861297607422, -82.10392761230469, 155.71188354492188, 129.4333038330078, 123.1908187866211, 181.10641479492188, -59.308753967285156, 133.59225463867188, -46.53431701660156, 153.02975463867188, -77.99736785888672, 14.678176879882812, 22.025222778320312, 54.066287994384766, 172.29168701171875, -4.142267227172852, 136.5265350341797, -11.49819564819336, 162.9810791015625, 92.860595703125, -47.99699401855469, 6.27833366394043, -0.2756328582763672, -76.92874145507812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000376.npy"}
|
||||
{"epoch": 0.5684051398337112, "step": 377, "batch_size": 64, "mean": 44.500518798828125, "std": 81.45450592041016, "min": -157.58375549316406, "p10": -51.996906280517564, "median": 37.154232025146484, "p90": 155.04193267822268, "max": 195.81124877929688, "pos_frac": 0.703125, "sample": [112.82368469238281, -30.833091735839844, 136.34646606445312, 139.3655242919922, 75.48384094238281, 6.252204895019531, -110.66988372802734, -66.60916137695312, 44.422279357910156, 20.26293182373047, 163.38330078125, 4.930595397949219, 120.75086212158203, -2.5406494140625, 151.1416778564453, 138.0299072265625, -118.10298919677734, 69.17752838134766, -64.38551330566406, 10.3094482421875, 176.4456787109375, -4.85296630859375, 80.89469909667969, 27.922996520996094, 111.95130920410156, -153.3556671142578, 17.849082946777344, 195.81124877929688, 44.84914779663086, -16.933324813842773, 150.628173828125, -8.208707809448242, 125.6801986694336, 1.2455291748046875, -12.829574584960938, -59.63335418701172, 40.809539794921875, 33.498924255371094, 163.2021484375, 20.964004516601562, -0.9046401977539062, 84.89826202392578, 3.757326126098633, 84.69290924072266, 72.78536987304688, 164.6229705810547, 74.05935668945312, -11.054615020751953, -34.17852783203125, 48.625, 140.72686767578125, 14.246002197265625, -18.8316593170166, 162.4141082763672, 5.073738098144531, -13.385919570922852, 156.71347045898438, 86.63106536865234, -157.58375549316406, 9.832313537597656, 93.916015625, -8.019721984863281, 60.695343017578125, 92.82386779785156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000377.npy"}
|
||||
{"epoch": 0.5699168556311414, "step": 378, "batch_size": 64, "mean": 41.38316345214844, "std": 79.2090072631836, "min": -158.20065307617188, "p10": -26.569768905639645, "median": 21.010591506958008, "p90": 158.41674957275393, "max": 199.92413330078125, "pos_frac": 0.734375, "sample": [137.54983520507812, 81.33719635009766, 1.2856388092041016, 16.98041534423828, 84.2301254272461, 155.18374633789062, -43.80836486816406, 20.846141815185547, 47.82804870605469, 138.4029998779297, -158.20065307617188, 2.486268997192383, 199.92413330078125, -5.507617950439453, 164.7360076904297, 109.85675811767578, 74.85749816894531, 2.5538406372070312, 163.5877685546875, -5.361148834228516, 72.95286560058594, 5.04541015625, 13.33504867553711, 32.360984802246094, 159.8023223876953, 13.311752319335938, 72.76688385009766, 7.391632080078125, -88.572998046875, 22.274646759033203, 101.24305725097656, 26.582725524902344, -91.07025146484375, -2.9807567596435547, 113.14166259765625, 19.853302001953125, -22.104446411132812, 21.17504119873047, 135.62158203125, 1.766672134399414, 182.1999969482422, 80.03919982910156, -4.103302001953125, 143.28048706054688, 28.341400146484375, 141.70513916015625, -17.68301010131836, 162.44581604003906, 81.05286407470703, 9.675542831420898, 2.9078636169433594, -12.97747802734375, 6.363109588623047, -126.39891052246094, -28.483478546142578, 58.72160339355469, -20.165990829467773, 72.61654663085938, 0.32383155822753906, 74.03507232666016, -8.741580963134766, -142.8520050048828, -7.86088752746582, 171.414794921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000378.npy"}
|
||||
{"epoch": 0.5714285714285714, "step": 379, "batch_size": 64, "mean": 57.49687957763672, "std": 85.11766052246094, "min": -137.31060791015625, "p10": -44.91644325256347, "median": 38.99872589111328, "p90": 172.17576141357424, "max": 234.86965942382812, "pos_frac": 0.796875, "sample": [91.61009216308594, -16.74896240234375, 152.29263305664062, -6.4820098876953125, 11.3310546875, 53.68174743652344, 20.581274032592773, -137.31060791015625, 69.37583923339844, 8.377708435058594, 22.950286865234375, 45.13124084472656, -79.3480224609375, 131.56053161621094, 16.77845001220703, 234.86965942382812, 31.509368896484375, -49.08552932739258, 142.78488159179688, 179.755126953125, 231.873779296875, 99.84280395507812, 164.72315979003906, 137.72073364257812, 59.7392578125, -53.18408203125, 108.77721405029297, -88.10185241699219, 38.46198272705078, -66.27447509765625, 223.81027221679688, 39.53546905517578, 2.641082763671875, 56.0848274230957, -102.97454833984375, 183.18783569335938, 160.01123046875, 13.741188049316406, 19.83349609375, -5.067699432373047, -35.188575744628906, 12.181655883789062, 67.2416763305664, 175.1088409423828, 155.90480041503906, 10.97494125366211, 165.3319091796875, 219.92901611328125, 2.7388572692871094, -11.895401000976562, 121.98838806152344, 12.56414794921875, 30.452194213867188, 67.47614288330078, 128.8401641845703, 1.3498039245605469, 37.95338439941406, 68.6427001953125, 43.767520904541016, 3.706705093383789, 112.41986846923828, 28.341506958007812, 126.06886291503906, -14.09527587890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000379.npy"}
|
||||
{"epoch": 0.5729402872260015, "step": 380, "batch_size": 64, "mean": 70.16085815429688, "std": 86.60851287841797, "min": -167.0467987060547, "p10": -29.849593353271473, "median": 76.69994354248047, "p90": 165.26756896972657, "max": 226.17059326171875, "pos_frac": 0.796875, "sample": [46.816341400146484, 153.4566650390625, 94.56820678710938, 35.942962646484375, -65.07040405273438, -121.82357025146484, 164.75401306152344, 174.55010986328125, -3.693889617919922, 147.46875, 126.0699462890625, 49.318973541259766, -5.022472381591797, 85.29600524902344, 5.040317535400391, -36.77369689941406, -2.262226104736328, 66.30707550048828, 144.71627807617188, 165.30410766601562, 90.63096618652344, 70.2390365600586, -13.644866943359375, -167.0467987060547, 155.53634643554688, 4.8224945068359375, 133.69436645507812, 68.7490005493164, -33.92658996582031, 23.44746208190918, 133.23211669921875, 155.5203857421875, 84.5467300415039, 62.87276840209961, 226.17059326171875, 7.859340667724609, 150.49615478515625, 139.192626953125, 37.07170104980469, 1.2850112915039062, 152.43511962890625, -142.8895263671875, 164.08372497558594, 165.18231201171875, 55.591636657714844, 140.179931640625, -20.33660125732422, 111.66029357910156, -15.787567138671875, 129.14219665527344, 19.531723022460938, 3.1398448944091797, -60.27647399902344, 187.65133666992188, 171.0824737548828, 161.43173217773438, 8.456445693969727, 185.26614379882812, 132.49087524414062, 175.16580200195312, 83.16085052490234, 91.64608001708984, 16.09941291809082, 20.47467041015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000380.npy"}
|
||||
{"epoch": 0.5744520030234316, "step": 381, "batch_size": 64, "mean": 27.258869171142578, "std": 86.94832611083984, "min": -198.08131408691406, "p10": -63.65273971557617, "median": 13.121234893798828, "p90": 159.72483825683597, "max": 244.55096435546875, "pos_frac": 0.671875, "sample": [23.62115478515625, 8.481582641601562, -0.12315177917480469, -102.69611358642578, -198.08131408691406, 8.244754791259766, 0.06404876708984375, 26.39459228515625, 174.875732421875, 89.04182434082031, 6.957267761230469, -3.546710968017578, 13.226036071777344, 36.48945999145508, 0.8954753875732422, 244.55096435546875, 120.52037048339844, 0.4796943664550781, 76.42926025390625, -2.796234130859375, 13.890714645385742, -68.53356170654297, 7.156320571899414, 203.3791961669922, -102.58480072021484, 73.80365753173828, -60.24237060546875, -59.56256103515625, 102.42341613769531, -3.377614974975586, 65.20185089111328, 153.91375732421875, 14.491279602050781, 17.563705444335938, -19.43558120727539, 174.6349334716797, 16.87796401977539, -41.66022872924805, -133.48020935058594, 18.119216918945312, 162.21530151367188, -17.80712890625, 3.3057479858398438, 12.370290756225586, 49.95600128173828, 171.9183807373047, 135.66563415527344, 70.50848388671875, -2.9644908905029297, 41.08904266357422, 5.4776458740234375, 111.10916137695312, 137.65931701660156, -20.374374389648438, 38.21139144897461, 13.016433715820312, 84.03173828125, -194.16421508789062, -65.11432647705078, 50.3612060546875, -5.7095947265625, -57.029762268066406, -38.760475158691406, 163.988525390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000381.npy"}
|
||||
{"epoch": 0.5759637188208617, "step": 382, "batch_size": 64, "mean": 47.92848587036133, "std": 73.20858764648438, "min": -159.78189086914062, "p10": -24.244963073730467, "median": 34.97746276855469, "p90": 150.9285430908203, "max": 203.9752197265625, "pos_frac": 0.734375, "sample": [-19.94512939453125, 5.539163589477539, 197.3946533203125, 65.82040405273438, 176.03860473632812, -16.80731201171875, 59.18861389160156, 25.839317321777344, 121.35577392578125, 80.60134887695312, 68.65037536621094, -16.152359008789062, -26.726776123046875, 30.142913818359375, 139.53831481933594, 28.205947875976562, -24.608322143554688, -23.397125244140625, -159.78189086914062, 142.89120483398438, -1.851064682006836, 49.51771545410156, 13.827194213867188, 5.0595245361328125, -22.441390991210938, 1.136932373046875, 91.25035095214844, 52.03898620605469, -73.9872817993164, 170.49940490722656, 92.64730834960938, 17.946510314941406, -65.15394592285156, -65.72344970703125, 165.2784423828125, 3.7167510986328125, -46.50118637084961, 3.2739105224609375, 45.8795166015625, 151.23922729492188, 77.86575317382812, 203.9752197265625, -0.5671539306640625, 20.028427124023438, 36.452911376953125, 91.27526092529297, -13.392032623291016, -0.36084747314453125, 33.50201416015625, 74.9007339477539, 88.43649291992188, 113.6063232421875, -16.51462173461914, 52.76047134399414, 136.82427978515625, 1.9330940246582031, 70.15679931640625, 26.566524505615234, 150.20361328125, 173.60992431640625, 101.15092468261719, 116.86924743652344, 73.61024475097656, 13.088325500488281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000382.npy"}
|
||||
{"epoch": 0.5774754346182918, "step": 383, "batch_size": 64, "mean": 47.7313232421875, "std": 93.06739044189453, "min": -190.38516235351562, "p10": -71.63648071289063, "median": 35.90862274169922, "p90": 167.42366943359377, "max": 216.05841064453125, "pos_frac": 0.703125, "sample": [-2.6401004791259766, 147.76107788085938, 96.07708740234375, 13.147781372070312, 15.239799499511719, -56.343482971191406, 129.94815063476562, 61.26654052734375, 17.76280975341797, -92.96442413330078, 2.0104103088378906, -97.89082336425781, 79.33432006835938, 76.3807601928711, 111.87238311767578, 59.20513916015625, 161.27777099609375, -70.8265609741211, 182.63934326171875, 158.4870147705078, 33.64375305175781, -21.489776611328125, 181.1030731201172, -52.78460693359375, 161.98333740234375, 15.18035888671875, 41.61370849609375, -17.190933227539062, 21.247987747192383, 211.11627197265625, 136.03587341308594, -71.98358917236328, -21.242637634277344, 191.52938842773438, -27.931621551513672, 95.66665649414062, 140.4920654296875, -190.38516235351562, 9.732749938964844, 61.90171813964844, -114.64891815185547, 48.93284606933594, 14.33447265625, 38.173492431640625, 168.56802368164062, -90.20603942871094, 24.50849151611328, 44.56927490234375, -52.82945251464844, 216.05841064453125, 26.56195640563965, 183.177978515625, 111.95992279052734, 14.844459533691406, -9.01934814453125, 156.77752685546875, 135.54476928710938, 17.548980712890625, -14.196662902832031, -105.24613952636719, 163.6029052734375, -42.96269226074219, 164.75350952148438, 64.01342010498047], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000383.npy"}
|
||||
{"epoch": 0.5789871504157218, "step": 384, "batch_size": 64, "mean": 49.12771987915039, "std": 79.06175231933594, "min": -213.82962036132812, "p10": -45.574657058715815, "median": 51.0227108001709, "p90": 165.42730407714845, "max": 200.09527587890625, "pos_frac": 0.828125, "sample": [-55.60302734375, 164.45773315429688, 54.28118896484375, 125.48223876953125, 4.371723175048828, 83.75676727294922, 91.39453887939453, 41.889015197753906, 175.3002166748047, -46.84699630737305, 41.071624755859375, 50.01200866699219, 64.806884765625, 11.556121826171875, -14.162635803222656, 33.5362548828125, 7.131126403808594, 187.81021118164062, 2.243255615234375, 165.84283447265625, -12.832252502441406, 56.010498046875, 18.28559112548828, -42.605865478515625, -117.80183410644531, 98.88407897949219, 200.09527587890625, 67.5927963256836, -117.42292022705078, 107.96833801269531, 170.3733673095703, 116.60302734375, 1.8310966491699219, 79.34886932373047, 88.78463745117188, 13.482650756835938, 172.52328491210938, 31.329017639160156, -213.82962036132812, 157.574462890625, 52.89727783203125, 70.02171325683594, 104.65558624267578, -56.58436584472656, 166.614990234375, 65.61843872070312, 36.688480377197266, 9.873956680297852, 52.03341293334961, 131.2876434326172, 67.47305297851562, 15.256546020507812, 67.11601257324219, 45.01145935058594, 12.589820861816406, 1.5444145202636719, -102.14981842041016, 48.77915954589844, 4.7113800048828125, -1.8161659240722656, 74.41494750976562, 127.8216323852539, 55.37895202636719, 30.409812927246094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000384.npy"}
|
||||
{"epoch": 0.5804988662131519, "step": 385, "batch_size": 64, "mean": 65.61822509765625, "std": 79.57223510742188, "min": -72.46875, "p10": -36.385102081298825, "median": 51.721853256225586, "p90": 171.67349548339845, "max": 222.42005920410156, "pos_frac": 0.765625, "sample": [-66.4455795288086, -5.013889312744141, 161.19027709960938, 94.4317626953125, 36.44337463378906, 6.27386474609375, 47.4429931640625, 122.55654907226562, 26.537818908691406, 32.10809326171875, -57.4891471862793, 104.62692260742188, 90.99542236328125, 163.00970458984375, 48.185935974121094, 142.81671142578125, 170.908447265625, 137.0254669189453, 3.6257553100585938, 48.45671463012695, -36.77284240722656, -36.43205261230469, 31.843292236328125, 168.3587646484375, 172.00137329101562, 202.46080017089844, 44.492984771728516, 71.37992858886719, 54.98699188232422, 125.83366394042969, 112.97099304199219, -72.46875, 59.75813293457031, 5.939788818359375, 55.18144607543945, 175.99139404296875, 145.09906005859375, 97.06718444824219, 102.33221435546875, 120.56432342529297, -25.832401275634766, -53.174888610839844, 144.79647827148438, 40.443939208984375, 12.090843200683594, 222.42005920410156, 193.87351989746094, -40.82511901855469, -9.630779266357422, -15.495437622070312, -28.42620849609375, 30.07883071899414, -35.5584716796875, -11.470474243164062, 142.66156005859375, 78.18988800048828, 12.116035461425781, 164.66690063476562, -36.275550842285156, 185.0904541015625, 0.5410575866699219, 0.8648147583007812, 172.12527465820312, 148.02023315429688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000385.npy"}
|
||||
{"epoch": 0.582010582010582, "step": 386, "batch_size": 64, "mean": 41.51055145263672, "std": 78.1550064086914, "min": -178.0516357421875, "p10": -51.868225097656236, "median": 50.86460494995117, "p90": 134.70151977539064, "max": 199.1033935546875, "pos_frac": 0.71875, "sample": [101.45449829101562, -101.66582489013672, -0.11167526245117188, 40.89390563964844, 62.05213928222656, -28.80859375, -0.05057525634765625, 78.06517028808594, 86.73280334472656, 89.35234832763672, 28.04815673828125, 75.45263671875, -141.26930236816406, 4.563720703125, 29.78821563720703, 134.08303833007812, 134.96658325195312, 112.89167785644531, 135.84475708007812, 104.24775695800781, 11.629308700561523, 79.8827133178711, 61.44989776611328, -68.00302124023438, -70.17611694335938, 145.56146240234375, 108.95903015136719, -21.448455810546875, 115.34307098388672, 81.0175552368164, 25.166725158691406, 87.27960205078125, -12.117332458496094, 32.6363525390625, -28.242435455322266, -30.709976196289062, 66.04798126220703, 13.790313720703125, 92.3328857421875, 172.02890014648438, 18.830413818359375, -37.527015686035156, 79.71987915039062, 102.15896606445312, 115.1524429321289, -33.0724983215332, -57.0184326171875, 26.336753845214844, 1.0009174346923828, 189.85545349121094, 14.923591613769531, -39.85107421875, -120.44732666015625, 127.13655090332031, 199.1033935546875, 60.835304260253906, 69.15065002441406, 74.02342224121094, 66.87236022949219, 154.89285278320312, 16.165481567382812, -16.987895965576172, 14.512672424316406, -178.0516357421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000386.npy"}
|
||||
{"epoch": 0.5835222978080121, "step": 387, "batch_size": 64, "mean": 62.90465545654297, "std": 74.74444580078125, "min": -128.70033264160156, "p10": -18.221213912963865, "median": 52.10095977783203, "p90": 170.13511810302737, "max": 250.6725616455078, "pos_frac": 0.828125, "sample": [54.61712646484375, 10.796043395996094, 139.71987915039062, -36.133636474609375, 2.732330322265625, 193.0237274169922, 68.07699584960938, -18.892475128173828, 41.05876159667969, 250.6725616455078, 15.768770217895508, -3.806528091430664, 53.304107666015625, 54.74986267089844, 172.0930633544922, 3.1594161987304688, 31.65789794921875, 158.94776916503906, 159.4835968017578, 25.07330322265625, 142.0902099609375, 171.48123168945312, 103.72880554199219, -28.54720687866211, 3.5342330932617188, 134.2291717529297, 1.9238052368164062, 23.91990089416504, 153.4217529296875, 61.231964111328125, 1.0049018859863281, -42.961944580078125, 37.651588439941406, 166.9941864013672, -16.654937744140625, 90.36579132080078, 56.1343879699707, 30.362428665161133, 23.084300994873047, 1.6632537841796875, -0.6272525787353516, 71.44160461425781, 94.1366958618164, -128.70033264160156, -36.23149108886719, 58.53251266479492, 133.59616088867188, 183.00814819335938, 50.89781188964844, 161.59140014648438, -10.79202651977539, 42.98335266113281, 25.076751708984375, 67.1085205078125, 73.07400512695312, 47.17800521850586, -28.65196990966797, 24.170209884643555, 87.02033996582031, 191.8923797607422, 215.38467407226562, 73.84685516357422, 27.999296188354492, 111.20199584960938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000387.npy"}
|
||||
{"epoch": 0.5850340136054422, "step": 388, "batch_size": 64, "mean": 39.246063232421875, "std": 90.58784484863281, "min": -157.66073608398438, "p10": -60.075384521484374, "median": 22.951900482177734, "p90": 159.32042846679687, "max": 248.94998168945312, "pos_frac": 0.6875, "sample": [177.1915283203125, 81.179443359375, 112.56918334960938, 191.6438446044922, 59.616233825683594, 126.25444030761719, 37.24211120605469, 30.103927612304688, 26.583457946777344, -31.44062042236328, 41.739234924316406, 147.84353637695312, -15.59527587890625, 107.61036682128906, -18.984939575195312, 248.94998168945312, 2.5588951110839844, 227.5823516845703, 15.460708618164062, 128.8626708984375, 149.32839965820312, 4.134613037109375, 152.59498596191406, 28.408340454101562, 159.7278289794922, -157.66073608398438, 59.54736328125, 66.01698303222656, 19.232667922973633, 5.330018997192383, -2.233896255493164, -11.84642219543457, -8.083694458007812, 153.1927490234375, -1.7084102630615234, 19.320343017578125, 29.540538787841797, -61.347381591796875, 49.24058532714844, -91.22280883789062, 49.268699645996094, 140.94091796875, -133.2836151123047, 126.59870910644531, 163.51364135742188, 3.8087310791015625, 10.217155456542969, -0.29138946533203125, 6.662353515625, 158.3698272705078, -10.716720581054688, 165.64488220214844, -54.79792785644531, 3.7493743896484375, -144.90725708007812, -57.107391357421875, -55.69544982910156, 74.09844970703125, -12.705142974853516, 4.4998931884765625, 47.66717529296875, -120.74915313720703, -115.92695617675781, 4.406038284301758], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000388.npy"}
|
||||
{"epoch": 0.5865457294028723, "step": 389, "batch_size": 64, "mean": 63.52134704589844, "std": 82.20355224609375, "min": -161.87442016601562, "p10": -25.388099288940428, "median": 45.486562728881836, "p90": 167.55018310546876, "max": 240.16595458984375, "pos_frac": 0.71875, "sample": [-41.85124206542969, -7.756376266479492, 93.2743148803711, 64.76824951171875, 129.68861389160156, 34.21611022949219, 132.18402099609375, 141.81417846679688, 197.17141723632812, 107.62567138671875, -30.80145263671875, -26.665542602539062, 66.37596130371094, 120.33551788330078, 25.194808959960938, 90.14234924316406, 97.735107421875, 158.99734497070312, 87.70462036132812, 20.15908432006836, 159.0444793701172, 163.75430297851562, -3.088716506958008, 28.543548583984375, 126.09165954589844, -9.347450256347656, 152.5412139892578, -21.858665466308594, 43.2095947265625, 83.90211486816406, -15.58123779296875, 142.30453491210938, -14.238479614257812, 166.29296875, 173.67555236816406, 28.61590576171875, 185.16482543945312, 88.06966400146484, 45.18075942993164, -161.87442016601562, 200.30581665039062, -5.021854400634766, 6.603796005249023, -22.407398223876953, 45.79236602783203, 1.3302764892578125, 168.0889892578125, -37.52909469604492, -72.95803833007812, 26.669464111328125, 4.836372375488281, 2.8432083129882812, 27.428466796875, -2.2688941955566406, -33.891868591308594, 192.0321044921875, 166.2193145751953, 145.9489288330078, 70.20196533203125, 240.16595458984375, -10.769838333129883, 132.52346801757812, 2.5629940032958984, -4.055332183837891], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000389.npy"}
|
||||
{"epoch": 0.5880574452003023, "step": 390, "batch_size": 64, "mean": 48.61649703979492, "std": 75.8238525390625, "min": -157.24493408203125, "p10": -45.011054229736324, "median": 35.629695892333984, "p90": 151.6125030517578, "max": 194.66983032226562, "pos_frac": 0.765625, "sample": [-57.43011474609375, 90.52062225341797, 71.67683410644531, 29.760955810546875, 139.5234375, 32.951759338378906, 146.2392578125, 113.17253875732422, -56.53630828857422, 27.780738830566406, 0.5633602142333984, -22.336624145507812, 136.79141235351562, 38.30763244628906, 0.5420303344726562, 189.57669067382812, 152.37564086914062, -46.48870849609375, 99.19126892089844, -48.569740295410156, 103.69972229003906, 8.967735290527344, 6.30535888671875, 164.8499755859375, 138.324951171875, 153.8355712890625, 12.747344970703125, -78.02105712890625, 1.1961860656738281, 110.29450225830078, -30.101234436035156, 6.962161064147949, -18.196455001831055, 43.09449005126953, -7.647449493408203, -41.563194274902344, -157.24493408203125, 96.4886474609375, 158.72828674316406, -0.10472488403320312, 149.83184814453125, 13.682535171508789, -6.390495300292969, 61.73756408691406, 1.0858802795410156, 82.78141784667969, 12.780525207519531, 22.19727325439453, 194.66983032226562, 38.437232971191406, 141.14041137695312, 111.9683837890625, 9.329460144042969, 46.93280029296875, 4.2330474853515625, 97.88555908203125, 0.15778160095214844, -77.71835327148438, 104.4072265625, -21.747045516967773, 91.44004821777344, 77.25474548339844, 68.4209976196289, 176.70855712890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000390.npy"}
|
||||
{"epoch": 0.5895691609977324, "step": 391, "batch_size": 64, "mean": 57.98676300048828, "std": 90.58648681640625, "min": -160.9688262939453, "p10": -59.57695999145507, "median": 62.79921340942383, "p90": 169.31463317871095, "max": 248.51995849609375, "pos_frac": 0.765625, "sample": [20.531356811523438, -160.9688262939453, 166.819580078125, 61.09806823730469, 48.894813537597656, 173.717529296875, 8.204113006591797, 170.38394165039062, 162.4672393798828, 248.51995849609375, 72.13497924804688, -139.77308654785156, 133.4965057373047, -13.597282409667969, 80.41195678710938, 190.52145385742188, -95.60934448242188, 50.879356384277344, -42.44245910644531, 28.88903045654297, -4.6423797607421875, 126.61236572265625, 98.20098876953125, 64.50035858154297, 34.287940979003906, -49.57438659667969, -129.49888610839844, -15.47451400756836, 68.09986877441406, -13.949333190917969, 243.06178283691406, 114.82925415039062, 82.2523422241211, -0.47281646728515625, 78.2957763671875, 1.4441108703613281, 10.885377883911133, 85.67489624023438, 122.59837341308594, 134.411865234375, 121.63916015625, 74.36769104003906, -141.9099578857422, -74.69625854492188, 55.34619903564453, 49.18025588989258, 180.75030517578125, 23.67603302001953, 151.68975830078125, 129.85354614257812, 15.70953369140625, 3.4243621826171875, 163.7291259765625, 136.81314086914062, 175.35916137695312, 140.3863525390625, 94.74435424804688, 58.31150817871094, -63.86377716064453, 9.234001159667969, 31.112045288085938, 68.73500061035156, 92.86907196044922, -1.429718017578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000391.npy"}
|
||||
{"epoch": 0.5910808767951625, "step": 392, "batch_size": 64, "mean": 62.606689453125, "std": 92.0128402709961, "min": -165.59689331054688, "p10": -43.05695343017577, "median": 61.50918960571289, "p90": 180.29336395263675, "max": 289.62969970703125, "pos_frac": 0.78125, "sample": [-147.87344360351562, -0.9438934326171875, 47.67267608642578, 67.83274841308594, 26.48736572265625, -14.929786682128906, 162.65264892578125, -61.001808166503906, 95.02801513671875, 160.78640747070312, 82.38615417480469, -165.59689331054688, 123.04934692382812, 166.5957489013672, 201.16860961914062, 60.9912109375, 9.531936645507812, 162.54530334472656, 167.9248809814453, 11.22613525390625, 45.41089630126953, -9.073348999023438, 68.13851165771484, -95.69483184814453, 124.10527038574219, 158.05178833007812, 169.8861083984375, 62.02716827392578, 67.31622314453125, -109.38209533691406, 67.9498291015625, 196.2975311279297, 129.7496795654297, 16.81787109375, -49.79750061035156, 80.87351989746094, 31.99339485168457, 107.01535034179688, 84.74588012695312, 149.87200927734375, 196.93911743164062, 189.58401489257812, -79.77661895751953, 125.3349609375, 1.0677680969238281, 1.8387260437011719, 4.635368347167969, 29.448272705078125, 184.7536163330078, -0.2270965576171875, -0.9067840576171875, 289.62969970703125, 5.281543731689453, 36.2374267578125, -27.329010009765625, 7.023797988891602, 219.45458984375, -25.278411865234375, 27.785797119140625, 118.860595703125, 2.5564212799072266, 107.19384002685547, 39.33866882324219, 101.5452880859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000392.npy"}
|
||||
{"epoch": 0.5925925925925926, "step": 393, "batch_size": 64, "mean": 65.83646392822266, "std": 77.78910827636719, "min": -112.12355041503906, "p10": -20.756110572814936, "median": 52.91269302368164, "p90": 170.85834045410158, "max": 220.63088989257812, "pos_frac": 0.765625, "sample": [90.72691345214844, -5.185686111450195, 128.29510498046875, 172.94509887695312, 167.26417541503906, 102.49962615966797, 36.08159255981445, 52.336265563964844, 75.51485443115234, 50.651432037353516, -27.493085861206055, 2.3225173950195312, 189.53118896484375, 142.10235595703125, 53.48912048339844, -47.51769256591797, 0.5670089721679688, 177.1315155029297, 11.789474487304688, -3.8591575622558594, -10.627105712890625, 3.9502811431884766, 58.8582763671875, 189.2994384765625, 129.18975830078125, 164.65846252441406, -27.36454200744629, 142.9381561279297, 125.40423583984375, 128.0674591064453, 51.63671875, -62.071964263916016, 152.6866912841797, 172.39869689941406, -0.20614051818847656, 40.582000732421875, -73.949951171875, 181.14584350585938, -2.0704727172851562, -15.358335494995117, 101.40644073486328, 19.140213012695312, 143.96820068359375, -8.197750091552734, 60.346717834472656, 137.7482147216797, 140.75933837890625, 5.3048248291015625, 48.81349563598633, 153.31997680664062, 159.7269744873047, 220.63088989257812, 68.75401306152344, -8.459907531738281, 19.052169799804688, 140.07696533203125, 31.232406616210938, 56.85972595214844, -112.12355041503906, -23.069442749023438, 11.720279693603516, 114.74433898925781, 6.3296051025390625, 7.0894927978515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000393.npy"}
|
||||
{"epoch": 0.5941043083900227, "step": 394, "batch_size": 64, "mean": 57.29436492919922, "std": 71.10365295410156, "min": -146.14430236816406, "p10": -23.53983154296875, "median": 60.642974853515625, "p90": 150.931591796875, "max": 207.93258666992188, "pos_frac": 0.796875, "sample": [19.451385498046875, 162.2596893310547, 118.40864562988281, 27.69927978515625, 19.61724853515625, 52.209686279296875, -10.49664306640625, 55.945404052734375, 191.86770629882812, 47.6055908203125, 179.69598388671875, 122.33135223388672, 28.761920928955078, 48.30876541137695, -48.58555221557617, 47.01769256591797, -12.268463134765625, -10.260408401489258, -23.2041015625, 19.866886138916016, 147.3083953857422, -35.327884674072266, 69.52862548828125, 71.39598083496094, 73.77550506591797, 182.87405395507812, 134.70896911621094, 100.82539367675781, 125.9417953491211, 5.006004333496094, -93.53326416015625, 22.10113525390625, 159.53236389160156, 82.94773864746094, 50.48133850097656, 104.67678833007812, -1.206472396850586, 91.30381774902344, 65.34054565429688, 9.044368743896484, 95.03239440917969, -58.903133392333984, -23.6837158203125, 67.92356872558594, 88.99201965332031, 207.93258666992188, -146.14430236816406, 120.641845703125, -3.4254074096679688, 68.78436279296875, -86.09626770019531, 88.54349517822266, 48.59941864013672, 89.1754150390625, 72.329833984375, 69.4211654663086, 75.17936706542969, 127.46065521240234, 2.488067626953125, 30.751495361328125, 18.12139892578125, 152.48439025878906, 41.927154541015625, 116.34636688232422], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000394.npy"}
|
||||
{"epoch": 0.5956160241874527, "step": 395, "batch_size": 64, "mean": 48.873130798339844, "std": 78.11553192138672, "min": -108.2734146118164, "p10": -34.2186222076416, "median": 24.753028869628906, "p90": 159.29958801269532, "max": 248.25631713867188, "pos_frac": 0.75, "sample": [121.807861328125, 143.13153076171875, 109.36921691894531, 7.179145812988281, 129.60739135742188, 54.74995422363281, -2.3976497650146484, 95.38814544677734, 177.62948608398438, -8.758401870727539, 37.35833740234375, 81.08734130859375, 187.5232391357422, 126.7016372680664, 24.961959838867188, 168.2480926513672, -16.34912109375, 89.86860656738281, 23.20654296875, 9.414077758789062, -3.7733993530273438, 17.28095245361328, 5.619354248046875, 65.02815246582031, 146.5640106201172, -108.2734146118164, 30.501815795898438, 158.351806640625, 60.47018814086914, -9.503639221191406, 62.66754150390625, 248.25631713867188, 59.11322021484375, 41.367515563964844, -88.84663391113281, 24.544097900390625, -34.30772018432617, 14.339344024658203, 78.07317352294922, -47.74150848388672, -8.146759033203125, 0.6120052337646484, 147.69741821289062, 13.801799774169922, 4.228305816650391, 74.23126220703125, 5.122276306152344, 203.4977569580078, -34.01072692871094, -19.427804946899414, 3.9191627502441406, 234.90150451660156, -52.00238037109375, 6.570751190185547, 30.952438354492188, 159.70578002929688, -53.1710090637207, 19.441696166992188, 90.04056549072266, -74.5826187133789, 9.782711029052734, -19.3448486328125, 7.009576797485352, 97.59308624267578], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000395.npy"}
|
||||
{"epoch": 0.5971277399848829, "step": 396, "batch_size": 64, "mean": 55.221946716308594, "std": 74.20340728759766, "min": -131.3070068359375, "p10": -19.476825332641596, "median": 35.602054595947266, "p90": 162.9152374267578, "max": 235.9838104248047, "pos_frac": 0.765625, "sample": [163.69155883789062, 235.9838104248047, 152.2238006591797, 152.63955688476562, 150.28936767578125, 102.75721740722656, 16.444400787353516, 161.10382080078125, 55.973655700683594, 32.70624542236328, 86.28142547607422, 20.261383056640625, 119.44330596923828, -35.2901496887207, 46.289466857910156, -1.6798248291015625, 39.33819580078125, 71.42935943603516, 1.1971168518066406, 51.66691589355469, 75.24996948242188, -34.544036865234375, 183.55612182617188, 47.49987030029297, -1.5494155883789062, 37.048866271972656, 119.21755981445312, -131.3070068359375, 125.24165344238281, 101.7171630859375, 3.4399986267089844, -71.59783935546875, 0.5941963195800781, 139.69699096679688, 34.155242919921875, 26.150054931640625, 106.80072784423828, 17.738174438476562, 190.93936157226562, 180.233154296875, -3.7282447814941406, 18.837472915649414, 1.1673240661621094, 87.82386779785156, -0.40468597412109375, 54.4490966796875, 16.255714416503906, 13.640769958496094, 77.82913208007812, -48.242919921875, -5.4463043212890625, -21.886455535888672, 14.481386184692383, 17.6807861328125, -6.066730499267578, 159.93951416015625, 66.95740509033203, 176.65614318847656, 4.620517730712891, 28.419858932495117, -13.854354858398438, -31.523868560791016, -10.784324645996094, 164.35226440429688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000396.npy"}
|
||||
{"epoch": 0.5986394557823129, "step": 397, "batch_size": 64, "mean": 31.386646270751953, "std": 90.03242492675781, "min": -193.88819885253906, "p10": -73.32124328613281, "median": 31.339957237243652, "p90": 145.013542175293, "max": 265.2132873535156, "pos_frac": 0.671875, "sample": [-0.5114612579345703, 40.77283477783203, -68.66233825683594, 52.648895263671875, -53.23743438720703, 80.37959289550781, 134.44515991210938, 64.26660919189453, 134.16424560546875, -18.550186157226562, 1.68646240234375, 65.21746826171875, 17.14574432373047, 95.66682434082031, 80.7036361694336, 104.1374740600586, -178.25958251953125, -109.06101989746094, 51.58114242553711, -0.6762313842773438, 99.35002136230469, -7.1957550048828125, 75.79898071289062, 96.13827514648438, 11.734474182128906, -193.88819885253906, -75.31791687011719, 16.367774963378906, 171.19052124023438, 53.15068054199219, 14.881065368652344, 28.41437339782715, 146.799072265625, 43.921966552734375, 162.02247619628906, -26.16412353515625, 56.18488693237305, 155.71823120117188, 123.75061798095703, 7.196245193481445, 5.768768310546875, 190.40420532226562, 115.490234375, 45.80455017089844, -171.86558532714844, 42.996185302734375, 265.2132873535156, -55.963409423828125, -46.85951232910156, 7.132598876953125, -0.6822624206542969, 63.47349548339844, -2.7666854858398438, 14.37957763671875, 0.4320850372314453, -35.407127380371094, 34.265541076660156, 54.26622772216797, -36.995880126953125, 140.84730529785156, -146.82470703125, -99.40375518798828, -0.3940258026123047, 171.52294921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000397.npy"}
|
||||
{"epoch": 0.600151171579743, "step": 398, "batch_size": 64, "mean": 69.06472778320312, "std": 80.27223205566406, "min": -60.52428436279297, "p10": -19.733415985107417, "median": 56.73008346557617, "p90": 171.42610778808594, "max": 331.87750244140625, "pos_frac": 0.75, "sample": [150.94195556640625, 79.04164123535156, 122.48501586914062, 85.25968933105469, 170.4652099609375, 16.585521697998047, 171.83792114257812, 179.07098388671875, -16.303115844726562, 172.3701171875, 113.53121948242188, 20.1328125, -5.8788604736328125, -7.812225341796875, 110.56787109375, 11.151626586914062, 126.56659698486328, -5.514945983886719, 69.5992431640625, -52.516353607177734, 107.07423400878906, -3.7041873931884766, 175.66143798828125, 219.13282775878906, -0.8002243041992188, 163.5821533203125, 144.78436279296875, 117.7107925415039, 36.18061065673828, 38.79895782470703, -41.11690902709961, 50.861602783203125, 14.221389770507812, 155.02304077148438, 0.5494613647460938, -6.43128776550293, 44.31719970703125, 147.47862243652344, 29.122879028320312, 71.84841918945312, -29.017532348632812, -21.20354461669922, 160.17449951171875, 86.65730285644531, 120.61764526367188, 125.80765533447266, 20.01837730407715, 7.349449157714844, -13.068286895751953, 82.81735229492188, 3.6030960083007812, 331.87750244140625, 20.41547393798828, -4.997810363769531, 72.4058837890625, -29.46483612060547, 188.71578979492188, -60.52428436279297, 62.59856414794922, 8.629739761352539, -27.303646087646484, 145.73483276367188, 167.80325317382812, 24.619028091430664], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000398.npy"}
|
||||
{"epoch": 0.6016628873771731, "step": 399, "batch_size": 64, "mean": 76.416748046875, "std": 90.19219970703125, "min": -108.41934204101562, "p10": -29.39640884399414, "median": 68.89600372314453, "p90": 185.1110626220703, "max": 347.5167236328125, "pos_frac": 0.78125, "sample": [128.98843383789062, 50.612571716308594, 128.61911010742188, 220.61566162109375, -23.325454711914062, 99.001220703125, 157.60443115234375, 37.99542999267578, -7.366687774658203, -12.340660095214844, -78.98365783691406, 70.0672607421875, 80.69137573242188, -53.928226470947266, 39.91340637207031, 73.77091979980469, 67.72474670410156, 87.57278442382812, 81.51856994628906, 216.69961547851562, -43.15374755859375, -21.022705078125, -108.41934204101562, 131.69683837890625, 255.72775268554688, 33.47703552246094, 12.29989242553711, 347.5167236328125, 24.77904510498047, 8.517303466796875, 19.689598083496094, 1.8896770477294922, 95.02970886230469, 43.630306243896484, 47.86260986328125, -29.625022888183594, 62.62488555908203, 185.2361602783203, 47.07518768310547, 195.8211212158203, 51.987937927246094, 1.0121097564697266, 183.2820281982422, -28.86297607421875, 183.279541015625, 118.94127655029297, 79.64126586914062, 76.54621887207031, -53.978912353515625, -23.338050842285156, 166.90499877929688, 184.8191680908203, 164.22854614257812, 126.22351837158203, 146.0625457763672, 183.45184326171875, 133.92977905273438, 192.06336975097656, 67.65553283691406, -51.3268928527832, 183.929931640625, -13.07241439819336, 84.3385238647461, 56.849266052246094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000399.npy"}
|
||||
{"epoch": 0.6031746031746031, "step": 400, "batch_size": 64, "mean": 45.55181884765625, "std": 78.96610260009766, "min": -153.592041015625, "p10": -44.43058624267577, "median": 31.861412048339844, "p90": 158.2266494750977, "max": 207.0196533203125, "pos_frac": 0.6875, "sample": [-9.058307647705078, 83.48294830322266, 45.888763427734375, -27.469284057617188, 110.59190368652344, 117.02870178222656, 21.179466247558594, 124.91325378417969, 94.40908813476562, 150.118896484375, 48.435176849365234, -53.819297790527344, 31.906200408935547, 2.2739219665527344, 1.8120231628417969, -8.626983642578125, 39.44384765625, 161.70140075683594, 126.64850616455078, 126.61572265625, -10.924163818359375, 37.69504165649414, 103.04005432128906, 1.451364517211914, -88.45919799804688, 94.36771392822266, 16.196813583374023, 13.669784545898438, 21.65677261352539, -5.52385139465332, -69.28762817382812, -5.954713821411133, 123.16908264160156, -9.051994323730469, 172.15025329589844, -5.545551300048828, -49.22935485839844, 31.81662368774414, 163.600341796875, 113.51670837402344, 184.412109375, 110.95195770263672, -153.592041015625, 113.54141235351562, 14.454246520996094, 38.683311462402344, -3.95953369140625, -33.23345947265625, -30.873023986816406, 207.0196533203125, 59.40803909301758, 138.70681762695312, -54.57018280029297, 48.04120635986328, -7.83320426940918, 175.38661193847656, 192.22042846679688, 68.84597778320312, 22.43762969970703, 28.8743896484375, -138.54122924804688, -10.498577117919922, 15.475624084472656, 94.12809753417969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000400.npy"}
|
||||
{"epoch": 0.6046863189720333, "step": 401, "batch_size": 64, "mean": 63.84806442260742, "std": 78.67155456542969, "min": -78.55819702148438, "p10": -28.344054985046384, "median": 49.743431091308594, "p90": 177.57391967773438, "max": 237.425537109375, "pos_frac": 0.78125, "sample": [-15.590339660644531, 136.40017700195312, 155.97943115234375, 53.191925048828125, -38.5428581237793, 1.8406906127929688, 15.42236328125, 58.03168487548828, -30.061038970947266, 7.390083312988281, 182.31661987304688, -1.0426177978515625, -78.55819702148438, -4.00811767578125, 35.756736755371094, -31.505409240722656, 86.37783813476562, 237.425537109375, 154.4121856689453, 166.0648193359375, 64.24430084228516, 67.08197021484375, 46.29493713378906, 20.50180435180664, -42.9488525390625, -59.75353240966797, 66.71235656738281, 95.28966522216797, 75.27826690673828, 64.13394927978516, 95.23139953613281, 32.786094665527344, 10.147294998168945, 138.77304077148438, -5.278358459472656, 182.20407104492188, 5.11732292175293, 144.66238403320312, 27.91258430480957, 176.1805419921875, 13.3828125, 171.93572998046875, 89.57781982421875, -24.337759017944336, 178.17108154296875, 15.163562774658203, -9.9483642578125, 218.26486206054688, 158.04342651367188, 101.3492660522461, 109.4002914428711, 2.6054515838623047, 11.424152374267578, 7.0219879150390625, -52.396968841552734, 87.67406463623047, 76.65545654296875, 19.79361915588379, 166.3707275390625, 179.1414337158203, 39.59405517578125, -9.004405975341797, 20.11294937133789, 220.408203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000401.npy"}
|
||||
{"epoch": 0.6061980347694633, "step": 402, "batch_size": 64, "mean": 51.89130401611328, "std": 89.5604476928711, "min": -145.2467803955078, "p10": -47.09790115356445, "median": 47.132925033569336, "p90": 178.34443817138674, "max": 212.55813598632812, "pos_frac": 0.734375, "sample": [89.79364013671875, 187.03561401367188, 134.11276245117188, 127.2749252319336, 124.21403503417969, 75.41767120361328, 205.2568817138672, 28.49835205078125, 43.99815368652344, 82.7868423461914, -45.23402404785156, 200.64498901367188, 48.80693817138672, -46.47602844238281, 52.500274658203125, 45.58609390258789, -3.5704193115234375, 21.93169593811035, 31.237945556640625, 30.906356811523438, 104.72478485107422, -0.8029403686523438, 3.0608291625976562, -107.14056396484375, 212.55813598632812, 5.581562042236328, 77.2610855102539, 175.10299682617188, 67.76275634765625, 23.473350524902344, 179.73362731933594, 158.66171264648438, -11.474258422851562, 101.7269287109375, -9.912002563476562, 60.423194885253906, 158.85433959960938, 154.4823760986328, 5.8433685302734375, 81.60733795166016, -47.364418029785156, -112.55859375, -133.65545654296875, 109.89258575439453, 28.117889404296875, 10.324644088745117, 46.998077392578125, -67.99003601074219, 6.106834411621094, 8.416828155517578, 143.79747009277344, 187.65155029296875, 141.0561981201172, 210.1410675048828, -44.54161071777344, -15.932210922241211, -18.94072723388672, -145.2467803955078, -126.80026245117188, 60.99110412597656, 47.26777267456055, 146.96438598632812, -42.61222839355469, 52.70796203613281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000402.npy"}
|
||||
{"epoch": 0.6077097505668935, "step": 403, "batch_size": 64, "mean": 64.29563903808594, "std": 89.21684265136719, "min": -151.9116973876953, "p10": -43.852512741088866, "median": 40.84467315673828, "p90": 174.77097778320314, "max": 291.33734130859375, "pos_frac": 0.765625, "sample": [-6.236045837402344, -1.8612060546875, 233.45248413085938, 23.99713134765625, 169.46453857421875, 45.17509460449219, 203.2923583984375, 144.96531677246094, 24.324798583984375, 0.0124664306640625, 55.489707946777344, -48.079742431640625, 129.39175415039062, -46.9937629699707, 155.86660766601562, 98.02374267578125, 55.323448181152344, 11.236307144165039, 35.42578125, 7.121976852416992, -0.1575756072998047, -55.38499069213867, 291.33734130859375, 160.53884887695312, -19.779266357421875, 127.62516784667969, 172.12850952148438, 5.777727127075195, 175.73655700683594, 19.293556213378906, -2.331554412841797, 75.53837585449219, 34.87324523925781, 28.40337371826172, 53.35804748535156, -25.365901947021484, -1.9158878326416016, 172.51795959472656, 23.21246910095215, 103.70596313476562, 24.8052978515625, 1.3835163116455078, 167.54115295410156, 14.741710662841797, 110.64590454101562, -45.374603271484375, 136.47093200683594, 141.41641235351562, 87.49435424804688, 36.514251708984375, 175.87820434570312, 178.09149169921875, -151.9116973876953, -40.300968170166016, 211.698486328125, -133.6613006591797, 148.84933471679688, 70.5569839477539, 68.95854187011719, 161.12844848632812, -49.87884521484375, 7.790107727050781, 35.90235137939453, 127.67613983154297], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000403.npy"}
|
||||
{"epoch": 0.6092214663643235, "step": 404, "batch_size": 64, "mean": 65.85258483886719, "std": 91.6248550415039, "min": -110.55172729492188, "p10": -50.26800460815429, "median": 56.16357231140137, "p90": 183.97691040039064, "max": 258.02081298828125, "pos_frac": 0.71875, "sample": [-43.429229736328125, 84.82330322265625, 101.20563507080078, 4.7730865478515625, 64.42462921142578, 11.844200134277344, 92.0953598022461, 13.088539123535156, -52.446083068847656, -43.68745422363281, 188.0244140625, -45.185821533203125, -110.55172729492188, -83.08381652832031, -1.0331268310546875, 215.81671142578125, -23.408355712890625, 3.8336563110351562, 176.42788696289062, 181.87460327148438, 102.461181640625, 129.57025146484375, 118.22845458984375, 192.4124755859375, -26.3126220703125, 146.813720703125, -70.27930450439453, 195.84906005859375, 166.74790954589844, 144.8189239501953, 8.552452087402344, 123.4139404296875, -11.44752311706543, 147.64837646484375, -24.33831024169922, 5.978401184082031, 35.50330352783203, 133.74642944335938, 114.8193588256836, -3.028644561767578, -0.6594467163085938, -10.778091430664062, 126.53575134277344, 38.44371032714844, 161.91888427734375, 11.460330963134766, 131.80352783203125, 175.226806640625, 180.5598907470703, 57.422019958496094, 1.1193656921386719, 247.5648193359375, 258.02081298828125, 73.85890197753906, 142.7744598388672, 117.89461517333984, 184.87789916992188, -62.332672119140625, -65.48960876464844, -57.66786193847656, 49.56217956542969, 54.90512466430664, 11.2149658203125, 19.764991760253906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000404.npy"}
|
||||
{"epoch": 0.6107331821617535, "step": 405, "batch_size": 64, "mean": 52.126182556152344, "std": 81.63004302978516, "min": -145.21206665039062, "p10": -36.25179214477538, "median": 31.728529930114746, "p90": 173.07973937988282, "max": 217.66409301757812, "pos_frac": 0.671875, "sample": [145.1044921875, -13.40749740600586, -72.32199096679688, 186.7453155517578, 19.49394989013672, 19.711624145507812, 171.31582641601562, -17.081649780273438, -5.049900054931641, -44.40150451660156, -17.352874755859375, 4.493135452270508, 3.812173843383789, -3.91461181640625, 79.26568603515625, 9.306121826171875, 211.34063720703125, -1.4345149993896484, -7.3360443115234375, 96.79104614257812, 163.8567657470703, 136.4842529296875, 9.62542724609375, 76.47804260253906, -1.42901611328125, 36.48434829711914, 119.2493896484375, 171.4796600341797, -52.18864440917969, 103.89424896240234, 21.273479461669922, -0.9312324523925781, 140.2701873779297, 175.75173950195312, -13.10125732421875, 72.9669418334961, 15.294811248779297, -145.21206665039062, 71.85958099365234, 51.517425537109375, 36.270782470703125, -48.159332275390625, 56.750831604003906, 42.27186584472656, 15.865394592285156, 27.186277389526367, 10.117820739746094, 47.95032501220703, 192.6544189453125, 149.8254852294922, -17.83553695678711, -22.601348876953125, 116.2808609008789, 39.89002227783203, 173.76548767089844, 177.7473907470703, -9.648574829101562, 47.39980697631836, 144.854248046875, -73.78070068359375, -15.858192443847656, 217.66409301757812, 150.8626708984375, -42.10198211669922], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000405.npy"}
|
||||
{"epoch": 0.6122448979591837, "step": 406, "batch_size": 64, "mean": 68.86474609375, "std": 92.13691711425781, "min": -166.35293579101562, "p10": -48.23718910217284, "median": 67.3656234741211, "p90": 182.5234085083008, "max": 223.2860870361328, "pos_frac": 0.75, "sample": [171.06521606445312, 46.17817687988281, -37.225547790527344, -26.899032592773438, 137.86685180664062, 92.9223403930664, 159.92825317382812, 138.0626220703125, -51.09686279296875, 213.64671325683594, 124.48018646240234, 143.62948608398438, -3.796642303466797, -13.479141235351562, 190.4593505859375, 223.2860870361328, 206.77639770507812, 22.038454055786133, 65.56938934326172, 172.4215087890625, 81.358642578125, 24.089195251464844, 96.4884033203125, 69.16185760498047, 21.876121520996094, 126.37147521972656, -41.56461715698242, 122.26138305664062, -5.51786994934082, 65.18629455566406, -0.4924964904785156, 0.8511428833007812, -146.0242156982422, -20.90988540649414, 47.5299072265625, 184.02685546875, 76.38937377929688, 179.01536560058594, -72.21450805664062, 196.60675048828125, 103.78634643554688, 38.77876281738281, 52.94001007080078, 209.6139373779297, 15.153861999511719, 162.70025634765625, -79.0555419921875, 14.256402969360352, 118.32384490966797, 156.426513671875, 18.611631393432617, -53.724609375, 174.47315979003906, -101.97731018066406, 41.06553649902344, -166.35293579101562, 104.40460205078125, 130.39434814453125, -23.097557067871094, 36.071929931640625, 132.8109893798828, 116.80245971679688, 65.375732421875, 159.2380828857422], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000406.npy"}
|
||||
{"epoch": 0.6137566137566137, "step": 407, "batch_size": 64, "mean": 53.44386291503906, "std": 92.25106811523438, "min": -184.30499267578125, "p10": -42.90428237915039, "median": 51.365623474121094, "p90": 180.04205169677735, "max": 239.3485565185547, "pos_frac": 0.6875, "sample": [-0.904205322265625, -8.981367111206055, -40.689208984375, 61.76000213623047, -41.982643127441406, 162.26976013183594, 171.76451110839844, -43.29927062988281, -94.87162780761719, 114.01642608642578, 82.68780517578125, 66.87156677246094, 3.2315673828125, 9.54864501953125, -105.81712341308594, 16.12102508544922, 90.00206756591797, 143.50119018554688, 194.20370483398438, 168.57740783691406, 62.988800048828125, 9.785051345825195, 71.25675964355469, 39.18585205078125, 91.24429321289062, 92.0487060546875, 1.2097244262695312, -4.5395965576171875, 148.62538146972656, 195.33804321289062, 106.1268310546875, 3.2014236450195312, 185.4881591796875, 176.0291748046875, 143.96981811523438, 6.613273620605469, -91.86129760742188, 124.94924926757812, -58.81376647949219, 40.97124481201172, -8.013565063476562, 103.74649047851562, 181.76185607910156, 85.04239654541016, 193.91036987304688, 239.3485565185547, 123.40352630615234, 1.8524703979492188, 193.139404296875, -0.8230781555175781, 38.6878662109375, -184.30499267578125, -147.00375366210938, -34.489715576171875, -40.16020965576172, 38.80100631713867, -12.699739456176758, 162.96421813964844, -35.10245895385742, -9.9111328125, -6.468208312988281, 96.15130615234375, 68.59003448486328, 80.15719604492188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000407.npy"}
|
||||
{"epoch": 0.6152683295540439, "step": 408, "batch_size": 64, "mean": 78.43579864501953, "std": 85.00336456298828, "min": -117.76171875, "p10": -14.623588562011715, "median": 76.24713134765625, "p90": 183.7441589355469, "max": 235.68319702148438, "pos_frac": 0.796875, "sample": [64.60433197021484, 169.95082092285156, 141.2507781982422, 98.44760131835938, -0.8595809936523438, -59.86448669433594, -117.76171875, 4.343479156494141, 153.39158630371094, 85.57600402832031, 61.643409729003906, 202.84127807617188, 0.1787738800048828, 124.14204406738281, 117.29786682128906, 235.68319702148438, 35.74074935913086, -24.930641174316406, 44.57457733154297, 28.63970947265625, 93.40489196777344, 6.664634704589844, -6.52018928527832, 156.69456481933594, 208.24969482421875, -91.18003845214844, 68.86563873291016, 207.91749572753906, 123.17930603027344, 158.5583953857422, 180.5320587158203, -25.910633087158203, 117.64630889892578, 11.63245964050293, 14.812652587890625, 172.2046356201172, 30.00113296508789, -16.255773544311523, 214.99684143066406, 71.06831359863281, 55.74317169189453, 171.2251434326172, 178.24624633789062, 185.1207733154297, 131.3207244873047, 16.35066795349121, 139.42950439453125, -10.815156936645508, 59.846160888671875, -5.3458404541015625, 170.15283203125, 90.14761352539062, 193.24932861328125, -112.0803451538086, 161.28955078125, 123.7747802734375, 47.42326736450195, -1.4613609313964844, 0.2164459228515625, 54.432125091552734, 132.43466186523438, 81.42594909667969, -8.464370727539062, 104.77726745605469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000408.npy"}
|
||||
{"epoch": 0.6167800453514739, "step": 409, "batch_size": 64, "mean": 38.24311828613281, "std": 79.92449188232422, "min": -161.86778259277344, "p10": -32.907600784301756, "median": 19.802699089050293, "p90": 164.2986511230469, "max": 197.000244140625, "pos_frac": 0.640625, "sample": [17.200042724609375, 8.863969802856445, -13.8104248046875, -25.500900268554688, 0.4541893005371094, -22.314151763916016, 24.259920120239258, -33.8000602722168, 40.99527359008789, -25.50507354736328, -25.663082122802734, 179.24542236328125, -11.402101516723633, 177.72946166992188, 79.89924621582031, 5.593658447265625, 56.55560302734375, -2.5956649780273438, -0.2734794616699219, 190.9232940673828, 131.8116455078125, -30.617023468017578, 109.29661560058594, 168.1021728515625, 17.708572387695312, -73.480224609375, -18.653213500976562, 89.94805908203125, 45.953704833984375, -2.19482421875, -52.63056945800781, 63.52571105957031, 112.48758697509766, -19.845752716064453, 140.34503173828125, -12.458364486694336, 155.42376708984375, 121.22469329833984, 45.54402160644531, 20.90523338317871, -70.95465087890625, 3.4725112915039062, 103.67552185058594, -77.99781799316406, 13.205375671386719, -30.8251953125, 18.700164794921875, 83.97735595703125, -159.33079528808594, 178.7406768798828, 24.32474136352539, 125.75183868408203, -1.1602935791015625, 181.65228271484375, -161.86778259277344, 15.488410949707031, 67.58642578125, 70.7955551147461, 70.80167388916016, 57.34352111816406, 197.000244140625, -29.445234298706055, 56.99407958984375, 76.37895202636719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000409.npy"}
|
||||
{"epoch": 0.618291761148904, "step": 410, "batch_size": 64, "mean": 63.53340148925781, "std": 74.2651138305664, "min": -116.40338897705078, "p10": -29.07407531738281, "median": 63.23435592651367, "p90": 162.59804840087892, "max": 250.0213165283203, "pos_frac": 0.8125, "sample": [73.4267807006836, 78.11689758300781, 22.204387664794922, -2.556743621826172, 169.9302978515625, 38.716033935546875, 94.27540588378906, 34.17558670043945, -44.40419006347656, -116.40338897705078, 77.70580291748047, 174.0205841064453, 5.307586669921875, -54.572975158691406, 152.76031494140625, 75.49767303466797, 143.68487548828125, -27.702850341796875, 99.98101806640625, 184.03256225585938, 250.0213165283203, 50.896507263183594, 9.45654296875, 107.30939483642578, 0.2461395263671875, 126.55825805664062, 12.780563354492188, 103.22714233398438, 32.262977600097656, -30.944969177246094, 121.98563385009766, -29.6617431640625, 20.04220962524414, 128.72879028320312, 24.890716552734375, 204.67025756835938, 162.9394073486328, 31.259357452392578, 68.9805908203125, 67.64840698242188, -16.57776641845703, -51.26568603515625, 83.10028076171875, 72.97300720214844, 134.61599731445312, 63.64581298828125, 153.40919494628906, -2.9811630249023438, 10.409107208251953, 94.20108032226562, 27.185333251953125, 97.25784301757812, 215.38934326171875, 62.70098876953125, 41.324615478515625, 62.822898864746094, -6.348049163818359, 53.8726806640625, 4.68798828125, 138.05599975585938, 161.80154418945312, 66.92827606201172, -56.552215576171875, 13.987525939941406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000410.npy"}
|
||||
{"epoch": 0.6198034769463341, "step": 411, "batch_size": 64, "mean": 61.25654602050781, "std": 88.8885498046875, "min": -201.24990844726562, "p10": -40.989192962646484, "median": 49.51630401611328, "p90": 173.24844360351562, "max": 281.8457336425781, "pos_frac": 0.71875, "sample": [-12.088569641113281, 155.68215942382812, 131.09786987304688, 167.50912475585938, 92.30438995361328, -19.19793701171875, 25.794906616210938, -201.24990844726562, 110.97571563720703, 6.306755065917969, 87.25481414794922, 192.43936157226562, 118.8469467163086, 102.79412841796875, 133.39083862304688, 62.04070281982422, 91.17781066894531, -54.81067657470703, 68.26708221435547, 73.46781921386719, -13.540458679199219, 184.35386657714844, 179.03466796875, 14.674386978149414, -39.80548095703125, 146.7692413330078, 3.5680999755859375, 15.620731353759766, -56.82986831665039, -33.95091247558594, 40.26287841796875, -15.693445205688477, -28.314292907714844, 43.87583923339844, 94.17129516601562, 34.93227767944336, -4.201807022094727, 159.1364288330078, 17.72265625, 281.8457336425781, 166.79531860351562, 41.34321594238281, -20.192535400390625, 173.12197875976562, 194.7266845703125, 173.30264282226562, -64.59274291992188, 36.26731872558594, -11.276172637939453, -80.05087280273438, 28.778581619262695, -46.65513610839844, 170.9317626953125, 137.33323669433594, 73.4511489868164, 103.51628875732422, 114.56147003173828, 32.82551574707031, 226.14901733398438, 55.156768798828125, -41.496498107910156, 0.555572509765625, -1.0785064697265625, 101.30973815917969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000411.npy"}
|
||||
{"epoch": 0.6213151927437641, "step": 412, "batch_size": 64, "mean": 57.324928283691406, "std": 78.88839721679688, "min": -112.02095031738281, "p10": -41.87274894714355, "median": 45.90519142150879, "p90": 168.38565826416016, "max": 212.02134704589844, "pos_frac": 0.75, "sample": [-67.38378143310547, 150.97926330566406, 62.63794708251953, 179.4613037109375, 36.08032989501953, 126.68408966064453, 36.62016296386719, 168.75125122070312, -77.67420959472656, -0.8230171203613281, 64.4998550415039, 6.601310729980469, 78.29106140136719, 167.53260803222656, -3.6382827758789062, -33.722999572753906, 153.9290771484375, -112.02095031738281, 47.37329864501953, 2.271331787109375, 175.96054077148438, 125.53749084472656, 36.75816345214844, 73.53607940673828, 99.8880615234375, -71.5201644897461, 8.796340942382812, 27.632457733154297, 23.704975128173828, 53.14454650878906, -5.668340682983398, 212.02134704589844, 3.580343246459961, 202.99229431152344, -42.931495666503906, 21.325714111328125, 64.3115234375, -7.299022674560547, 148.00857543945312, -4.700782775878906, -22.42399024963379, 10.153213500976562, 85.95684814453125, -39.402339935302734, 41.978336334228516, 184.584716796875, 120.652587890625, 127.7447280883789, 180.08595275878906, -66.82464599609375, 87.5515365600586, 107.34280395507812, 53.73945999145508, 18.234657287597656, 44.43708419799805, 150.05433654785156, -5.275445938110352, 79.40966796875, -48.08925247192383, 79.61011505126953, 159.9040985107422, 3.3786449432373047, 158.74578857421875, 25.7181396484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000412.npy"}
|
||||
{"epoch": 0.6228269085411943, "step": 413, "batch_size": 64, "mean": 50.955360412597656, "std": 101.08700561523438, "min": -192.55055236816406, "p10": -82.66257781982421, "median": 60.366580963134766, "p90": 176.3454559326172, "max": 326.65802001953125, "pos_frac": 0.71875, "sample": [11.951045989990234, -94.61228942871094, 102.37555694580078, 72.18069458007812, -14.416923522949219, -33.90925598144531, -1.2444915771484375, 180.76402282714844, -53.524566650390625, 234.7991943359375, -184.50445556640625, 173.86557006835938, 94.96619415283203, -48.70497131347656, 5.4055328369140625, 20.421539306640625, -84.58409118652344, 153.8519287109375, -0.7260665893554688, 10.512369155883789, 196.63925170898438, 107.51036071777344, 56.46891784667969, 67.40333557128906, 118.80831909179688, 37.75525665283203, 326.65802001953125, 74.01812744140625, 15.567466735839844, 42.627540588378906, 92.09005737304688, 159.8683319091797, 77.95535278320312, 60.10874938964844, 179.8839874267578, -100.97262573242188, 105.14289855957031, 1.9499053955078125, 76.83683013916016, 17.391159057617188, -125.67545318603516, 161.63980102539062, 166.70516967773438, -89.66883087158203, 45.962615966796875, -72.21083068847656, -192.55055236816406, 64.2509536743164, 77.70389556884766, 8.450969696044922, -40.66682434082031, -78.17904663085938, 182.10076904296875, -2.623157501220703, 64.77786254882812, 149.83753967285156, 63.46735763549805, 161.21487426757812, 30.39694595336914, 60.624412536621094, -70.72998046875, 135.62611389160156, 177.40826416015625, 124.70223999023438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000413.npy"}
|
||||
{"epoch": 0.6243386243386243, "step": 414, "batch_size": 64, "mean": 62.74230194091797, "std": 82.44353485107422, "min": -146.95645141601562, "p10": -35.00765762329101, "median": 54.75424766540527, "p90": 174.6288848876953, "max": 208.25515747070312, "pos_frac": 0.765625, "sample": [148.91046142578125, -0.6896591186523438, 99.14208984375, 98.69287109375, 21.242767333984375, 51.654319763183594, -119.65594482421875, 100.43885803222656, 92.25348663330078, 19.525924682617188, 59.87811279296875, 160.11085510253906, 137.60464477539062, -54.71277618408203, 208.25515747070312, 57.291683197021484, -109.77552795410156, -42.11206817626953, 22.078582763671875, 110.81566619873047, 4.378519058227539, 71.79534912109375, 189.6630859375, -29.39417266845703, 183.1510772705078, 48.62860107421875, 63.1002082824707, 11.066566467285156, 95.50654602050781, 121.10935974121094, 174.7931671142578, 33.26777648925781, 139.61862182617188, -37.41343688964844, 199.29750061035156, 97.12639617919922, 154.71112060546875, 34.244537353515625, 52.21681213378906, 158.55340576171875, 30.445236206054688, -19.624610900878906, 11.607192993164062, -3.188650131225586, 100.89308166503906, -20.344924926757812, 4.443183898925781, 109.27487182617188, -146.95645141601562, 80.07611846923828, 153.187255859375, -6.3114776611328125, -2.0416107177734375, 41.15964889526367, 174.2455596923828, 189.73904418945312, 38.43895721435547, 18.2977294921875, 146.1954345703125, 123.67552185058594, -49.211212158203125, -4.99635124206543, 23.01369857788086, 197.11962890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000414.npy"}
|
||||
{"epoch": 0.6258503401360545, "step": 415, "batch_size": 64, "mean": 53.06175994873047, "std": 88.47310638427734, "min": -153.27853393554688, "p10": -47.9999053955078, "median": 37.24673271179199, "p90": 168.52499542236328, "max": 282.10205078125, "pos_frac": 0.75, "sample": [-10.353487014770508, 123.53922271728516, 21.092737197875977, 39.7500114440918, 19.15909194946289, 14.396293640136719, 147.59449768066406, 182.26971435546875, -58.736785888671875, 150.56484985351562, 85.7164535522461, -51.398223876953125, 152.44546508789062, 168.80506896972656, 4.580535888671875, 171.72216796875, 191.87930297851562, 111.6641845703125, 88.71649169921875, -1.7441577911376953, -11.68878173828125, 79.75776672363281, 108.15161895751953, -3.3232574462890625, 34.74345397949219, 167.87149047851562, -127.87970733642578, 10.700004577636719, 59.37110900878906, 118.78097534179688, -35.407379150390625, 145.1217803955078, 58.71098327636719, 5.402107238769531, -147.754150390625, 282.10205078125, 112.14188385009766, 114.90226745605469, 161.34893798828125, -40.07049560546875, 106.5231704711914, -112.23847961425781, -77.20401000976562, -153.27853393554688, 23.342124938964844, 64.81226348876953, -17.49274444580078, 28.31134033203125, -5.224815368652344, 6.045244216918945, 13.102300643920898, 59.309532165527344, 29.137374877929688, 22.236461639404297, 12.195869445800781, 83.00997924804688, -2.7032299041748047, 130.18048095703125, 104.66888427734375, 6.015281677246094, 10.718940734863281, 46.559959411621094, 180.94223022460938, 192.33700561523438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000415.npy"}
|
||||
{"epoch": 0.6273620559334845, "step": 416, "batch_size": 64, "mean": 46.27003479003906, "std": 90.4978256225586, "min": -162.8618927001953, "p10": -49.31379852294921, "median": 29.012556076049805, "p90": 171.45796966552734, "max": 324.6434020996094, "pos_frac": 0.640625, "sample": [-0.16490936279296875, -129.7809295654297, -9.029895782470703, 71.69990539550781, 39.63829803466797, 171.51292419433594, -40.51243591308594, -43.62017822265625, 172.04129028320312, 184.5032958984375, 35.1151123046875, 164.67942810058594, 324.6434020996094, 24.85163116455078, -2.389179229736328, 68.6759262084961, -1.9923992156982422, 105.35750579833984, -162.8618927001953, -28.383758544921875, -4.207414627075195, 122.30641174316406, 27.138216018676758, 163.5042266845703, -0.7841873168945312, 119.89987182617188, 84.19668579101562, 175.62796020507812, 205.25827026367188, 62.409202575683594, 77.23755645751953, 139.52691650390625, -29.782638549804688, -55.345542907714844, 2.8502674102783203, -3.2440261840820312, -80.34454345703125, -114.29353332519531, -2.787996292114258, 3.683971405029297, -36.02251052856445, 30.820648193359375, -20.262527465820312, 7.2275390625, 100.54147338867188, -28.185298919677734, 116.41761016845703, -103.04583740234375, -3.5153980255126953, 113.29547119140625, 79.41987609863281, 4.863397598266602, -51.75392150878906, 114.84199523925781, 8.261634826660156, 27.204463958740234, 13.843147277832031, 44.09788513183594, 86.56403350830078, 44.883262634277344, 171.32974243164062, 137.37380981445312, 181.77886962890625, 84.47029876708984], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000416.npy"}
|
||||
{"epoch": 0.6288737717309146, "step": 417, "batch_size": 64, "mean": 61.330543518066406, "std": 98.01376342773438, "min": -284.37384033203125, "p10": -50.7408706665039, "median": 65.92868423461914, "p90": 170.99331970214845, "max": 295.1285400390625, "pos_frac": 0.78125, "sample": [32.10942077636719, 6.9218902587890625, -18.409446716308594, -175.04666137695312, 179.84353637695312, 141.60736083984375, -284.37384033203125, 171.7867431640625, 70.53523254394531, 91.28968811035156, 295.1285400390625, 39.38243103027344, 20.05274200439453, 64.1041030883789, 112.79228973388672, -89.67427062988281, 167.54307556152344, 136.94918823242188, -2.2346038818359375, 69.42301940917969, 111.37765502929688, 70.32830810546875, 84.47543334960938, 58.52436828613281, 62.37860107421875, -49.913604736328125, 177.98681640625, 48.23228454589844, 29.31667709350586, 165.90911865234375, -76.47241973876953, 169.04257202148438, 174.2999725341797, -105.30659484863281, 164.57830810546875, -40.85859680175781, 51.45384216308594, -10.828437805175781, 161.63037109375, 96.89937591552734, 169.14199829101562, 7.891105651855469, 173.12237548828125, 76.93143463134766, -75.383056640625, 41.38294982910156, 2.205263137817383, 54.14958953857422, 1.5061187744140625, 71.48786926269531, 28.9285888671875, 161.403564453125, 7.0019989013671875, -24.7783203125, 99.1746597290039, -51.09541320800781, 89.47396850585938, 23.722904205322266, 96.4500732421875, -0.8128032684326172, 144.1151123046875, 67.75326538085938, 152.537353515625, 236.05963134765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000417.npy"}
|
||||
{"epoch": 0.6303854875283447, "step": 418, "batch_size": 64, "mean": 49.319732666015625, "std": 91.60889434814453, "min": -165.622802734375, "p10": -51.7795783996582, "median": 37.53801727294922, "p90": 172.7598052978516, "max": 340.39764404296875, "pos_frac": 0.671875, "sample": [81.96638488769531, 137.37261962890625, 117.57515716552734, 65.39419555664062, 65.37557983398438, 181.50259399414062, 12.891403198242188, 98.22679901123047, 181.9755096435547, -49.96570587158203, 162.29827880859375, 206.74217224121094, -36.57118225097656, 128.6688232421875, 60.4259033203125, -92.44081115722656, 159.14173889160156, 10.579383850097656, 139.87588500976562, -1.8807144165039062, -1.6154098510742188, 340.39764404296875, 92.21521759033203, -27.608230590820312, 181.82272338867188, 24.268531799316406, -32.743896484375, 2.1771469116210938, 26.718914031982422, -50.9498291015625, -165.622802734375, -102.31195068359375, 192.25411987304688, 43.31672668457031, 177.24331665039062, -16.614791870117188, 8.741947174072266, -13.475372314453125, 8.006393432617188, 16.39977264404297, -52.13518524169922, -55.614479064941406, -91.23992919921875, -5.491144180297852, 111.32610321044922, -19.27989959716797, -22.631248474121094, 39.50172424316406, 29.132848739624023, -8.026268005371094, 46.903785705566406, 102.05224609375, 160.908447265625, 156.77850341796875, 12.617210388183594, 35.574310302734375, -85.30104064941406, 78.46687316894531, 90.20553588867188, 115.66486358642578, 100.87451934814453, -45.38542175292969, 69.1424560546875, 60.64369201660156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000418.npy"}
|
||||
{"epoch": 0.6318972033257747, "step": 419, "batch_size": 64, "mean": 69.66299438476562, "std": 93.52377319335938, "min": -152.24037170410156, "p10": -23.224769210815424, "median": 34.50528526306152, "p90": 196.31598968505864, "max": 268.28192138671875, "pos_frac": 0.84375, "sample": [-13.904918670654297, 268.28192138671875, 6.610847473144531, 6.316642761230469, -76.6539306640625, 11.313690185546875, 186.0072784423828, 19.228904724121094, 18.38426971435547, 29.54122543334961, -152.24037170410156, 158.8094482421875, 205.22691345214844, 109.71714782714844, -47.64393615722656, 30.670455932617188, 69.08467102050781, 34.18702697753906, 200.7340087890625, 17.484695434570312, 72.3779296875, 94.6728286743164, -26.207061767578125, 113.39314270019531, 7.2871551513671875, 3.20013427734375, 225.32461547851562, -31.18255615234375, 258.4085693359375, 93.33816528320312, 37.89186477661133, -76.3199691772461, -16.26608657836914, 173.03366088867188, 262.00103759765625, 103.11091613769531, 152.9200897216797, 6.468315124511719, 15.929443359375, 91.75302124023438, 0.4713878631591797, 183.74484252929688, 19.080490112304688, 12.685882568359375, 168.9596405029297, 10.181808471679688, 83.7126693725586, 34.823543548583984, 21.981311798095703, 178.87124633789062, 174.0118865966797, 0.2990226745605469, 31.562618255615234, 32.81718444824219, 150.77500915527344, 13.149606704711914, 133.68206787109375, 131.26321411132812, 218.0957794189453, -11.308013916015625, 101.26622009277344, 49.983673095703125, -107.62212371826172, 183.65147399902344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000419.npy"}
|
||||
{"epoch": 0.6334089191232048, "step": 420, "batch_size": 64, "mean": 37.32643127441406, "std": 97.11750030517578, "min": -201.52334594726562, "p10": -71.2592029571533, "median": 12.985638618469238, "p90": 185.4270431518555, "max": 245.52703857421875, "pos_frac": 0.609375, "sample": [-8.062232971191406, -24.05850601196289, 135.8125457763672, 16.41082000732422, 114.08867645263672, -150.046630859375, 23.682998657226562, 5.514888763427734, -1.7622604370117188, -171.24078369140625, -20.5355224609375, 83.07244110107422, -2.8654937744140625, 150.18878173828125, 11.29241943359375, 186.8053436279297, 210.28976440429688, 102.26918029785156, 184.56442260742188, 155.08462524414062, 13.523122787475586, -54.92914962768555, -0.3743572235107422, 64.10356140136719, 99.00587463378906, 170.1038360595703, -86.8492202758789, -89.49649810791016, 74.54944610595703, 7.436748504638672, -109.76388549804688, 40.42417907714844, -48.028053283691406, 32.23624038696289, -0.5812568664550781, -78.25779724121094, -27.327613830566406, 0.8137741088867188, 199.60989379882812, 18.048263549804688, -21.24840545654297, -201.52334594726562, -53.33348846435547, -10.982925415039062, 155.2994384765625, 71.68421936035156, 12.44815444946289, 185.79673767089844, -2.7899093627929688, 69.42269897460938, 195.40057373046875, 38.897132873535156, 139.24310302734375, 6.245466232299805, 34.437129974365234, 130.7910614013672, 245.52703857421875, 10.918922424316406, -3.7159461975097656, 26.919509887695312, -4.517486572265625, 211.77801513671875, -45.30570983886719, -27.25295639038086], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000420.npy"}
|
||||
{"epoch": 0.6349206349206349, "step": 421, "batch_size": 64, "mean": 53.006935119628906, "std": 108.83377075195312, "min": -180.0901641845703, "p10": -102.85799179077146, "median": 44.60309982299805, "p90": 178.87640075683595, "max": 293.2621154785156, "pos_frac": 0.765625, "sample": [-131.82949829101562, 192.58038330078125, 6.598350524902344, 168.22625732421875, 163.25677490234375, 14.696563720703125, 98.89422607421875, 24.931732177734375, 5.225334167480469, 293.2621154785156, 235.4038848876953, -83.85102844238281, 179.55267333984375, 56.97484588623047, -40.90584182739258, 62.050106048583984, 26.267379760742188, -44.413482666015625, 177.29843139648438, 129.69369506835938, 112.82415771484375, 128.16555786132812, 0.40236854553222656, 5.953895568847656, 85.01150512695312, 7.119976043701172, 130.66220092773438, -180.0901641845703, -179.89370727539062, -73.02961730957031, 4.16337776184082, 169.6285858154297, 151.06109619140625, 11.485906600952148, 81.17891693115234, 188.42913818359375, 173.66085815429688, 141.72555541992188, 45.235511779785156, 1.9274711608886719, 168.4082794189453, -2.339397430419922, 97.551513671875, -173.70376586914062, -108.81892395019531, 115.0308837890625, 0.3834381103515625, 57.603477478027344, -115.08036804199219, 33.84291076660156, 263.6439208984375, 188.84286499023438, -4.535560607910156, 43.97068786621094, 37.995941162109375, -129.75048828125, 30.57776641845703, 83.38656616210938, 120.9193115234375, -88.94915008544922, 173.2654266357422, 96.8375015258789, -39.19861602783203, 3.0241355895996094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000421.npy"}
|
||||
{"epoch": 0.636432350718065, "step": 422, "batch_size": 64, "mean": 70.10234069824219, "std": 86.04302978515625, "min": -87.2088851928711, "p10": -41.463414001464834, "median": 55.517263412475586, "p90": 180.64677886962895, "max": 257.7993469238281, "pos_frac": 0.78125, "sample": [190.51272583007812, 257.7993469238281, -62.45915985107422, -5.04876708984375, 171.22366333007812, -13.209022521972656, 164.08651733398438, -70.94416809082031, 123.4368896484375, 210.13314819335938, 11.487594604492188, 49.63825225830078, 16.007217407226562, 38.352508544921875, 206.94618225097656, 30.890209197998047, 165.23826599121094, 184.6852569580078, -77.68211364746094, 150.11373901367188, 74.85792541503906, -78.73861694335938, 0.8366432189941406, -0.9714393615722656, 117.90559387207031, 157.02093505859375, 145.0236358642578, 137.3543243408203, 223.68154907226562, 107.42019653320312, 100.95182800292969, 89.50782775878906, 31.33658218383789, -6.197748184204102, 48.62712097167969, 43.1561164855957, 115.56344604492188, 22.96531105041504, 164.09036254882812, 19.023773193359375, -30.43657684326172, -63.05105972290039, 162.10867309570312, 115.9814453125, -0.292510986328125, -21.30577850341797, 11.596054077148438, 118.96796417236328, 2.0356063842773438, 61.39627456665039, 62.09331130981445, 222.12771606445312, 34.39990997314453, -46.18920135498047, 16.194347381591797, 97.80671691894531, 24.380834579467773, -87.2088851928711, 12.577276229858398, 82.46605682373047, 163.574462890625, 104.8619613647461, 38.11042785644531, 149.7311553955078], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000422.npy"}
|
||||
{"epoch": 0.6379440665154951, "step": 423, "batch_size": 64, "mean": 72.1503677368164, "std": 90.45381164550781, "min": -110.4717025756836, "p10": -14.699256134033202, "median": 58.75432586669922, "p90": 195.8690902709961, "max": 267.65521240234375, "pos_frac": 0.78125, "sample": [37.44483947753906, -4.528572082519531, 171.12249755859375, -97.80563354492188, 189.3249969482422, 2.459074020385742, 68.16574096679688, -41.787635803222656, 172.45001220703125, 52.5635986328125, -11.143936157226562, 163.8053741455078, 2.096160888671875, -59.846649169921875, 2.471038818359375, 8.158782958984375, 15.57672119140625, -3.3584747314453125, 179.2657012939453, -87.61698150634766, 105.03495788574219, 116.37946319580078, -13.868118286132812, -5.96830940246582, 67.80412292480469, 241.3852996826172, 143.81625366210938, 8.389850616455078, -110.4717025756836, 131.97091674804688, 228.75537109375, 153.17929077148438, 113.70954895019531, 143.8329315185547, 85.91767120361328, 47.120086669921875, 36.035980224609375, 196.30186462402344, 199.94886779785156, 194.85928344726562, 25.82170867919922, 10.35879898071289, 9.5440673828125, 113.84661865234375, 145.92596435546875, 147.78053283691406, 176.78167724609375, 73.21052551269531, -8.891036987304688, 73.77702331542969, 267.65521240234375, 101.46476745605469, 4.574502944946289, 15.668298721313477, 128.28732299804688, 208.865478515625, -11.83404541015625, 3.2365875244140625, -15.055458068847656, 30.765731811523438, 221.50039672851562, 3.114105224609375, 64.94505310058594, -16.67054557800293], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000423.npy"}
|
||||
{"epoch": 0.6394557823129252, "step": 424, "batch_size": 64, "mean": 68.5451889038086, "std": 93.51698303222656, "min": -150.99996948242188, "p10": -53.439880752563475, "median": 50.435791015625, "p90": 184.57996520996093, "max": 312.715087890625, "pos_frac": 0.796875, "sample": [-8.517074584960938, 214.63223266601562, 38.63966369628906, 143.8629608154297, -66.717041015625, 156.22537231445312, 312.715087890625, 161.75717163085938, -107.75663757324219, -75.00367736816406, 21.698017120361328, 16.2905216217041, 86.01160430908203, -150.99996948242188, 147.14822387695312, 12.098350524902344, 174.83265686035156, -54.026039123535156, 56.94476318359375, 31.50194549560547, -27.29237937927246, 88.71900939941406, 141.82550048828125, 26.37175750732422, -52.07217788696289, 178.52536010742188, 225.37811279296875, 13.876419067382812, 194.52139282226562, 29.585268020629883, 98.65438079833984, 114.08782196044922, 5.024162292480469, 62.08544158935547, 29.169647216796875, 66.98262023925781, 152.31736755371094, 219.14857482910156, 154.46450805664062, 110.38842010498047, 120.78421020507812, 25.472633361816406, 8.590381622314453, 184.04443359375, 20.330482482910156, 186.7222137451172, 124.33638000488281, -0.9546527862548828, -75.11188507080078, -5.930887222290039, 172.91514587402344, 59.19056701660156, 182.40521240234375, 184.80947875976562, 4.705863952636719, 30.615821838378906, 40.88954162597656, -81.23391723632812, 43.92681884765625, 129.80406188964844, 6.53839111328125, 28.409385681152344, -5.0171661376953125, 57.55009460449219], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000424.npy"}
|
||||
{"epoch": 0.6409674981103552, "step": 425, "batch_size": 64, "mean": 54.67079162597656, "std": 89.77239990234375, "min": -142.94534301757812, "p10": -52.75496101379394, "median": 39.52672576904297, "p90": 177.19068756103516, "max": 250.79852294921875, "pos_frac": 0.75, "sample": [156.07733154296875, 138.42138671875, 27.84606170654297, 8.57373046875, 170.2755126953125, -10.54848861694336, 214.21856689453125, -10.64712905883789, 73.5792465209961, 103.78382873535156, 123.49636840820312, -67.60767364501953, 52.941036224365234, -35.819488525390625, -43.83955001831055, 122.19697570800781, 57.220115661621094, 188.98291015625, 16.525360107421875, 65.73739624023438, -142.94534301757812, 64.73983764648438, 0.7970085144042969, 75.27574157714844, 141.00466918945312, 13.373176574707031, 0.4117908477783203, 201.40707397460938, 11.044776916503906, 183.85757446289062, 164.4941864013672, -59.59085464477539, 107.67709350585938, -3.302215576171875, 72.08572387695312, 31.659278869628906, 41.759185791015625, 177.0013427734375, 28.022724151611328, 16.497783660888672, 193.57403564453125, -112.95482635498047, 64.56489562988281, 111.36146545410156, 31.197052001953125, -140.55014038085938, 40.62638854980469, 67.08818054199219, 38.42706298828125, -12.361312866210938, 177.27183532714844, -21.558940887451172, -83.22537231445312, 173.57412719726562, 15.622947692871094, 1.2252616882324219, 149.37916564941406, 35.43162536621094, -10.30438232421875, 250.79852294921875, -46.50962448120117, 2.1561126708984375, 152.8440704345703, -55.43153381347656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000425.npy"}
|
||||
{"epoch": 0.6424792139077853, "step": 426, "batch_size": 64, "mean": 33.85411834716797, "std": 102.29637908935547, "min": -260.58978271484375, "p10": -76.77668151855468, "median": 15.452226638793945, "p90": 158.48117065429688, "max": 265.49554443359375, "pos_frac": 0.609375, "sample": [-7.383739471435547, -2.0013771057128906, 137.0191650390625, 159.88229370117188, 14.150459289550781, 141.00576782226562, 14.311935424804688, 178.51348876953125, 20.766075134277344, 69.48457336425781, -21.911231994628906, -13.309661865234375, -3.1437149047851562, -141.12094116210938, 101.08592224121094, 14.925308227539062, 100.57211303710938, 32.9688720703125, 53.93962860107422, -29.069557189941406, -180.65769958496094, -28.5987548828125, 57.0821533203125, 76.55769348144531, -11.250404357910156, 10.256820678710938, 25.453428268432617, -233.10423278808594, 116.47509765625, -4.126228332519531, 206.51248168945312, 110.48993682861328, 153.49301147460938, -20.317289352416992, -260.58978271484375, 51.552974700927734, -9.551498413085938, 1.8900890350341797, -4.2538604736328125, 265.49554443359375, 171.85208129882812, 227.2681427001953, -108.07341003417969, -119.00152587890625, 5.281459808349609, 206.4641876220703, -16.012496948242188, 3.097187042236328, -2.763751983642578, -74.140380859375, 83.9610595703125, 114.36347198486328, -19.450515747070312, 94.03599548339844, 55.630096435546875, -72.4515609741211, 15.979145050048828, 131.0313720703125, 46.68925476074219, -77.90652465820312, 118.111083984375, 84.05023193359375, 155.21188354492188, -0.057460784912109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000426.npy"}
|
||||
{"epoch": 0.6439909297052154, "step": 427, "batch_size": 64, "mean": 55.34111022949219, "std": 98.91305541992188, "min": -158.13768005371094, "p10": -84.05463104248047, "median": 44.177555084228516, "p90": 178.8256591796875, "max": 329.06390380859375, "pos_frac": 0.75, "sample": [99.92189025878906, 27.0926570892334, -13.935352325439453, -91.2364273071289, -84.40829467773438, 145.54884338378906, 178.55764770507812, 33.09721755981445, -11.456253051757812, -89.35145568847656, 166.83685302734375, -20.877674102783203, -83.22941589355469, -117.23947143554688, 43.17168426513672, 178.94052124023438, 98.0680923461914, 74.92848205566406, -8.044944763183594, -33.62303161621094, 0.26469993591308594, 74.48300170898438, 159.58804321289062, 70.56755065917969, 26.8997802734375, 155.8101806640625, 142.56948852539062, 149.01589965820312, 7.006965637207031, 33.067710876464844, 187.20660400390625, 19.180419921875, 113.14962768554688, 54.52698516845703, 25.66155242919922, 176.42086791992188, 45.18342590332031, -158.13768005371094, 195.0849609375, 188.39993286132812, 0.23827362060546875, 37.9603271484375, 57.0772705078125, 23.959426879882812, -143.86070251464844, -56.28876495361328, 131.18992614746094, 63.870452880859375, 37.41718292236328, -104.11225891113281, 177.46060180664062, 246.00942993164062, 0.9895172119140625, 74.36174774169922, 79.1540756225586, 34.76631164550781, 94.66326904296875, 59.5755729675293, 8.391799926757812, -25.257080078125, 103.43827056884766, -54.69184875488281, 329.06390380859375, 207.7427978515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000427.npy"}
|
||||
{"epoch": 0.6455026455026455, "step": 428, "batch_size": 64, "mean": 62.93409729003906, "std": 97.4823989868164, "min": -177.70655822753906, "p10": -42.88351783752441, "median": 62.580562591552734, "p90": 190.92342529296874, "max": 337.7251281738281, "pos_frac": 0.703125, "sample": [-101.55177307128906, 86.7341537475586, 195.06362915039062, -0.6181716918945312, 2.703826904296875, 9.147621154785156, 33.49789810180664, -129.2218017578125, -49.83232116699219, 65.29943084716797, 190.95761108398438, -29.279197692871094, 136.65176391601562, -17.21930694580078, 163.84994506835938, 62.85498046875, 175.2174072265625, 22.317405700683594, -6.912147521972656, 64.56582641601562, 197.4725799560547, -3.2251014709472656, 4.659309387207031, 0.200531005859375, 202.78988647460938, 101.28785705566406, 62.25720977783203, -177.70655822753906, 75.70655822753906, -132.99786376953125, 113.31573486328125, -1.0219650268554688, 20.352222442626953, 41.96706771850586, 72.73380279541016, 190.84365844726562, 168.71075439453125, 146.4552764892578, 69.9318618774414, -5.73866081237793, 205.82301330566406, 337.7251281738281, 89.48827362060547, 152.61328125, -65.75227355957031, -5.743560791015625, 81.01985168457031, 62.30614471435547, -1.4675846099853516, -10.327850341796875, 24.99114990234375, 18.076881408691406, -2.664257049560547, 2.3224449157714844, 170.14739990234375, 183.6070098876953, 191.53689575195312, 66.36598205566406, 132.13641357421875, 160.90341186523438, 153.75604248046875, 141.34994506835938, -37.432960510253906, -45.2194709777832], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000428.npy"}
|
||||
{"epoch": 0.6470143613000756, "step": 429, "batch_size": 64, "mean": 48.1893310546875, "std": 91.61060333251953, "min": -155.63308715820312, "p10": -68.95413322448728, "median": 44.868473052978516, "p90": 169.31278381347656, "max": 203.87135314941406, "pos_frac": 0.71875, "sample": [111.64378356933594, -141.68099975585938, 2.5568199157714844, -28.55693817138672, 146.0457763671875, 125.06111907958984, 51.19168472290039, 31.454856872558594, -18.675533294677734, 30.834793090820312, 52.82032012939453, -148.3909912109375, 80.82331085205078, 44.96571350097656, -4.3570098876953125, 135.1088104248047, -145.14846801757812, 35.59098815917969, -11.953521728515625, 108.81803131103516, 138.92752075195312, -34.76887130737305, 139.65902709960938, 162.00262451171875, 168.47882080078125, 170.21087646484375, 189.54122924804688, 3.722320556640625, 47.382164001464844, -30.447620391845703, -80.38309478759766, -42.286556243896484, 13.215003967285156, -13.599113464355469, 93.44005584716797, -9.061309814453125, -89.9840087890625, 86.3746566772461, -2.1757431030273438, 4.983329772949219, 174.54319763183594, 140.84494018554688, 30.20642852783203, 169.67019653320312, 145.16946411132812, -155.63308715820312, -131.05612182617188, 25.071712493896484, 6.530548095703125, 155.12472534179688, 20.94470977783203, 44.77123260498047, 19.84973907470703, -17.71651840209961, 63.220645904541016, 70.97361755371094, 203.87135314941406, 79.3055648803711, 197.0419158935547, 186.16085815429688, 9.561904907226562, 135.6007843017578, 56.92680358886719, 79.74877166748047], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000429.npy"}
|
||||
{"epoch": 0.6485260770975056, "step": 430, "batch_size": 64, "mean": 78.61772155761719, "std": 94.7390365600586, "min": -241.19497680664062, "p10": -31.265793609619138, "median": 75.37279510498047, "p90": 198.7993179321289, "max": 213.97586059570312, "pos_frac": 0.8125, "sample": [68.076416015625, 25.431318283081055, -28.343521118164062, 41.674659729003906, 3.2985916137695312, 175.03648376464844, -0.6169013977050781, 165.33148193359375, 46.56267547607422, 120.0943374633789, -104.0363998413086, 159.55282592773438, 196.76815795898438, 4.909692764282227, 176.4010772705078, 107.46373748779297, 101.0354995727539, 56.15049743652344, 199.66981506347656, -32.51819610595703, 78.26118469238281, 213.97586059570312, 175.40457153320312, 126.66873168945312, -0.314727783203125, 180.88088989257812, 31.892776489257812, 209.74252319335938, 119.19287109375, 183.08322143554688, 92.39961242675781, 15.42169189453125, 88.186767578125, 189.32406616210938, -34.038761138916016, 200.7040252685547, 212.16534423828125, 134.49716186523438, -113.3946533203125, 151.8017120361328, 7.2484130859375, 109.37409210205078, 42.70573043823242, 68.247314453125, 19.082992553710938, 156.23756408691406, 23.96930694580078, 1.7854976654052734, -241.19497680664062, -8.360130310058594, -7.3509521484375, 175.8126220703125, 0.9400863647460938, 163.5644989013672, 46.069610595703125, 201.89959716796875, 204.6154022216797, 135.53341674804688, 160.2523193359375, -33.66838073730469, 72.48440551757812, -48.5771369934082, 8.880706787109375, 34.185150146484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000430.npy"}
|
||||
{"epoch": 0.6500377928949358, "step": 431, "batch_size": 64, "mean": 56.399986267089844, "std": 91.05561065673828, "min": -175.70346069335938, "p10": -35.34282989501953, "median": 42.20817565917969, "p90": 180.74546661376954, "max": 226.93869018554688, "pos_frac": 0.734375, "sample": [-171.7792205810547, 142.62330627441406, 73.79362487792969, 50.968345642089844, 21.59347152709961, 143.42208862304688, 131.84710693359375, 140.26156616210938, 194.7250213623047, -2.656129837036133, 8.912918090820312, 181.67953491210938, 180.32586669921875, -9.149124145507812, 44.383934020996094, 19.384756088256836, 225.976806640625, -1.0500717163085938, 226.93869018554688, 41.09735107421875, 66.28849029541016, -175.70346069335938, 20.682029724121094, 82.69471740722656, -38.20526123046875, 177.8040313720703, 67.82215881347656, -88.9259033203125, 44.032623291015625, -8.395078659057617, 68.87863159179688, -8.615734100341797, 166.24838256835938, 171.09225463867188, -105.52781677246094, -5.881797790527344, 183.45755004882812, 105.12492370605469, -33.392974853515625, -36.42314147949219, -36.17848205566406, 9.021240234375, -2.8149642944335938, 146.13291931152344, 1.5027542114257812, 21.61327362060547, 180.55174255371094, 74.38720703125, 0.9460296630859375, 210.718994140625, 23.039459228515625, 55.56804275512695, 7.032989501953125, 28.062274932861328, 106.31209564208984, 8.633487701416016, 43.319000244140625, -27.140113830566406, 10.77139663696289, 180.8284912109375, 172.6260986328125, 95.8980941772461, 31.383193969726562, -28.970382690429688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000431.npy"}
|
||||
{"epoch": 0.6515495086923658, "step": 432, "batch_size": 64, "mean": 59.60150146484375, "std": 93.38139343261719, "min": -211.85536193847656, "p10": -53.14812469482421, "median": 51.88369560241699, "p90": 182.70645599365236, "max": 214.9259033203125, "pos_frac": 0.734375, "sample": [-26.543838500976562, -38.095794677734375, -49.25639343261719, -211.85536193847656, 3.692197799682617, 1.9331378936767578, 12.832473754882812, 84.55642700195312, -55.096702575683594, 175.38656616210938, 121.61036682128906, -84.5441665649414, 51.43316650390625, 57.10198974609375, 182.70187377929688, -142.48635864257812, 59.13091278076172, 26.228668212890625, 209.39308166503906, 165.04348754882812, 8.874168395996094, 98.73639678955078, 146.50936889648438, -26.043060302734375, -5.710071563720703, 189.382568359375, -9.931419372558594, 185.82968139648438, 157.20196533203125, -29.58369255065918, -54.816009521484375, 41.789302825927734, 11.51213264465332, -25.124099731445312, 29.872039794921875, 80.34568786621094, 184.27528381347656, 157.27749633789062, 50.824310302734375, 148.33792114257812, -20.505619049072266, 52.334224700927734, 132.4930419921875, 26.03502655029297, 149.1197509765625, 21.079757690429688, 71.57606506347656, 182.7084197998047, 22.293636322021484, 192.80836486816406, 133.9071502685547, 117.4542236328125, 132.50204467773438, -67.76094055175781, 92.56277465820312, 214.9259033203125, -9.483314514160156, 144.61569213867188, 76.92384338378906, 13.710426330566406, -73.05217742919922, 157.78392028808594, 1.64697265625, 166.09109497070312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000432.npy"}
|
||||
{"epoch": 0.6530612244897959, "step": 433, "batch_size": 64, "mean": 71.48422241210938, "std": 90.96400451660156, "min": -194.37356567382812, "p10": -34.763899612426755, "median": 54.33330535888672, "p90": 180.18860168457033, "max": 210.8865509033203, "pos_frac": 0.765625, "sample": [46.78804397583008, 134.97817993164062, 27.999553680419922, 62.96450424194336, -17.049827575683594, 53.196044921875, -0.34786415100097656, 154.00173950195312, -19.26596450805664, -5.410369873046875, 8.768661499023438, 25.236778259277344, 132.5968017578125, 46.921424865722656, 178.46774291992188, -101.44947052001953, 104.36979675292969, 169.5614013671875, 154.43478393554688, -34.3780403137207, 55.47056579589844, 25.718017578125, 9.888084411621094, 183.12924194335938, -194.37356567382812, 69.29450988769531, 161.20530700683594, 37.822715759277344, 170.45355224609375, 151.26385498046875, 35.77928924560547, 79.23766326904297, -63.09637451171875, 7.350898742675781, 43.39071273803711, -15.97357177734375, -51.12202835083008, 42.06401062011719, 115.04901123046875, 181.5734405517578, 210.8865509033203, 178.5491943359375, 201.38980102539062, 44.85420227050781, 159.82986450195312, -107.22394561767578, 42.6781005859375, 143.5920867919922, -34.92926788330078, 178.35311889648438, -13.761398315429688, 123.66358184814453, 180.89120483398438, -30.19800567626953, 51.15227508544922, 171.89187622070312, 143.87973022460938, 64.59231567382812, 182.05828857421875, 195.1611785888672, 167.6105499267578, 45.938819885253906, 173.07952880859375, -65.45848083496094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000433.npy"}
|
||||
{"epoch": 0.654572940287226, "step": 434, "batch_size": 64, "mean": 78.90576171875, "std": 106.00309753417969, "min": -208.9269561767578, "p10": -56.044975280761705, "median": 89.48471069335938, "p90": 195.39996948242188, "max": 333.61175537109375, "pos_frac": 0.796875, "sample": [-185.60769653320312, 87.28533935546875, 146.88461303710938, 43.39039611816406, 166.82803344726562, 198.830322265625, -66.45698547363281, 199.48727416992188, -81.84251403808594, 91.68408203125, -5.673490524291992, -41.82713317871094, -208.9269561767578, 83.82246398925781, 163.0908966064453, -62.138336181640625, 134.31700134277344, 74.59715270996094, 333.61175537109375, 185.04124450683594, 110.9204330444336, 2.640451431274414, 232.2576141357422, 8.888206481933594, -135.80606079101562, 30.613937377929688, 186.43096923828125, 120.51968383789062, -0.062091827392578125, 142.657470703125, 3.7192955017089844, 102.75274658203125, 107.99433898925781, 182.66091918945312, 196.47293090820312, 182.98953247070312, -14.139259338378906, 140.48687744140625, 214.91921997070312, 16.03754425048828, 192.89639282226562, 13.901107788085938, 44.818912506103516, 161.37269592285156, 25.63427734375, 14.785881042480469, 37.92540740966797, 123.77042388916016, 77.58895874023438, 168.83255004882812, 11.42844009399414, 201.75161743164062, -127.18193054199219, -24.227935791015625, 9.916526794433594, 111.80081939697266, 156.55580139160156, 182.05096435546875, 154.8369140625, -1.3199691772460938, 66.11891174316406, 101.88068389892578, 187.3780975341797, 68.10074615478516], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000434.npy"}
|
||||
{"epoch": 0.656084656084656, "step": 435, "batch_size": 64, "mean": 59.84100341796875, "std": 95.07709503173828, "min": -164.34994506835938, "p10": -57.143790435791004, "median": 44.241912841796875, "p90": 188.07929840087894, "max": 229.07705688476562, "pos_frac": 0.6875, "sample": [104.57476043701172, 93.832275390625, -17.808677673339844, 97.64966583251953, -110.87410736083984, 28.63787078857422, 8.750900268554688, -0.5406150817871094, 136.15432739257812, -24.003799438476562, 190.25567626953125, 173.33853149414062, -6.31121826171875, 116.6132583618164, 174.18472290039062, 196.68170166015625, 31.868125915527344, -31.460739135742188, -17.83531951904297, 178.52053833007812, 45.614906311035156, -9.437202453613281, 166.46502685546875, 197.74305725097656, 25.479995727539062, 42.868919372558594, -28.081527709960938, 72.7662353515625, 183.00108337402344, 201.94882202148438, -17.2244873046875, 91.51815795898438, -164.34994506835938, 148.82492065429688, 34.62417221069336, 78.32984924316406, -14.610954284667969, -34.4287109375, 144.85800170898438, 194.1353759765625, 229.07705688476562, 206.72537231445312, 182.08995056152344, 51.08592987060547, 0.5861358642578125, 53.10480499267578, 36.623600006103516, -42.82775115966797, -33.76072311401367, 182.6318359375, -99.32916259765625, 23.996841430664062, -64.2886962890625, 105.34978485107422, 114.37763977050781, 82.16215515136719, 160.2447509765625, -79.64974975585938, -90.51085662841797, 38.25114440917969, 2.358428955078125, -63.27923583984375, 151.64163208007812, 0.889801025390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000435.npy"}
|
||||
{"epoch": 0.6575963718820862, "step": 436, "batch_size": 64, "mean": 48.577430725097656, "std": 100.02490234375, "min": -188.885986328125, "p10": -82.81682891845703, "median": 26.98588752746582, "p90": 171.19714813232423, "max": 252.44989013671875, "pos_frac": 0.703125, "sample": [114.94862365722656, -31.935726165771484, 161.48751831054688, 4.85161018371582, 113.72913360595703, 7.852563858032227, 106.31143951416016, 164.58482360839844, -109.13599395751953, 27.802574157714844, 23.596633911132812, 167.3301544189453, 14.239936828613281, 115.8406982421875, -52.952388763427734, 185.40054321289062, 132.31936645507812, 124.74995422363281, -188.885986328125, 55.16004180908203, 11.765987396240234, 63.92713165283203, -31.835853576660156, 14.433555603027344, 48.669189453125, -13.407966613769531, 126.4083251953125, -0.4982738494873047, 165.3351593017578, 23.923805236816406, 76.66184997558594, -28.296966552734375, -177.8109893798828, 122.66102600097656, -35.80236053466797, 70.1379165649414, 172.85443115234375, -84.52481842041016, 6.9314727783203125, -74.52520751953125, 161.58843994140625, 138.5609130859375, 26.169200897216797, 12.448928833007812, -106.82791137695312, 146.38926696777344, -84.79578399658203, 207.94813537597656, 94.90484619140625, 252.44989013671875, 158.05630493164062, 1.0906295776367188, 201.02963256835938, 112.05342102050781, -28.473922729492188, 10.519573211669922, 179.216064453125, -78.8315200805664, 17.595497131347656, -0.15904808044433594, -64.407470703125, 196.8424530029297, -115.32646942138672, 76.61174774169922], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000436.npy"}
|
||||
{"epoch": 0.6591080876795162, "step": 437, "batch_size": 64, "mean": 56.787315368652344, "std": 111.84133911132812, "min": -270.3974914550781, "p10": -80.83704299926758, "median": 59.22196960449219, "p90": 202.4258056640625, "max": 270.46282958984375, "pos_frac": 0.640625, "sample": [270.46282958984375, 222.48434448242188, 175.32899475097656, 115.78150939941406, 62.874778747558594, -21.28177261352539, 171.11180114746094, 97.15904235839844, 215.01727294921875, 73.53266906738281, 184.0057373046875, -24.23150634765625, -270.3974914550781, 68.51316833496094, -102.33979797363281, -78.15074920654297, 45.64045715332031, 115.34541320800781, 168.22760009765625, -152.5716552734375, 201.21522521972656, 228.05050659179688, 185.76564025878906, -23.674602508544922, -40.21391677856445, 81.62965393066406, 236.17584228515625, -24.721805572509766, -162.08682250976562, -82.69280242919922, -21.48229217529297, -77.8244857788086, 43.921226501464844, -20.20111846923828, -12.549552917480469, 179.85617065429688, 88.38258361816406, -28.1981201171875, 58.38288116455078, 101.9737548828125, 44.81129455566406, -9.797050476074219, -43.00540542602539, 202.9446258544922, 39.40338134765625, 173.89450073242188, 66.0392074584961, 122.88951873779297, 101.73249816894531, -136.15286254882812, 60.061058044433594, 39.16743087768555, 169.633544921875, 149.44818115234375, -5.512641906738281, 15.891365051269531, 31.301742553710938, 116.65971374511719, -81.98831176757812, -8.717960357666016, -2.0693302154541016, 92.91793823242188, 27.920021057128906, 218.69528198242188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000437.npy"}
|
||||
{"epoch": 0.6606198034769464, "step": 438, "batch_size": 64, "mean": 77.40432739257812, "std": 98.85594177246094, "min": -110.12894439697266, "p10": -31.686894226074216, "median": 54.6728515625, "p90": 181.72946014404297, "max": 425.90618896484375, "pos_frac": 0.75, "sample": [212.00791931152344, -5.8238525390625, 425.90618896484375, -61.93412780761719, -2.529865264892578, -61.702857971191406, -28.012348175048828, 165.24705505371094, 2.2608203887939453, 238.6885528564453, -36.71810531616211, 205.5428466796875, -18.89196014404297, 13.850669860839844, 180.92349243164062, 0.2556953430175781, 166.51846313476562, -110.12894439697266, 156.25177001953125, 98.76404571533203, 148.56610107421875, 159.64508056640625, -9.97060775756836, 36.421417236328125, 66.73811340332031, 174.1575469970703, 61.33354187011719, 38.34867477416992, 176.69041442871094, 118.61494445800781, 91.91116333007812, -36.4267578125, 132.30841064453125, 15.638700485229492, 18.790084838867188, 171.27572631835938, 182.0748748779297, -10.711332321166992, 92.00663757324219, 37.18299865722656, 128.58590698242188, 13.669113159179688, 75.0201644897461, 280.58038330078125, 141.102294921875, 15.099441528320312, 130.37808227539062, 48.01216125488281, 20.91162109375, -3.4046669006347656, -33.26169967651367, -7.950838088989258, 29.227365493774414, 166.13522338867188, 159.6549072265625, -96.05269622802734, 174.98052978515625, 45.189613342285156, -1.6100234985351562, 194.05807495117188, 4.765842437744141, 41.064117431640625, 144.1271209716797, 78.52360534667969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000438.npy"}
|
||||
{"epoch": 0.6621315192743764, "step": 439, "batch_size": 64, "mean": 66.10691833496094, "std": 95.1710205078125, "min": -236.13946533203125, "p10": -21.368264198303223, "median": 44.41096496582031, "p90": 206.68310394287113, "max": 240.53326416015625, "pos_frac": 0.75, "sample": [162.77947998046875, 8.942394256591797, 31.7708797454834, -28.90381622314453, 48.98886489868164, 173.62525939941406, 107.95702362060547, 50.219970703125, -29.556373596191406, 221.73960876464844, 218.84129333496094, -12.31938362121582, 190.14663696289062, 11.623458862304688, 49.94798278808594, 240.53326416015625, 173.30279541015625, 21.22498321533203, 200.45880126953125, -36.28880310058594, 5.789775848388672, 126.8283462524414, 62.587181091308594, -3.1897811889648438, 181.52574157714844, 59.46611785888672, 213.88290405273438, -117.23199462890625, 6.184488296508789, 195.7013702392578, -21.839277267456055, 209.3506622314453, -34.237037658691406, -17.743179321289062, 36.62275695800781, -17.946060180664062, -2.682708740234375, 13.813514709472656, -236.13946533203125, 39.833065032958984, 59.6932373046875, 25.934616088867188, 188.7001495361328, 36.997100830078125, 66.00773620605469, 190.95338439941406, 57.037139892578125, 214.44398498535156, 59.498565673828125, 27.186614990234375, 26.14786148071289, 100.88523864746094, -20.26923370361328, 148.1724853515625, 36.1700439453125, 239.73960876464844, 131.62332153320312, -19.5452880859375, -14.473442077636719, 2.5187835693359375, 21.374324798583984, 66.20289611816406, 88.1158218383789, -7.882759094238281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000439.npy"}
|
||||
{"epoch": 0.6636432350718064, "step": 440, "batch_size": 64, "mean": 68.06352233886719, "std": 101.2000732421875, "min": -175.42361450195312, "p10": -51.72348480224609, "median": 52.981239318847656, "p90": 187.38134765625, "max": 295.4384460449219, "pos_frac": 0.734375, "sample": [-27.02967071533203, 70.64524841308594, 45.14552307128906, 230.1141357421875, 187.10833740234375, 295.4384460449219, 187.49835205078125, -24.02562713623047, 186.43780517578125, 71.566650390625, 30.60118865966797, 175.7958526611328, 60.845314025878906, 165.87255859375, 150.0189666748047, 54.1527099609375, 183.77450561523438, -51.82769012451172, -143.54556274414062, 51.80976867675781, 115.3741226196289, -19.4193058013916, 11.680885314941406, 160.53878784179688, 50.485191345214844, 144.52059936523438, 99.19708251953125, 188.517822265625, -1.4283161163330078, 33.649391174316406, 1.703512191772461, 55.63988494873047, 163.48350524902344, -5.657947540283203, -0.673583984375, 34.69573974609375, 45.39698791503906, -175.42361450195312, 18.62615394592285, 193.4534149169922, 141.0985565185547, 189.80435180664062, 3.4567108154296875, 181.24032592773438, -50.231876373291016, 30.043380737304688, 191.7192840576172, 37.172462463378906, 169.7789764404297, -51.48033905029297, -73.18138122558594, -88.2566146850586, 81.23271179199219, -37.06866455078125, 112.03484344482422, 28.73434829711914, -13.486927032470703, -58.084938049316406, 173.84988403320312, -147.46575927734375, 40.66178894042969, 186.90890502929688, 182.12033081054688, 110.70781707763672], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000440.npy"}
|
||||
{"epoch": 0.6651549508692366, "step": 441, "batch_size": 64, "mean": 74.68692779541016, "std": 80.28498077392578, "min": -81.38011932373047, "p10": -25.861218261718744, "median": 68.71144104003906, "p90": 184.1825210571289, "max": 207.2849578857422, "pos_frac": 0.78125, "sample": [161.94869995117188, 93.90076446533203, 189.51058959960938, 199.85006713867188, -0.49783897399902344, 168.1902618408203, 166.51516723632812, -10.416908264160156, 102.72396850585938, -15.388031005859375, -36.28007125854492, 145.86705017089844, -28.370948791503906, 184.20111083984375, 42.05976486206055, 55.881317138671875, 81.73616790771484, -3.750732421875, -36.22275161743164, 184.13914489746094, 70.17782592773438, 80.89427947998047, 141.12933349609375, 172.09385681152344, -2.0676956176757812, 179.18121337890625, 0.9955024719238281, 4.150825500488281, 207.2849578857422, 120.0272216796875, 70.38751983642578, 67.24505615234375, -63.023094177246094, 102.39085388183594, 131.75009155273438, 10.301544189453125, -63.94367980957031, 180.26837158203125, 52.62297058105469, 66.36215209960938, 36.279998779296875, 173.71835327148438, 47.625694274902344, 8.466991424560547, 97.65525817871094, 57.188568115234375, 107.85415649414062, -1.1844940185546875, 125.13371276855469, -20.00518035888672, 196.764892578125, 27.772539138793945, 193.39541625976562, 200.25656127929688, -48.71415710449219, 34.31669616699219, 74.6615982055664, 2.2699966430664062, 0.19034576416015625, 113.42224884033203, -81.38011932373047, 57.63520050048828, 164.99227905273438, 37.820945739746094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000441.npy"}
|
||||
{"epoch": 0.6666666666666666, "step": 442, "batch_size": 64, "mean": 71.95057678222656, "std": 90.4041748046875, "min": -178.51434326171875, "p10": -25.858528900146485, "median": 66.96420288085938, "p90": 181.56436157226562, "max": 260.6705017089844, "pos_frac": 0.765625, "sample": [5.18060302734375, 232.55551147460938, 48.92811584472656, -46.191253662109375, 67.89427185058594, 260.6705017089844, 198.7880859375, 180.24478149414062, 61.89242172241211, 165.16807556152344, 66.03413391113281, 166.24021911621094, 163.62930297851562, -7.80731201171875, 144.45639038085938, 110.67294311523438, 2.6297378540039062, 24.883316040039062, 43.462066650390625, -26.270591735839844, -83.42216491699219, 149.34735107421875, 107.32124328613281, -11.12542724609375, 128.45350646972656, 1.5312118530273438, -64.62816619873047, 101.3134765625, -0.42299842834472656, 201.54896545410156, -1.3744049072265625, 76.56018829345703, 189.13137817382812, 169.48825073242188, 236.15029907226562, 112.37004089355469, 29.022140502929688, 0.24689102172851562, 181.80784606933594, 55.56135559082031, 81.86360931396484, 26.314964294433594, 7.783262252807617, 19.367443084716797, -2.859884262084961, -178.51434326171875, 4.134971618652344, 163.37193298339844, -30.754791259765625, -24.897048950195312, 180.99623107910156, 140.65896606445312, -77.68809509277344, 71.23596954345703, 132.53530883789062, -6.538320541381836, 77.71072387695312, 22.682403564453125, 118.50808715820312, 173.91055297851562, 69.66691589355469, 177.53033447265625, -0.21572113037109375, 16.091169357299805], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000442.npy"}
|
||||
{"epoch": 0.6681783824640968, "step": 443, "batch_size": 64, "mean": 90.71723937988281, "std": 103.47681427001953, "min": -177.27459716796875, "p10": -14.390611267089843, "median": 92.73945617675781, "p90": 187.5782180786133, "max": 392.9042663574219, "pos_frac": 0.8125, "sample": [10.764087677001953, 165.9755401611328, 80.93504333496094, 180.19393920898438, 33.875946044921875, 52.43768310546875, 184.4198760986328, -72.93357849121094, 60.116455078125, 112.96241760253906, -41.55084991455078, -7.826410293579102, 73.28825378417969, 181.4139404296875, 75.79794311523438, 225.66848754882812, 8.811487197875977, 141.64083862304688, 117.7867202758789, 182.2269287109375, 392.9042663574219, -177.27459716796875, -14.876335144042969, 168.39688110351562, 6.429296493530273, 243.79470825195312, 158.68399047851562, 103.06568145751953, 166.56680297851562, 177.9539794921875, 166.96751403808594, 136.55445861816406, 151.05490112304688, 170.4358367919922, 72.50442504882812, 188.93179321289062, 173.5033721923828, 174.18528747558594, 124.17074584960938, 199.99270629882812, 8.246391296386719, 167.14456176757812, -7.108285903930664, 82.4132308959961, 179.6241912841797, 174.0675811767578, -13.257255554199219, 14.07086181640625, 7.616355895996094, 7.517599105834961, 6.200191497802734, -49.403236389160156, 145.4403076171875, -2.7998199462890625, 43.58810043334961, 4.699253082275391, -171.11178588867188, -24.373992919921875, 233.8257293701172, 63.48389434814453, 0.2548332214355469, -12.157478332519531, 143.88633728027344, 254.08543395996094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000443.npy"}
|
||||
{"epoch": 0.6696900982615268, "step": 444, "batch_size": 64, "mean": 36.25995635986328, "std": 102.83992767333984, "min": -181.2838592529297, "p10": -72.71593170166015, "median": 14.688254356384277, "p90": 182.93405151367188, "max": 273.3203430175781, "pos_frac": 0.640625, "sample": [137.4805908203125, -3.540019989013672, -53.145530700683594, 1.9232501983642578, 61.8421630859375, 13.926441192626953, 104.31922912597656, -39.52101135253906, 11.951457977294922, 125.0924301147461, 7.761955261230469, 86.0952377319336, -126.58280944824219, -171.98159790039062, 63.724761962890625, 10.01470947265625, 80.60964965820312, 49.46018981933594, 65.21635437011719, -70.9578857421875, 181.860595703125, 236.98019409179688, -0.4617156982421875, -52.700164794921875, -15.959487915039062, 33.20722198486328, 6.907066345214844, -11.486242294311523, -71.7623291015625, -73.12461853027344, 184.2872314453125, 10.96725082397461, 188.55499267578125, 31.943679809570312, 23.36859130859375, 170.11065673828125, 40.446929931640625, 183.39410400390625, -9.83392333984375, -181.2838592529297, 33.11016845703125, 273.3203430175781, -135.36570739746094, -46.937339782714844, -3.056303024291992, 178.21923828125, 158.43731689453125, 215.03656005859375, 186.07736206054688, 73.27656555175781, 15.450067520141602, 107.75978088378906, -1.1995353698730469, 31.64264678955078, 177.30422973632812, -12.882600784301758, -64.50741577148438, 21.7974796295166, 0.6852569580078125, -114.74463653564453, -172.45709228515625, 166.21449279785156, -3.469156265258789, 7.819852828979492], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000444.npy"}
|
||||
{"epoch": 0.671201814058957, "step": 445, "batch_size": 64, "mean": 77.28157043457031, "std": 92.53673553466797, "min": -122.9019546508789, "p10": -36.935534858703605, "median": 82.21599197387695, "p90": 188.95879821777345, "max": 234.40512084960938, "pos_frac": 0.78125, "sample": [40.032283782958984, 234.40512084960938, 13.566581726074219, -42.48851013183594, -40.6705322265625, 27.762069702148438, 178.75546264648438, 172.72315979003906, 70.08981323242188, 79.41624450683594, -0.2550048828125, -122.9019546508789, -21.145172119140625, 16.760452270507812, -101.27166748046875, 165.1277618408203, -9.099227905273438, 14.426551818847656, 1.298553466796875, 69.03607940673828, -7.7602081298828125, 190.74716186523438, -122.5258560180664, 44.475067138671875, -60.03868865966797, 194.36199951171875, 2.6065750122070312, 177.43084716796875, 59.695228576660156, 0.18450927734375, -49.86936569213867, -11.88447380065918, 100.43584442138672, 200.30551147460938, 184.032958984375, 130.30360412597656, 156.2140655517578, 179.092041015625, 101.20318603515625, -27.12853240966797, 194.85238647460938, 85.01573944091797, 67.56600952148438, 215.7413787841797, 181.2843780517578, 179.857666015625, 221.04205322265625, 14.597131729125977, 159.24656677246094, -28.22054100036621, 125.55793762207031, 2.6635513305664062, 11.54052734375, 108.3088150024414, 88.81632232666016, 117.80914306640625, 171.76022338867188, 180.06854248046875, 104.51903533935547, 89.04424285888672, 153.28305053710938, 109.0608139038086, 184.78594970703125, 20.37010955810547], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000445.npy"}
|
||||
{"epoch": 0.672713529856387, "step": 446, "batch_size": 64, "mean": 76.99099731445312, "std": 89.55258178710938, "min": -118.46952819824219, "p10": -20.716621208190908, "median": 68.79333877563477, "p90": 189.54016876220703, "max": 277.0645446777344, "pos_frac": 0.765625, "sample": [7.815946578979492, -1.4359512329101562, -118.46952819824219, 104.35845947265625, 15.831321716308594, 94.64964294433594, 67.96504211425781, -25.50933074951172, 277.0645446777344, 37.32829284667969, 24.136249542236328, 127.00653076171875, 158.7977752685547, 168.18603515625, 130.08377075195312, 189.05775451660156, 1.1855850219726562, 100.39815521240234, 73.70423126220703, 58.16814422607422, 3.075824737548828, 141.62142944335938, -0.6270751953125, 174.23214721679688, 189.74691772460938, 96.0840835571289, 6.845478057861328, 171.0438232421875, 203.04624938964844, -37.43383026123047, 170.5228729248047, 0.6061725616455078, -99.87308502197266, -4.354515075683594, -26.72699737548828, -2.3227710723876953, 22.192893981933594, -35.505794525146484, 73.52014923095703, 146.90679931640625, 41.8134765625, 205.70054626464844, 200.79502868652344, 176.85731506347656, 252.69891357421875, -1.1429805755615234, 239.87762451171875, 179.12322998046875, -9.533632278442383, 39.50441360473633, 69.62163543701172, 39.163482666015625, 80.28265380859375, 110.13641357421875, 181.2774200439453, 183.72476196289062, 5.272701263427734, 8.693550109863281, -1.8169174194335938, 99.22433471679688, -1.4014205932617188, 50.55788803100586, -47.296539306640625, 141.36672973632812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000446.npy"}
|
||||
{"epoch": 0.674225245653817, "step": 447, "batch_size": 64, "mean": 89.65021514892578, "std": 102.91065216064453, "min": -186.60733032226562, "p10": -55.29376678466796, "median": 109.21742630004883, "p90": 205.11794433593752, "max": 236.25790405273438, "pos_frac": 0.8125, "sample": [236.25790405273438, 206.2322235107422, 167.666259765625, 18.70037841796875, 71.75807189941406, -28.221389770507812, 211.56637573242188, 37.613555908203125, -107.5071029663086, -9.273910522460938, 42.75846862792969, 158.51699829101562, 0.8523941040039062, 142.04916381835938, 196.14736938476562, 180.74017333984375, 102.51863861083984, 54.624637603759766, 100.03768920898438, 93.65834045410156, 17.65850830078125, -59.344764709472656, 61.23602294921875, -45.84143829345703, 48.67352294921875, 220.36459350585938, 197.9060821533203, 211.71958923339844, 147.06808471679688, -76.9736328125, 65.63267517089844, 73.99958801269531, 182.2502899169922, 154.38577270507812, 83.25814819335938, 170.59603881835938, 208.43960571289062, 145.78858947753906, 194.65281677246094, 5.549655914306641, 8.192794799804688, 136.07960510253906, 189.89761352539062, -130.34402465820312, -6.602832794189453, -137.7279052734375, 45.86559295654297, 158.33969116210938, 226.15267944335938, 136.0451202392578, 0.7897415161132812, 168.37042236328125, -82.697509765625, 202.51795959472656, 115.91621398925781, 92.06524658203125, 119.00889587402344, -186.60733032226562, 149.78785705566406, 199.9915008544922, 187.34405517578125, -21.194984436035156, 120.33627319335938, 162.3709716796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000447.npy"}
|
||||
{"epoch": 0.6757369614512472, "step": 448, "batch_size": 64, "mean": 72.66026306152344, "std": 95.56904602050781, "min": -180.34115600585938, "p10": -43.0041221618652, "median": 67.53064727783203, "p90": 187.41301422119142, "max": 299.8031921386719, "pos_frac": 0.796875, "sample": [185.7694091796875, -136.87596130371094, 183.49447631835938, 92.97137451171875, 78.26966094970703, 121.71857452392578, 2.4058990478515625, 161.7709503173828, 33.99348449707031, -0.08538055419921875, 131.66368103027344, 95.6250228881836, 93.29141235351562, -6.8428497314453125, 183.56674194335938, 205.29676818847656, -5.554042816162109, 202.4476318359375, 20.84369659423828, 10.387981414794922, 13.370040893554688, 170.03213500976562, 69.96216583251953, 105.53041076660156, 133.92132568359375, -0.3871345520019531, 183.69654846191406, 82.31587982177734, 28.179054260253906, -60.50596618652344, -69.6242904663086, 62.41957092285156, 46.32849884033203, 15.530197143554688, 7.804542541503906, 118.15644836425781, -60.918914794921875, 21.425432205200195, 228.61444091796875, -8.131181716918945, 3.191793441772461, -180.34115600585938, 101.87065124511719, 43.34376525878906, 121.51287841796875, 185.6381072998047, 125.97734069824219, 163.46939086914062, 65.09912872314453, 46.647911071777344, -57.949668884277344, 47.420074462890625, 88.37832641601562, 188.11741638183594, 2.0828399658203125, 299.8031921386719, -5.8408203125, -123.6689224243164, 175.2144775390625, 189.52590942382812, 179.29898071289062, 50.49702453613281, 4.411430358886719, 194.67855834960938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000448.npy"}
|
||||
{"epoch": 0.6772486772486772, "step": 449, "batch_size": 64, "mean": 35.89563751220703, "std": 89.80038452148438, "min": -155.86558532714844, "p10": -88.34486541748046, "median": 25.734355926513672, "p90": 156.87627258300782, "max": 212.69662475585938, "pos_frac": 0.65625, "sample": [5.231035232543945, 31.508575439453125, -99.87298583984375, 65.79534912109375, 28.843101501464844, 0.35155487060546875, -22.427078247070312, 71.98236846923828, 63.12328338623047, 198.10299682617188, 158.52120971679688, 117.10543060302734, 4.792991638183594, -19.53199577331543, 3.319976806640625, -11.52166748046875, 212.69662475585938, 1.7069015502929688, -0.3622589111328125, 55.08974838256836, -67.25605773925781, 53.95763397216797, 178.29083251953125, 135.46234130859375, -155.86558532714844, -37.47220993041992, 129.59432983398438, -112.28457641601562, 144.38229370117188, 52.53289794921875, 46.38642883300781, -24.233749389648438, 22.6256103515625, -90.84989929199219, 153.0380859375, 17.013580322265625, 207.2947540283203, -9.498607635498047, 81.44648742675781, 40.220794677734375, 8.229192733764648, 19.812026977539062, -82.49978637695312, 38.20967102050781, -105.87345123291016, 95.44828796386719, -0.8449859619140625, 196.95602416992188, 14.61684799194336, 115.4944076538086, 196.53497314453125, -25.768264770507812, 34.89167022705078, -22.78130340576172, -101.67306518554688, 150.80181884765625, 133.53158569335938, -81.6327896118164, 141.1392822265625, -63.03038787841797, 80.77206420898438, -104.52963256835938, 39.93111038208008, -9.655128479003906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000449.npy"}
|
||||
{"epoch": 0.6787603930461074, "step": 450, "batch_size": 64, "mean": 57.25027084350586, "std": 108.09274291992188, "min": -144.04818725585938, "p10": -57.828601074218746, "median": 17.413114547729492, "p90": 198.94774475097657, "max": 350.2774353027344, "pos_frac": 0.640625, "sample": [1.259267807006836, -15.854751586914062, -59.557945251464844, -39.39496612548828, 350.2774353027344, 202.3880615234375, 89.58700561523438, 63.2578125, 165.17056274414062, 189.33645629882812, 105.9586410522461, 40.67231750488281, 51.48027038574219, 52.931121826171875, 4.695823669433594, 186.83895874023438, -7.3970947265625, 79.47303009033203, 167.1496124267578, 251.73486328125, -1.4736671447753906, 33.99240493774414, -22.009674072265625, -53.79346466064453, 83.72068786621094, -20.551029205322266, -20.245887756347656, 210.61715698242188, 82.9674301147461, 6.353601455688477, -15.089622497558594, 200.96768188476562, 34.18242645263672, 147.34039306640625, -117.49329376220703, 194.23455810546875, 174.17288208007812, -4.091083526611328, 253.12136840820312, 7.135280609130859, 12.161678314208984, -6.895332336425781, 119.40425872802734, -125.22135162353516, 12.35699462890625, 3.8028812408447266, 150.01712036132812, -8.158782958984375, 186.76580810546875, -48.58961868286133, 21.6057071685791, -144.04818725585938, 13.220521926879883, 104.38417053222656, 272.57867431640625, -12.467857360839844, -84.25071716308594, 177.95216369628906, -113.04588317871094, 182.5979766845703, -83.38037109375, 9.673271179199219, -18.534128189086914, -11.976219177246094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000450.npy"}
|
||||
{"epoch": 0.6802721088435374, "step": 451, "batch_size": 64, "mean": 52.1673698425293, "std": 94.02674102783203, "min": -168.85137939453125, "p10": -35.422415161132804, "median": 34.49562454223633, "p90": 179.5911560058594, "max": 256.67230224609375, "pos_frac": 0.671875, "sample": [123.29542541503906, -5.8348846435546875, 52.087493896484375, 59.89579772949219, 168.7371368408203, 18.6763916015625, -168.85137939453125, 23.566558837890625, 0.29798316955566406, 200.69583129882812, 130.80657958984375, 96.4487075805664, 184.88009643554688, 48.636566162109375, 157.67404174804688, 61.528472900390625, 68.92107391357422, -13.330284118652344, 30.589780807495117, 101.82859802246094, -2.499143600463867, -129.6358642578125, -124.37078857421875, 254.57015991210938, 172.20855712890625, -37.79217529296875, 108.291015625, -146.3570556640625, -43.18048095703125, -1.0521469116210938, 36.402870178222656, 256.67230224609375, -14.496944427490234, -0.6855792999267578, 158.234130859375, 93.59088897705078, 182.755126953125, 139.89077758789062, -22.104202270507812, 10.089805603027344, 16.986907958984375, 106.47380065917969, 3.0152206420898438, -3.9769287109375, 106.7229995727539, 32.58837890625, 6.349460601806641, 131.12782287597656, 198.1333770751953, 103.54067993164062, -29.892974853515625, 147.07159423828125, 40.45399475097656, -2.2083816528320312, 4.09600830078125, 16.201828002929688, 164.6437225341797, -1.0230598449707031, -7.533380508422852, 46.45155715942383, 185.0189208984375, -10.030155181884766, -119.89300537109375, -26.68804168701172], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000451.npy"}
|
||||
{"epoch": 0.6817838246409675, "step": 452, "batch_size": 64, "mean": 60.54252243041992, "std": 105.20519256591797, "min": -163.38345336914062, "p10": -80.92560043334957, "median": 49.58246994018555, "p90": 192.63352203369143, "max": 262.13507080078125, "pos_frac": 0.75, "sample": [14.779121398925781, -15.997535705566406, 18.94599723815918, 88.21353149414062, 10.939048767089844, 118.68679809570312, 9.528564453125, 184.38084411621094, 157.1825714111328, 199.32119750976562, 15.038585662841797, 244.23890686035156, -151.6256103515625, 146.85662841796875, 173.254638671875, 76.86959075927734, 262.13507080078125, 130.00558471679688, 131.63372802734375, 130.12689208984375, 49.77587127685547, -32.897552490234375, 247.919677734375, 135.02175903320312, 31.389080047607422, -20.48429298400879, 169.7280731201172, 51.949951171875, -0.15720367431640625, 26.079139709472656, 0.17425155639648438, 222.50296020507812, -132.81663513183594, 188.69009399414062, -46.227142333984375, 109.51571655273438, 46.42417907714844, 118.28532409667969, -151.38461303710938, 7.296833038330078, 111.38518524169922, -94.78887939453125, 127.05168914794922, 49.389068603515625, 130.88795471191406, -163.38345336914062, 194.3235626220703, 147.29421997070312, 23.489532470703125, 8.091728210449219, 80.27222442626953, -7.669654846191406, 3.819681167602539, 150.84132385253906, 7.748416900634766, 6.4665985107421875, -48.57794952392578, 203.5022735595703, 158.92437744140625, -94.87251281738281, -21.243011474609375, 109.43659210205078, -156.6802215576172, -16.287033081054688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000452.npy"}
|
||||
{"epoch": 0.6832955404383976, "step": 453, "batch_size": 64, "mean": 61.50476837158203, "std": 101.67036437988281, "min": -159.318603515625, "p10": -68.77076339721678, "median": 50.38550567626953, "p90": 192.7134948730469, "max": 294.12255859375, "pos_frac": 0.671875, "sample": [140.42343139648438, 50.81037902832031, 40.03672790527344, -18.72563934326172, 204.57781982421875, 32.47486114501953, -24.53791046142578, 168.84912109375, 168.53700256347656, -33.06555938720703, -127.90701293945312, -9.300918579101562, 84.3212890625, 6.2771148681640625, 166.79937744140625, 166.11245727539062, 294.12255859375, 51.83642578125, 189.28192138671875, 73.92585754394531, -2.2965030670166016, -23.67864990234375, 43.11018371582031, 20.106246948242188, 11.811073303222656, -146.39993286132812, -159.318603515625, 190.55218505859375, -2.2725906372070312, 175.91441345214844, 193.6397705078125, 48.05218505859375, 98.94900512695312, -52.56251525878906, -7.376132965087891, 54.170982360839844, 165.5563507080078, 177.37294006347656, -97.91942596435547, 162.64239501953125, 78.9980697631836, 237.26377868652344, -79.8009033203125, -1.6834297180175781, -25.269668579101562, 32.82142639160156, -75.71715545654297, 24.60440444946289, 133.6629638671875, 75.32890319824219, -8.869136810302734, -22.470157623291016, 60.61775207519531, 100.68565368652344, 108.21133422851562, -93.00596618652344, -2.3430747985839844, 76.44760131835938, 197.43446350097656, 49.96063232421875, 229.67788696289062, 154.23361206054688, 204.77029418945312, 5.843170166015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000453.npy"}
|
||||
{"epoch": 0.6848072562358276, "step": 454, "batch_size": 64, "mean": 70.82225799560547, "std": 98.70944213867188, "min": -234.69290161132812, "p10": -25.11663970947265, "median": 72.96947479248047, "p90": 187.9294158935547, "max": 334.574951171875, "pos_frac": 0.796875, "sample": [169.14215087890625, -29.660751342773438, -18.02581787109375, 201.00958251953125, -234.69290161132812, -14.827512741088867, 151.42788696289062, 77.15360260009766, 8.107662200927734, 26.94086456298828, 78.94880676269531, 182.42962646484375, 17.027374267578125, -197.83778381347656, 190.28646850585938, 123.95924377441406, 178.72145080566406, 71.89833068847656, 215.0640869140625, 42.47404479980469, -14.2774658203125, -105.4356689453125, 6.077264785766602, 156.04922485351562, 205.02926635742188, 334.574951171875, -2.968648910522461, 37.02748107910156, -40.231117248535156, 27.627479553222656, -28.155563354492188, 117.96367645263672, 161.56411743164062, 20.57929229736328, 128.20928955078125, 116.5401840209961, 210.59271240234375, -36.91102981567383, 191.20245361328125, 93.34527587890625, 104.63082885742188, 110.95564270019531, 5.507713317871094, 0.9977293014526367, 9.559555053710938, 25.19237518310547, 177.2515869140625, 116.62411499023438, 2.320375442504883, 31.028213500976562, 133.7086639404297, 154.76495361328125, 4.552928924560547, 165.31605529785156, 136.6726531982422, 32.663482666015625, 137.902587890625, 74.04061889648438, -8.121856689453125, 142.03408813476562, 95.23172760009766, 71.31149291992188, 4.361917495727539, -13.832527160644531], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000454.npy"}
|
||||
{"epoch": 0.6863189720332578, "step": 455, "batch_size": 64, "mean": 65.564208984375, "std": 96.16670227050781, "min": -139.3477783203125, "p10": -47.73193702697753, "median": 36.29958534240723, "p90": 209.09572143554695, "max": 269.7954406738281, "pos_frac": 0.796875, "sample": [65.57501220703125, 41.495086669921875, 71.0069351196289, 230.56021118164062, 93.3904800415039, 2.436349868774414, 257.38818359375, 143.22250366210938, -49.58979034423828, 34.432655334472656, 131.69288635253906, -139.3477783203125, 69.80206298828125, 131.77745056152344, 63.140350341796875, 269.7954406738281, -23.81658172607422, 2.613311767578125, 2.018951416015625, -94.58576202392578, 227.87567138671875, 5.433343887329102, 165.87583923339844, -4.902462005615234, -2.2848739624023438, 215.86090087890625, 29.466812133789062, 127.37161254882812, 188.0169219970703, 48.53105926513672, 16.08957290649414, 17.447893142700195, 193.310302734375, 20.006324768066406, 16.519834518432617, -8.626350402832031, -71.19926452636719, 125.76213836669922, 161.66445922851562, 167.9248809814453, -43.39694595336914, 29.522979736328125, 91.34817504882812, -4.502410888671875, 163.89688110351562, 225.56605529785156, 93.65547943115234, 58.27855682373047, 9.545211791992188, 5.657989501953125, 192.97332763671875, 18.195274353027344, 0.5376243591308594, 176.66510009765625, 33.31831359863281, 5.40289306640625, 38.1665153503418, -72.86544036865234, -50.99751281738281, 8.381813049316406, 68.08792877197266, 24.41684341430664, -84.66183471679688, 265.76422119140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000455.npy"}
|
||||
{"epoch": 0.6878306878306878, "step": 456, "batch_size": 64, "mean": 59.35917663574219, "std": 101.2625732421875, "min": -165.2110137939453, "p10": -37.47820320129393, "median": 39.54744529724121, "p90": 179.18891296386724, "max": 331.9257507324219, "pos_frac": 0.71875, "sample": [124.39595031738281, -2.2972488403320312, 3.665468215942383, 118.13756561279297, 331.9257507324219, 2.0031471252441406, -44.37180709838867, 81.17437744140625, -7.445278167724609, 125.8963623046875, 48.16072082519531, 30.93416976928711, 266.5135498046875, 165.18463134765625, 17.102401733398438, 3.8769683837890625, 113.43959045410156, 83.69689178466797, -1.5920124053955078, 191.73236083984375, -125.40049743652344, 17.52587890625, 52.56464385986328, -21.39312744140625, 166.949462890625, -20.071657180786133, -165.2110137939453, -139.6078338623047, 153.02362060546875, -67.81661987304688, 9.751140594482422, 83.56217956542969, 6.775934219360352, 123.25410461425781, 13.7484130859375, 56.76634216308594, 65.67106628417969, -0.6727142333984375, 108.70498657226562, 4.253551483154297, -10.105474472045898, 51.72786331176758, 88.32029724121094, 106.14656066894531, -102.9480972290039, -121.61128997802734, 124.66596984863281, -1.2308349609375, -15.257621765136719, -8.213397979736328, 160.63565063476562, 183.51321411132812, 14.512893676757812, 248.23179626464844, 30.57276153564453, 152.42758178710938, 238.70848083496094, 18.982872009277344, 167.35943603515625, 169.098876953125, 7.378198623657227, -13.243927001953125, 77.51985168457031, 257.284423828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000456.npy"}
|
||||
{"epoch": 0.6893424036281179, "step": 457, "batch_size": 64, "mean": 66.36444091796875, "std": 87.61307525634766, "min": -125.45164489746094, "p10": -12.5826566696167, "median": 54.84649658203125, "p90": 185.33033294677733, "max": 237.55435180664062, "pos_frac": 0.796875, "sample": [27.13141632080078, 5.5015411376953125, -5.881465911865234, 127.32473754882812, 72.05426025390625, -116.6075439453125, 22.015125274658203, 39.626617431640625, 49.65375518798828, 8.97979736328125, 16.98127555847168, 205.024658203125, 166.9998016357422, 117.23282623291016, -6.801967620849609, -105.609375, 185.5869598388672, 38.816741943359375, -0.11859703063964844, 174.65719604492188, 157.75686645507812, 54.31993103027344, 213.86146545410156, 55.37306213378906, 6.027233123779297, 60.034027099609375, -12.582918167114258, 181.08642578125, 6.979896545410156, -0.8173332214355469, -102.35179138183594, -76.7859878540039, 14.670318603515625, 177.2908477783203, 28.12576675415039, -41.59912109375, -125.45164489746094, 58.1263427734375, 114.9867172241211, 14.84796142578125, 97.87042999267578, 10.217063903808594, 184.73153686523438, 153.8976593017578, -1.006591796875, 7.655952453613281, 149.00782775878906, 80.50914001464844, 5.673011779785156, 182.18377685546875, 109.97026824951172, 125.6789779663086, 237.55435180664062, 138.2456512451172, 99.00466918945312, 188.21389770507812, 193.39599609375, 59.78779602050781, 22.72437858581543, 35.7059440612793, 194.3876953125, -12.582046508789062, 94.6873779296875, 83.34359741210938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000457.npy"}
|
||||
{"epoch": 0.690854119425548, "step": 458, "batch_size": 64, "mean": 68.11283874511719, "std": 105.01067352294922, "min": -199.95904541015625, "p10": -33.38054428100585, "median": 48.210838317871094, "p90": 199.35587921142582, "max": 276.53179931640625, "pos_frac": 0.796875, "sample": [168.5216522216797, 139.2471923828125, 4.914648056030273, 127.03204345703125, 67.39505004882812, 24.78708267211914, 128.72654724121094, -198.796142578125, -0.13245391845703125, 4.7367095947265625, -11.140594482421875, -67.60488891601562, -22.607101440429688, 248.26426696777344, 191.2353515625, 47.710838317871094, 47.178794860839844, 185.65769958496094, 74.22086334228516, 92.06150817871094, 14.305791854858398, 35.47874450683594, 31.58220672607422, -22.488929748535156, 139.82041931152344, 32.31611633300781, 245.5565948486328, 63.84925079345703, 19.36455535888672, 184.9193115234375, 222.00607299804688, 74.99158477783203, 95.01763916015625, -113.54059600830078, 62.44279479980469, 0.35157203674316406, 6.769309997558594, 20.883529663085938, 105.34788513183594, 162.91249084472656, 149.1260528564453, 44.6650505065918, -199.95904541015625, -6.9149017333984375, 276.53179931640625, 0.8512687683105469, 159.00466918945312, -37.99773406982422, 184.994140625, 210.66355895996094, -21.81087875366211, 273.73468017578125, 101.25395202636719, 22.786340713500977, -112.2643814086914, 188.6339569091797, 48.710838317871094, 182.9041748046875, -112.2899169921875, 41.112091064453125, 10.66250228881836, 38.535430908203125, 202.8361053466797, 80.15664672851562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000458.npy"}
|
||||
{"epoch": 0.6923658352229781, "step": 459, "batch_size": 64, "mean": 84.15510559082031, "std": 102.83705139160156, "min": -245.34393310546875, "p10": -17.240444564819335, "median": 85.47223281860352, "p90": 204.64273376464845, "max": 273.89520263671875, "pos_frac": 0.78125, "sample": [46.87755584716797, 129.56399536132812, -9.641632080078125, 155.1707763671875, 19.58922576904297, -245.34393310546875, -0.7028026580810547, 195.60519409179688, 130.0960693359375, 168.62469482421875, 211.74020385742188, -38.72922134399414, 121.19764709472656, -73.58292388916016, 188.40032958984375, 230.75193786621094, 86.09178161621094, 101.24020385742188, 9.409843444824219, 176.01478576660156, 151.52325439453125, 173.3418426513672, 104.37709045410156, 22.41674041748047, 9.118185043334961, 182.793701171875, 69.62438201904297, 158.50933837890625, 34.80619430541992, 143.6326141357422, 240.05450439453125, 49.195762634277344, 2.214597702026367, 14.870939254760742, -14.081886291503906, 161.53387451171875, 65.49413299560547, -109.25624084472656, 173.57278442382812, -18.594112396240234, -3.835763931274414, 133.225341796875, 183.12925720214844, 2.7735671997070312, 273.89520263671875, -123.73953247070312, 88.79273223876953, 29.434425354003906, -58.0225830078125, 84.8526840209961, -11.227958679199219, -4.2286834716796875, -4.964330673217773, 245.60595703125, 139.28016662597656, 64.39048767089844, 192.87794494628906, 68.72586059570312, 203.67282104492188, 205.04312133789062, 206.5808868408203, 19.04212188720703, 203.70849609375, 29.39337921142578], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000459.npy"}
|
||||
{"epoch": 0.6938775510204082, "step": 460, "batch_size": 64, "mean": 80.90373229980469, "std": 103.32182312011719, "min": -120.92526245117188, "p10": -39.1142219543457, "median": 53.26686096191406, "p90": 201.46036834716801, "max": 346.6524658203125, "pos_frac": 0.765625, "sample": [229.0314178466797, 116.78253173828125, 233.16427612304688, -58.51988220214844, 7.811973571777344, 179.92095947265625, 259.7088623046875, 54.889007568359375, 321.08837890625, 8.222549438476562, -46.89631652832031, -39.30970764160156, -11.903480529785156, 25.40935516357422, -1.483184814453125, 13.303352355957031, 13.690496444702148, 185.70993041992188, -60.169273376464844, 176.69482421875, 87.43624114990234, 184.6334686279297, 40.91608428955078, 210.74169921875, 175.79257202148438, 84.52152252197266, 3.9690513610839844, 51.64471435546875, 185.81626892089844, 76.63130950927734, 149.15509033203125, 178.29844665527344, 160.31243896484375, 155.9604949951172, 10.86570930480957, 65.18147277832031, 2.007518768310547, 15.254379272460938, 38.777252197265625, -0.9472217559814453, 91.09151458740234, 188.38523864746094, -38.65808868408203, 23.280366897583008, 148.40699768066406, 188.2543182373047, -41.88909912109375, 26.111984252929688, 183.61932373046875, -87.58499145507812, 3.519571304321289, 43.175140380859375, 188.35093688964844, 81.2884521484375, -26.467300415039062, 207.06399536132812, 47.176429748535156, 346.6524658203125, -120.92526245117188, -31.636276245117188, 112.65432739257812, -9.866943359375, -10.688850402832031, 182.409912109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000460.npy"}
|
||||
{"epoch": 0.6953892668178382, "step": 461, "batch_size": 64, "mean": 82.05435943603516, "std": 100.7286148071289, "min": -124.11268615722656, "p10": -44.12338409423828, "median": 72.42246627807617, "p90": 210.20036468505862, "max": 274.8422546386719, "pos_frac": 0.75, "sample": [214.1473388671875, 69.3162612915039, 48.61039733886719, 26.246978759765625, 168.4110565185547, 211.97169494628906, 61.68463897705078, 167.54408264160156, 149.05252075195312, -120.17771911621094, 7.8300323486328125, 90.70368957519531, 58.50218200683594, 4.51603889465332, 132.54531860351562, -20.039108276367188, -56.96534729003906, 190.4353485107422, 1.504852294921875, 3.8356685638427734, 146.20310974121094, 166.1445770263672, 173.17245483398438, -3.3968505859375, 54.50214385986328, 132.81886291503906, -44.46894073486328, -7.9110107421875, -43.31708526611328, 131.03367614746094, 190.3450927734375, -40.30132293701172, -9.999963760375977, 76.42967224121094, 170.73219299316406, 206.78289794921875, 274.8422546386719, 201.59710693359375, 68.67202758789062, 243.93081665039062, 224.17808532714844, 174.66604614257812, 108.60968017578125, 28.561927795410156, 2.2313671112060547, -62.87864685058594, 114.65272521972656, -0.20572471618652344, 170.51747131347656, 21.971107482910156, 250.50291442871094, 97.22810363769531, 75.52867126464844, 32.575714111328125, 202.6035919189453, -40.42073059082031, 211.6649932861328, -124.11268615722656, 64.85282135009766, -18.114273071289062, 174.336181640625, -63.070884704589844, 180.67645263671875, -72.56382751464844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000461.npy"}
|
||||
{"epoch": 0.6969009826152683, "step": 462, "batch_size": 64, "mean": 83.61892700195312, "std": 97.19733428955078, "min": -190.97601318359375, "p10": -24.16351375579833, "median": 91.36479187011719, "p90": 195.30665588378906, "max": 254.75592041015625, "pos_frac": 0.796875, "sample": [163.45538330078125, 204.95657348632812, 198.767333984375, 193.03456115722656, 10.223579406738281, 6.815338134765625, 133.78176879882812, 4.218448638916016, -3.9525222778320312, 4.144157409667969, -46.352989196777344, 180.48265075683594, 169.72390747070312, 45.227378845214844, 187.46829223632812, 202.8290557861328, -35.79322814941406, 177.4813690185547, 19.146751403808594, 105.12813568115234, 165.4294891357422, 60.87782287597656, 60.14460754394531, 157.1552734375, 60.2686767578125, 11.08319091796875, 172.77561950683594, 206.35084533691406, -14.127384185791016, 158.23434448242188, -41.12115478515625, -1.4554901123046875, -13.952339172363281, 179.53182983398438, 194.35528564453125, -59.63006591796875, -28.464712142944336, 111.71134948730469, 42.528846740722656, 141.72532653808594, 195.71438598632812, 202.0648956298828, 92.01604461669922, 40.037025451660156, 42.08482360839844, 189.90054321289062, -4.1323699951171875, 29.59103012084961, 154.11740112304688, 168.7930145263672, 100.53944396972656, -172.5198516845703, 176.67794799804688, 180.282470703125, 161.54354858398438, 9.807060241699219, 23.879243850708008, 254.75592041015625, 90.71353912353516, 102.72303009033203, -190.97601318359375, 11.707145690917969, -5.389719009399414, 13.47323989868164], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000462.npy"}
|
||||
{"epoch": 0.6984126984126984, "step": 463, "batch_size": 64, "mean": 51.02170944213867, "std": 93.55805206298828, "min": -203.1431884765625, "p10": -55.628480148315425, "median": 43.3289737701416, "p90": 183.73882751464845, "max": 227.68003845214844, "pos_frac": 0.71875, "sample": [-6.744014739990234, 186.47219848632812, 101.08963012695312, -203.1431884765625, 105.39089965820312, 153.04998779296875, -26.095972061157227, 184.82745361328125, 193.96548461914062, 129.13648986816406, -57.531070709228516, -51.18910217285156, -16.120948791503906, 56.191673278808594, 133.53013610839844, -130.00210571289062, 47.320228576660156, 56.46565246582031, 21.367294311523438, -79.26276397705078, 71.47090148925781, 35.792388916015625, 6.25396728515625, 196.38809204101562, 79.3951187133789, -62.174285888671875, 26.959579467773438, 22.522071838378906, 2.835987091064453, 11.191692352294922, 34.83338928222656, -9.639533996582031, 179.74996948242188, 179.4891815185547, 65.2349853515625, 123.7146987915039, 123.5258560180664, 39.15979766845703, 4.391395568847656, 227.68003845214844, 39.33771896362305, 7.345787048339844, 82.1710205078125, -122.59590148925781, -39.23434066772461, -47.43505859375, 205.89682006835938, 136.53985595703125, 178.28245544433594, 114.62400817871094, 11.482189178466797, -140.53997802734375, -8.055023193359375, -21.029478073120117, 53.72654724121094, 190.095458984375, 56.56929397583008, -17.00927734375, 89.16316986083984, 98.2046127319336, -9.586814880371094, 57.71467590332031, 11.029790878295898, 181.19869995117188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000463.npy"}
|
||||
{"epoch": 0.6999244142101285, "step": 464, "batch_size": 64, "mean": 77.48553466796875, "std": 109.908935546875, "min": -188.199951171875, "p10": -60.49991989135739, "median": 79.3995132446289, "p90": 216.7741668701172, "max": 247.50196838378906, "pos_frac": 0.734375, "sample": [-31.548599243164062, 32.36388397216797, 183.57907104492188, -20.137237548828125, 181.40806579589844, -4.4517669677734375, 7.040107727050781, 177.17860412597656, 83.42306518554688, 154.66952514648438, 193.41961669921875, -32.97429656982422, 178.990234375, 122.25562286376953, 93.73118591308594, -97.95205688476562, 223.64816284179688, 156.2901611328125, 29.574020385742188, 172.0771026611328, 3.2557144165039062, 247.50196838378906, -3.369791030883789, 122.04281616210938, 18.737380981445312, 219.39990234375, 55.537330627441406, 219.8482666015625, 127.82772827148438, 160.3093719482422, 95.19294738769531, 11.470512390136719, 181.7694091796875, 236.1769256591797, -188.199951171875, -72.29661560058594, 188.85150146484375, 54.162269592285156, -141.06253051757812, -89.03024291992188, 156.21786499023438, 9.619911193847656, 41.5050163269043, 239.34384155273438, -77.90128326416016, -2.8271255493164062, 18.172348022460938, 75.37596130371094, -15.950576782226562, 218.0671844482422, 137.93606567382812, 213.7571258544922, -178.36126708984375, -30.917179107666016, 58.67078399658203, -16.562992095947266, 7.288215637207031, 186.32473754882812, 147.26156616210938, 211.26895141601562, -18.720985412597656, 150.1350860595703, 158.01046752929688, 20.650955200195312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000464.npy"}
|
||||
{"epoch": 0.7014361300075586, "step": 465, "batch_size": 64, "mean": 65.93341064453125, "std": 104.59547424316406, "min": -137.34214782714844, "p10": -73.36497573852539, "median": 78.3323974609375, "p90": 191.8107696533203, "max": 308.7640686035156, "pos_frac": 0.671875, "sample": [-25.230575561523438, 308.7640686035156, 172.5086669921875, -133.5498046875, 158.64532470703125, 148.98348999023438, -36.15052795410156, 191.50819396972656, 74.42449951171875, 64.17510223388672, 79.02247619628906, 184.83169555664062, 135.74757385253906, 219.99237060546875, -42.78453826904297, 156.2689208984375, 17.26671600341797, 187.522705078125, 148.29925537109375, -6.837135314941406, -84.29884338378906, -117.32106018066406, 198.14511108398438, 216.9210662841797, 77.64231872558594, 199.62091064453125, -73.9870834350586, -114.009521484375, -26.752235412597656, 95.51306915283203, -0.9942855834960938, -34.045265197753906, 80.517333984375, 94.43607330322266, 154.53628540039062, 15.036651611328125, -62.19847106933594, -4.905143737792969, 6.343969345092773, 176.5300750732422, -85.06951904296875, -20.21631622314453, 85.97978210449219, 45.87421417236328, -11.862892150878906, -137.34214782714844, -71.91339111328125, 91.44440460205078, 36.42950439453125, 191.94044494628906, 13.183464050292969, 226.30746459960938, 20.256507873535156, 96.98948669433594, 25.569984436035156, -31.83624839782715, 86.66362762451172, -19.18366050720215, 125.05905151367188, 92.45926666259766, 173.3323974609375, 152.11959838867188, 158.16062927246094, 175.25332641601562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000465.npy"}
|
||||
{"epoch": 0.7029478458049887, "step": 466, "batch_size": 64, "mean": 37.110450744628906, "std": 98.46041107177734, "min": -283.57208251953125, "p10": -63.15143127441404, "median": 18.9542875289917, "p90": 175.14525756835937, "max": 247.52670288085938, "pos_frac": 0.671875, "sample": [3.2922134399414062, 168.58016967773438, -41.21832275390625, -119.58978271484375, 14.100381851196289, 4.036375045776367, 24.071868896484375, -14.052001953125, 25.71239471435547, -283.57208251953125, 84.34941101074219, 66.13795471191406, 187.16165161132812, -124.42382049560547, 87.35477447509766, -5.2738037109375, 95.03459167480469, 234.39871215820312, 43.625152587890625, 209.79678344726562, 56.06929397583008, 4.4620513916015625, 173.14715576171875, -0.3596305847167969, -9.737640380859375, -1.9684677124023438, 124.053466796875, -72.24530029296875, 247.52670288085938, 41.1600341796875, -5.013727188110352, 126.66859436035156, -18.46405792236328, 24.797515869140625, 163.0572052001953, 41.19917678833008, 126.2091064453125, 51.202430725097656, -26.331745147705078, 15.188102722167969, -24.60907745361328, -190.72348022460938, -89.47936248779297, -72.26435852050781, 142.686279296875, 25.0106201171875, 14.516067504882812, 121.2419662475586, 228.09121704101562, -0.8496818542480469, -41.932403564453125, 78.15310668945312, -32.79082489013672, 33.378326416015625, -7.363410949707031, 1.9880542755126953, 12.778196334838867, 10.20136833190918, 22.72047233581543, 176.0015869140625, 198.01947021484375, 0.7497749328613281, 8.901924133300781, 40.50023651123047], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000466.npy"}
|
||||
{"epoch": 0.7044595616024187, "step": 467, "batch_size": 64, "mean": 59.86858367919922, "std": 94.39842987060547, "min": -159.46644592285156, "p10": -41.54809455871582, "median": 34.27323913574219, "p90": 184.18254852294925, "max": 386.4601745605469, "pos_frac": 0.8125, "sample": [0.3268280029296875, 102.72706604003906, 36.36798095703125, 59.77085876464844, 32.178497314453125, 83.17713165283203, 1.8723297119140625, -18.582439422607422, -38.46519088745117, 70.39653778076172, 132.22891235351562, -53.726383209228516, 186.94956970214844, -56.24726867675781, 98.18806457519531, 111.86908721923828, 94.69969177246094, 37.23805236816406, -139.2451171875, 11.964309692382812, 157.54949951171875, -58.7525634765625, -35.020652770996094, 141.10992431640625, 213.6387176513672, 25.372909545898438, 23.35291290283203, 109.02565002441406, -159.46644592285156, 177.72616577148438, -4.928459167480469, 44.3304443359375, 8.369527816772461, 10.82183837890625, 0.2909088134765625, 16.967300415039062, 106.31039428710938, 215.88259887695312, 108.41114807128906, 98.12176513671875, 0.1165771484375, 0.11686134338378906, 11.297279357910156, 18.261354446411133, 5.675725936889648, -42.86933898925781, 163.37823486328125, 5.944908142089844, 160.89930725097656, 134.38670349121094, 386.4601745605469, 14.610031127929688, -2.7364883422851562, 6.594717025756836, 95.27494049072266, 107.48908233642578, 40.573204040527344, 192.97763061523438, 4.0884552001953125, 230.8367156982422, -51.80937194824219, 217.0403594970703, 162.96548461914062, 17.21485137939453], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000467.npy"}
|
||||
{"epoch": 0.7059712773998488, "step": 468, "batch_size": 64, "mean": 80.89079284667969, "std": 106.20854949951172, "min": -201.36651611328125, "p10": -37.67604751586913, "median": 77.40111541748047, "p90": 213.76978454589843, "max": 244.7900390625, "pos_frac": 0.828125, "sample": [2.536653518676758, 36.890380859375, 127.85421752929688, 239.5257110595703, 25.04816436767578, -41.975929260253906, 4.134366989135742, 142.99143981933594, 206.1912841796875, 65.91259765625, 107.6314697265625, 224.01638793945312, 223.5562286376953, -177.7255401611328, 25.449600219726562, 12.250030517578125, 53.29735565185547, -19.8428955078125, -201.36651611328125, 129.2418975830078, 178.81002807617188, 133.74725341796875, 179.0816650390625, 228.84811401367188, 72.09197998046875, 160.7395477294922, 183.53216552734375, 0.5534381866455078, -16.158416748046875, -136.34014892578125, 213.79339599609375, -147.79827880859375, 99.38690185546875, -1.5371665954589844, 21.74842071533203, -59.277015686035156, 144.58743286132812, 169.04867553710938, -53.21275329589844, 177.89996337890625, 63.389930725097656, 51.54351806640625, 193.40093994140625, 213.71469116210938, 161.01974487304688, 0.9465293884277344, 24.960744857788086, 51.79310607910156, 8.714790344238281, 0.33838462829589844, 188.39852905273438, 30.33185577392578, 126.87442016601562, 41.449462890625, 244.7900390625, 118.12982177734375, -27.642990112304688, 215.67794799804688, 177.10317993164062, 186.48422241210938, 181.58834838867188, 84.70256805419922, 82.71025085449219, 21.428627014160156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000468.npy"}
|
||||
{"epoch": 0.7074829931972789, "step": 469, "batch_size": 64, "mean": 75.31197357177734, "std": 87.23910522460938, "min": -132.23968505859375, "p10": -15.072142219543451, "median": 65.95589065551758, "p90": 180.2067901611328, "max": 300.9940185546875, "pos_frac": 0.765625, "sample": [20.762451171875, 180.24009704589844, 96.02293395996094, 157.49853515625, 18.93340301513672, 5.396121978759766, 97.71070861816406, 166.29443359375, 74.95973205566406, -132.23968505859375, 170.25550842285156, 85.92984008789062, 168.5444793701172, 4.686958312988281, -7.308708190917969, 180.1290740966797, 124.71179962158203, 56.952049255371094, 145.4788055419922, 179.42306518554688, 195.95578002929688, 8.977951049804688, 136.78164672851562, -2.085174560546875, 104.44525146484375, -82.0004653930664, 41.390052795410156, 9.6357421875, 4.957609176635742, 52.97447204589844, -6.262725830078125, 0.8150768280029297, 84.890625, 200.8995819091797, 177.5601348876953, 14.809524536132812, -0.7253952026367188, -23.56574249267578, 174.2596893310547, 158.53575134277344, 185.63931274414062, 91.29749298095703, 39.75429153442383, 145.27413940429688, 11.781471252441406, -4.2723846435546875, 86.713623046875, -0.06256103515625, 167.7006072998047, 23.380142211914062, -45.223846435546875, -27.762401580810547, 230.11770629882812, -3.197967529296875, 300.9940185546875, -8.458641052246094, 116.84999084472656, 33.402587890625, 140.67446899414062, -17.9064998626709, 140.72531127929688, 11.589994430541992, -30.34503173828125, 184.66944885253906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000469.npy"}
|
||||
{"epoch": 0.708994708994709, "step": 470, "batch_size": 64, "mean": 71.88774871826172, "std": 100.55558776855469, "min": -201.4785614013672, "p10": -41.65749053955077, "median": 75.12539672851562, "p90": 199.5244140625, "max": 248.6851348876953, "pos_frac": 0.78125, "sample": [219.34854125976562, 90.70711517333984, 230.24160766601562, -201.4785614013672, 63.514801025390625, 190.24517822265625, 32.99383544921875, 43.752464294433594, 179.59255981445312, 99.418212890625, 195.36981201171875, -70.98075866699219, 199.4150390625, 10.235618591308594, 162.55087280273438, 36.251060485839844, 91.95513153076172, -101.13176727294922, -21.00860595703125, 161.48291015625, -27.650333404541016, 183.92938232421875, 7.158365249633789, -117.85382080078125, 202.48524475097656, 26.061838150024414, 179.44207763671875, 137.7736358642578, 0.5089492797851562, 2.138113021850586, 117.47663879394531, 4.562629699707031, 138.0049285888672, 39.80248260498047, -72.00262451171875, 12.557914733886719, 101.41317749023438, 1.8905696868896484, 90.10696411132812, -20.3145751953125, 248.6851348876953, 192.872314453125, 208.68179321289062, 30.772483825683594, -30.614418029785156, -24.400985717773438, 26.741533279418945, 187.09512329101562, 104.33096313476562, 131.9329833984375, 103.11857604980469, 205.75857543945312, -46.390235900878906, 153.35548400878906, 21.719932556152344, 172.4788818359375, 117.77022552490234, 86.73599243164062, -6.2101898193359375, -80.57804107666016, 199.5712890625, 2.0935420989990234, -28.508853912353516, 3.837005615234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000470.npy"}
|
||||
{"epoch": 0.7105064247921391, "step": 471, "batch_size": 64, "mean": 80.34571838378906, "std": 99.51949310302734, "min": -187.9986114501953, "p10": -24.227475738525385, "median": 79.60746765136719, "p90": 195.30552978515627, "max": 271.40673828125, "pos_frac": 0.828125, "sample": [3.0882415771484375, 231.34176635742188, 147.57969665527344, 164.8434295654297, 83.5133056640625, -42.24090576171875, 178.82037353515625, 50.465972900390625, 188.79945373535156, 224.98779296875, 70.31237030029297, 55.06886291503906, 185.02511596679688, 23.079132080078125, -145.22396850585938, -1.731008529663086, 78.83068084716797, 196.70556640625, 179.82382202148438, -95.38470458984375, 35.16988754272461, 39.320865631103516, 154.21963500976562, 98.60169982910156, 173.79071044921875, -187.9986114501953, 80.3842544555664, 119.80645751953125, 184.18699645996094, 17.99022674560547, 150.96914672851562, 127.70809936523438, 30.44179916381836, 39.84642028808594, 97.39216613769531, 88.76168060302734, 190.45263671875, -59.71943664550781, 57.33228302001953, 182.445068359375, 18.04456329345703, 25.836456298828125, 187.13140869140625, 216.0889892578125, -9.765363693237305, 8.149032592773438, 11.076553344726562, 59.7943000793457, -26.50464630126953, 162.0884246826172, -18.914077758789062, 196.16162109375, 95.36681365966797, 19.802871704101562, 193.3079833984375, 3.1815528869628906, 11.121429443359375, 103.53749084472656, -150.20957946777344, 271.40673828125, 29.571014404296875, 114.20101928710938, -5.746490478515625, 228.5903778076172], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000471.npy"}
|
||||
{"epoch": 0.7120181405895691, "step": 472, "batch_size": 64, "mean": 62.37507629394531, "std": 116.31867980957031, "min": -216.60118103027344, "p10": -79.69791641235349, "median": 53.17904281616211, "p90": 192.42705841064452, "max": 389.547119140625, "pos_frac": 0.703125, "sample": [-1.6064529418945312, 43.00043487548828, 87.22344207763672, -36.28407287597656, 192.67892456054688, 172.48117065429688, -34.580078125, 132.9676055908203, 218.13153076171875, 124.31674194335938, 85.92318725585938, 256.7829284667969, 8.483129501342773, -53.28236389160156, -7.036809921264648, 187.8159637451172, -4.701135635375977, 82.16934967041016, 184.92002868652344, 43.695343017578125, -117.91690063476562, 27.418869018554688, -34.75850296020508, 187.81134033203125, 13.627410888671875, 187.19931030273438, 41.05891418457031, 200.63177490234375, 188.25189208984375, -4.615573883056641, 22.354202270507812, 89.70126342773438, 91.93244934082031, -140.47006225585938, -39.53041076660156, 66.76840209960938, 230.22225952148438, 389.547119140625, -7.500068664550781, -5.4214324951171875, 188.245361328125, 191.83937072753906, -216.60118103027344, 198.12557983398438, -177.52586364746094, -91.01886749267578, 182.24810791015625, 8.461542129516602, -159.44638061523438, 98.3764877319336, 8.976081848144531, 62.662742614746094, -13.027450561523438, 42.14934539794922, 15.525741577148438, 63.32903289794922, 86.2330322265625, 26.96385955810547, 188.51454162597656, 42.28660583496094, 95.36479187011719, 73.43436431884766, -158.6978759765625, 166.17477416992188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000472.npy"}
|
||||
{"epoch": 0.7135298563869993, "step": 473, "batch_size": 64, "mean": 71.55660247802734, "std": 121.8648452758789, "min": -181.586181640625, "p10": -85.50588912963866, "median": 45.29718017578125, "p90": 240.55994262695316, "max": 328.31951904296875, "pos_frac": 0.734375, "sample": [253.2882080078125, 203.83753967285156, 29.549665451049805, -149.87860107421875, -127.63871765136719, 156.43247985839844, 47.343841552734375, 11.613067626953125, 27.508493423461914, 211.88784790039062, 178.32080078125, 43.250518798828125, -4.196420669555664, 111.70365905761719, 2.0909347534179688, -56.950355529785156, 265.045166015625, 233.93087768554688, 15.777534484863281, 204.2283935546875, -118.58876037597656, 25.69530487060547, 148.30267333984375, -87.7478256225586, -72.09518432617188, 172.7704620361328, 164.68597412109375, 206.71826171875, 7.943056106567383, -22.69731903076172, -21.5589599609375, 6.142173767089844, -169.19134521484375, -31.82068634033203, -20.58795738220215, 93.38645935058594, 328.31951904296875, 245.68167114257812, 26.16408920288086, 16.0604190826416, 264.1941833496094, 178.53890991210938, 283.71893310546875, 93.53562927246094, 105.90336608886719, 80.23649597167969, 73.68375396728516, 6.126129150390625, 39.724525451660156, 177.08729553222656, 142.41238403320312, -3.82232666015625, 114.104248046875, 129.26708984375, -12.697273254394531, 138.26486206054688, 243.40097045898438, -124.25660705566406, 10.89532470703125, -80.27470397949219, 25.44293975830078, 121.47227478027344, 199.52337646484375, -181.586181640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000473.npy"}
|
||||
{"epoch": 0.7150415721844293, "step": 474, "batch_size": 64, "mean": 65.6942138671875, "std": 113.63704681396484, "min": -185.52584838867188, "p10": -59.127157974243154, "median": 42.17783546447754, "p90": 215.75445098876955, "max": 296.28411865234375, "pos_frac": 0.671875, "sample": [202.19149780273438, 177.96142578125, 146.934814453125, -70.01853942871094, -9.04281997680664, 14.094295501708984, 253.24154663085938, 216.10789489746094, 84.05609130859375, 57.99249267578125, 132.6845245361328, 75.07341003417969, -185.52584838867188, 141.1765899658203, 84.65757751464844, 195.8330535888672, -101.23509979248047, 0.26430511474609375, -42.33368682861328, 34.563262939453125, -62.45514678955078, -8.497993469238281, 169.5959930419922, 45.44805908203125, 158.7011260986328, 43.69880676269531, -0.04300880432128906, -34.03361511230469, -13.173019409179688, -1.3393020629882812, 173.88534545898438, 40.656864166259766, -166.20339965820312, 0.7129974365234375, -22.561038970947266, 191.96316528320312, 27.57917022705078, 220.93136596679688, 214.92974853515625, -0.06061553955078125, 19.12562370300293, 51.68292236328125, 250.43133544921875, 163.8761749267578, -51.36185073852539, 290.53466796875, 224.66958618164062, 71.54940795898438, 130.69818115234375, -0.18311500549316406, 182.09683227539062, 296.28411865234375, -47.01588439941406, 33.939552307128906, 27.98497772216797, -25.007328033447266, 187.37525939941406, -137.5804443359375, -11.103954315185547, 111.33306884765625, 158.72116088867188, -140.18492126464844, 11.145706176757812, 17.006328582763672], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000474.npy"}
|
||||
{"epoch": 0.7165532879818595, "step": 475, "batch_size": 64, "mean": 93.23725891113281, "std": 104.86935424804688, "min": -92.15902709960938, "p10": -23.529163169860837, "median": 53.92621612548828, "p90": 204.50915679931643, "max": 311.2503662109375, "pos_frac": 0.78125, "sample": [-8.943933486938477, 8.289775848388672, -19.842384338378906, -6.7284698486328125, 175.90213012695312, 191.20401000976562, 184.55882263183594, 205.6424102783203, -18.08001136779785, -41.14319610595703, 178.32235717773438, -92.15902709960938, 193.9927978515625, 307.0831604003906, 17.30222511291504, -20.604721069335938, 167.28225708007812, 201.86489868164062, 6.645145416259766, 131.83206176757812, 49.900047302246094, 20.982955932617188, -24.782495498657227, 11.690376281738281, 166.36099243164062, 7.655551910400391, 21.868553161621094, 198.35401916503906, 187.30517578125, -11.530113220214844, 206.40951538085938, 191.28567504882812, 199.23739624023438, 200.247802734375, 57.95238494873047, 2.4593467712402344, 172.2682342529297, 13.529609680175781, 130.0269775390625, 24.884716033935547, 272.5147705078125, 180.48806762695312, 16.853321075439453, 29.171340942382812, 165.80039978027344, -39.807376861572266, 36.59446334838867, 102.91555786132812, 311.2503662109375, 34.805519104003906, 183.82994079589844, 245.3739013671875, 261.3057861328125, 103.2247543334961, 176.6300506591797, -74.0616683959961, -44.25727081298828, 193.94363403320312, 49.03636169433594, 185.95758056640625, 0.8472251892089844, -5.620018005371094, -54.75050354003906, 46.610958099365234], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000475.npy"}
|
||||
{"epoch": 0.7180650037792895, "step": 476, "batch_size": 64, "mean": 61.417755126953125, "std": 106.69882202148438, "min": -146.7879638671875, "p10": -52.90378723144531, "median": 56.12534713745117, "p90": 182.67203063964845, "max": 309.4647521972656, "pos_frac": 0.671875, "sample": [215.33950805664062, 75.4337158203125, -9.644872665405273, 80.97503662109375, -43.40666198730469, 41.596466064453125, -54.412750244140625, 184.03756713867188, 309.4647521972656, 63.11700439453125, 261.6184997558594, -2.337566375732422, 173.01220703125, 166.33798217773438, 144.02993774414062, 118.92972564697266, 153.250732421875, -0.9282150268554688, 57.954078674316406, 19.768157958984375, -127.97095489501953, 1.7229576110839844, 156.31504821777344, 174.71546936035156, 45.73567199707031, -121.64524841308594, 41.616943359375, -35.035675048828125, -49.38287353515625, 1.1431236267089844, 207.039794921875, -32.55943298339844, 120.1923828125, 11.734745025634766, 71.3987045288086, -78.15739440917969, -134.3377227783203, -146.7879638671875, 54.29661560058594, 3.6564254760742188, -31.337509155273438, 173.3331756591797, 17.063446044921875, -44.04364013671875, 128.43373107910156, 70.99504089355469, 174.0602264404297, -31.28278350830078, -30.686798095703125, 149.41241455078125, 127.08596801757812, -112.48117065429688, 204.48822021484375, -27.419342041015625, 107.69432067871094, 245.01901245117188, 5.259843826293945, 74.97990417480469, 179.48577880859375, 156.08319091796875, -21.60733985900879, 176.00270080566406, 169.71121215820312, -47.33924102783203], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000476.npy"}
|
||||
{"epoch": 0.7195767195767195, "step": 477, "batch_size": 64, "mean": 62.726585388183594, "std": 126.68375396728516, "min": -233.18722534179688, "p10": -136.22829132080074, "median": 52.665367126464844, "p90": 214.0824981689453, "max": 305.46697998046875, "pos_frac": 0.765625, "sample": [-149.9728240966797, -104.15771484375, 146.164306640625, 49.02537536621094, -5.36444091796875, 187.64540100097656, 11.795387268066406, 22.50730323791504, 56.30535888671875, 4.158668518066406, 211.92572021484375, 58.24192810058594, -154.61167907714844, -4.843772888183594, 19.49753761291504, 8.401042938232422, -19.227706909179688, 267.7013854980469, 158.4254608154297, -84.16082763671875, 242.8835906982422, 136.2372283935547, 3.482940673828125, 56.33415985107422, 92.39300537109375, -33.666358947753906, -151.40414428710938, 0.8943405151367188, 198.30096435546875, 6.542236328125, -185.7860870361328, 15.870658874511719, 60.35504913330078, 40.82870101928711, -200.6097412109375, -36.9726676940918, -50.21022033691406, 42.840110778808594, 154.39820861816406, 36.36073303222656, 214.4877471923828, 9.489486694335938, 213.1369171142578, 197.6123504638672, 81.08126831054688, 214.73992919921875, -193.61557006835938, 177.45089721679688, 131.01596069335938, 15.60614013671875, 305.46697998046875, 152.1630401611328, 9.84898567199707, 194.65599060058594, 221.5197296142578, 83.44880676269531, -233.18722534179688, 244.255859375, 193.7611541748047, 199.18243408203125, 107.3655014038086, 178.38302612304688, 25.032394409179688, 163.07106018066406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000477.npy"}
|
||||
{"epoch": 0.7210884353741497, "step": 478, "batch_size": 64, "mean": 51.645225524902344, "std": 107.0411605834961, "min": -171.1715545654297, "p10": -100.55672683715821, "median": 38.889686584472656, "p90": 188.23126831054688, "max": 240.80650329589844, "pos_frac": 0.6875, "sample": [-5.065366744995117, 173.40159606933594, 102.0181884765625, 158.96456909179688, 140.45132446289062, -4.5463409423828125, 194.52127075195312, 100.936767578125, -27.869020462036133, 110.5203857421875, 5.6494598388671875, 14.653213500976562, 94.406494140625, 220.82693481445312, -32.83448028564453, 17.4490966796875, -109.86293029785156, 34.53082275390625, -18.08515167236328, 79.96025085449219, -49.735023498535156, -34.031654357910156, 189.40228271484375, 161.88731384277344, 179.245849609375, -101.1402359008789, 155.8111572265625, 150.5641632080078, 91.79531860351562, -166.56427001953125, 59.85765075683594, 6.603809356689453, -161.0968017578125, -170.06842041015625, 18.935321807861328, 118.20088195800781, 183.07765197753906, 32.231781005859375, 174.48236083984375, 38.48169708251953, 193.36170959472656, -6.504617691040039, -131.67922973632812, 39.29767608642578, 185.4989013671875, 52.17091369628906, -5.85772705078125, 240.80650329589844, 36.91059875488281, -1.6736412048339844, 216.13804626464844, 90.34020233154297, -56.388092041015625, 141.38287353515625, 1.6498870849609375, 210.90399169921875, 135.99607849121094, -49.36396026611328, 5.015968322753906, 39.32133483886719, -99.19520568847656, -171.1715545654297, 103.03862762451172, 7.32725715637207], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000478.npy"}
|
||||
{"epoch": 0.7226001511715797, "step": 479, "batch_size": 64, "mean": 47.40142059326172, "std": 102.88249206542969, "min": -232.69314575195312, "p10": -82.65474166870116, "median": 41.17858123779297, "p90": 175.16034545898438, "max": 322.64752197265625, "pos_frac": 0.6875, "sample": [7.507930755615234, 27.500385284423828, 141.23687744140625, -146.9958038330078, -97.64244079589844, 83.5010757446289, 223.47323608398438, 96.76333618164062, 118.60496520996094, -86.5062484741211, 50.11209487915039, -60.9308967590332, 14.749237060546875, -15.855016708374023, -15.599174499511719, 40.83222961425781, 233.1062774658203, 211.3797607421875, -18.91853141784668, 0.5485191345214844, -41.64987564086914, 44.53066635131836, 322.64752197265625, 199.43844604492188, 27.202966690063477, -130.48843383789062, 53.55812072753906, 53.476043701171875, -116.1798324584961, -20.538414001464844, 0.7389068603515625, 119.29883575439453, -4.5867462158203125, 155.3048553466797, -14.915008544921875, 65.69268798828125, -139.12551879882812, -5.327880859375, 19.787826538085938, 154.2539825439453, 147.85231018066406, -232.69314575195312, -33.648963928222656, 72.00303649902344, 175.97695922851562, 173.25491333007812, 81.13065338134766, 2.84735107421875, -1.7689285278320312, 94.00457000732422, 41.524932861328125, 108.11188507080078, -73.66789245605469, 193.41470336914062, -9.471101760864258, 121.90937805175781, 112.1832275390625, 17.087190628051758, 55.973915100097656, 142.55308532714844, 4.02583122253418, 35.258392333984375, 98.16981506347656, 157.6719207763672], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000479.npy"}
|
||||
{"epoch": 0.7241118669690099, "step": 480, "batch_size": 64, "mean": 72.774169921875, "std": 107.73574829101562, "min": -206.31668090820312, "p10": -49.46998825073241, "median": 74.11055755615234, "p90": 189.9614410400391, "max": 360.4605407714844, "pos_frac": 0.75, "sample": [81.1630630493164, -4.41424560546875, 181.26467895507812, -163.2265167236328, 136.80308532714844, 24.66303062438965, 164.25869750976562, -206.31668090820312, 29.667266845703125, 360.4605407714844, -44.128883361816406, 107.77299499511719, 186.16172790527344, -26.213890075683594, 97.61819458007812, -24.241775512695312, -62.48817443847656, 194.0860137939453, -41.3916015625, 7.033657073974609, -121.4669189453125, 169.36087036132812, 14.559425354003906, -117.74251556396484, 173.09698486328125, 158.2587432861328, 300.8377685546875, 78.6231689453125, 178.6857147216797, 48.930755615234375, 104.97272491455078, 2.2236175537109375, 32.17804718017578, 141.71041870117188, 42.394691467285156, 99.09773254394531, 212.5725555419922, 126.43853759765625, -120.36286163330078, 118.04917907714844, 170.0076904296875, 97.6032485961914, 57.173500061035156, 172.9823760986328, 69.59794616699219, 161.52145385742188, -0.9489288330078125, 31.713958740234375, 55.21462631225586, 36.68501663208008, -8.173147201538086, -24.861160278320312, 193.20230102539062, 93.30253601074219, 164.72975158691406, 123.4417724609375, 191.5898895263672, 170.02679443359375, -1.3179512023925781, -51.759033203125, 57.698150634765625, 215.84207153320312, 2.639251708984375, 38.68505859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000480.npy"}
|
||||
{"epoch": 0.7256235827664399, "step": 481, "batch_size": 64, "mean": 64.88946533203125, "std": 105.85149383544922, "min": -186.10519409179688, "p10": -54.29572677612303, "median": 52.559898376464844, "p90": 186.86560211181643, "max": 351.38140869140625, "pos_frac": 0.75, "sample": [49.90647888183594, 4.283164978027344, 183.60653686523438, 73.01375579833984, -99.12434387207031, 126.13453674316406, 165.96090698242188, 68.33963012695312, -63.20244598388672, -99.91522216796875, 141.91458129882812, 70.95970153808594, 170.41754150390625, 78.35648345947266, 14.225044250488281, 112.41275024414062, 177.47598266601562, 38.88346862792969, 140.49853515625, 152.50283813476562, -182.75979614257812, -4.711677551269531, 195.11712646484375, 55.21331787109375, -3.393077850341797, -39.323448181152344, 16.733108520507812, 178.95899963378906, 1.6647758483886719, 102.59207153320312, 1.2495975494384766, 188.26234436035156, 8.974624633789062, -9.51812744140625, 48.71614074707031, -32.19203186035156, 142.2236328125, 11.874088287353516, 4.819614410400391, 9.349807739257812, 71.49658203125, 31.70384979248047, 177.84765625, 194.84158325195312, -2.7286758422851562, 210.19473266601562, -35.11578369140625, -18.141090393066406, 189.39781188964844, 181.55491638183594, 181.11038208007812, 17.007789611816406, 86.73985290527344, -21.75914764404297, -60.71241760253906, 351.38140869140625, -186.10519409179688, 147.09707641601562, 113.7733154296875, 309.3124694824219, 97.67829132080078, 4.300994873046875, -109.61785888671875, 21.165889739990234], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000481.npy"}
|
||||
{"epoch": 0.72713529856387, "step": 482, "batch_size": 64, "mean": 66.14950561523438, "std": 124.29828643798828, "min": -264.17193603515625, "p10": -85.02051315307617, "median": 54.85157012939453, "p90": 207.7725524902344, "max": 381.1497497558594, "pos_frac": 0.703125, "sample": [24.842300415039062, -264.17193603515625, -20.34123992919922, 149.1778564453125, 60.485130310058594, 143.75961303710938, 58.50559997558594, -31.001365661621094, 7.749202728271484, 219.28855895996094, 107.6077880859375, 188.4891815185547, 171.49163818359375, 46.551109313964844, 128.96084594726562, 159.89794921875, 272.27386474609375, -9.189857482910156, -187.20054626464844, 94.14203643798828, 211.27957153320312, -7.62213134765625, 381.1497497558594, -46.32752990722656, -4.939750671386719, 135.5394287109375, 147.93017578125, 70.04280090332031, -11.800704956054688, 5.771427154541016, 10.561622619628906, 163.49819946289062, -93.85055541992188, 111.85710144042969, 199.58950805664062, 171.0065460205078, 98.56000518798828, 27.884944915771484, -22.945209503173828, -79.4364013671875, 344.0772399902344, 133.61923217773438, 41.081275939941406, 14.869873046875, 0.18918609619140625, 184.9485626220703, 54.524940490722656, -1.368703842163086, 321.816162109375, -127.03343200683594, 112.41606903076172, 39.36286926269531, 84.56790161132812, 36.32160949707031, 166.9127655029297, 0.6621646881103516, -65.26268768310547, 276.45977783203125, -87.41370391845703, -120.74217224121094, 135.89442443847656, -25.19953155517578, -131.380615234375, 55.178199768066406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000482.npy"}
|
||||
{"epoch": 0.7286470143613001, "step": 483, "batch_size": 64, "mean": 80.93964385986328, "std": 102.66181182861328, "min": -187.83621215820312, "p10": -20.37302303314209, "median": 67.71084594726562, "p90": 204.07489929199218, "max": 323.2888488769531, "pos_frac": 0.765625, "sample": [21.98583221435547, 197.87074279785156, -15.825374603271484, 114.72660064697266, 175.47142028808594, 171.49362182617188, 174.8623504638672, -68.1016845703125, -20.515350341796875, -19.536869049072266, 80.19412994384766, 177.76695251464844, 148.63021850585938, 163.51023864746094, -60.10734176635742, 164.02310180664062, 67.85623168945312, -59.09113311767578, 4.03326416015625, 177.09951782226562, 72.06340026855469, 189.28952026367188, 186.91171264648438, 67.56546020507812, 203.88693237304688, 60.513832092285156, 244.73570251464844, -27.647621154785156, 11.071701049804688, 104.03875732421875, 134.308349609375, -159.08157348632812, 189.26760864257812, 4.916807174682617, -17.420196533203125, 323.2888488769531, -14.071998596191406, -187.83621215820312, 38.70879364013672, -19.53498077392578, 66.1334457397461, 34.797935485839844, 226.75363159179688, 105.64978790283203, 2.71923828125, 64.32809448242188, 241.84580993652344, 210.50588989257812, 69.26692962646484, 159.4087677001953, 42.60912322998047, 68.75331115722656, 32.375587463378906, 214.04122924804688, -20.040925979614258, 25.548765182495117, 58.812870025634766, 6.311304092407227, 204.15545654296875, 163.5814208984375, 24.917648315429688, -11.478904724121094, -3.1548194885253906, 190.97434997558594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000483.npy"}
|
||||
{"epoch": 0.7301587301587301, "step": 484, "batch_size": 64, "mean": 52.747291564941406, "std": 78.88449096679688, "min": -133.81588745117188, "p10": -31.750505447387688, "median": 38.490190505981445, "p90": 162.1383071899415, "max": 229.7076416015625, "pos_frac": 0.796875, "sample": [30.598983764648438, 11.093170166015625, -0.7162322998046875, -2.6041030883789062, 203.53326416015625, 4.60423469543457, 82.7548828125, 8.972930908203125, 137.02011108398438, 6.461204528808594, 28.980979919433594, -106.67704010009766, 76.68301391601562, 65.27536010742188, 99.88392639160156, 99.29124450683594, -64.74664306640625, -22.51584243774414, 83.94952392578125, 107.94591522216797, 2.3425350189208984, 171.16912841796875, 127.28489685058594, 4.658500671386719, 102.29096221923828, 113.33270263671875, 195.56800842285156, 33.50128173828125, 102.36714172363281, 22.35966682434082, 46.7448844909668, 8.726341247558594, 4.8438262939453125, -35.53376770019531, 229.7076416015625, -12.11368179321289, 20.911657333374023, 10.502937316894531, 77.87947082519531, -2.34600830078125, 139.71987915039062, -52.64586639404297, 223.93165588378906, -80.69094848632812, 141.06639099121094, 84.7596664428711, 182.64755249023438, -22.922893524169922, 71.982666015625, 21.109039306640625, 205.02066040039062, 17.86191177368164, 83.58355712890625, 27.221511840820312, -93.00870513916016, -133.81588745117188, 43.47909927368164, 63.096893310546875, 107.89389038085938, 113.84073638916016, 28.7337646484375, 65.77053833007812, 45.78390884399414, 17.420608520507812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000484.npy"}
|
||||
{"epoch": 0.7316704459561603, "step": 485, "batch_size": 64, "mean": 59.02231216430664, "std": 92.86990356445312, "min": -185.47235107421875, "p10": -22.97177867889404, "median": 23.480029106140137, "p90": 188.74640197753908, "max": 302.2733459472656, "pos_frac": 0.75, "sample": [7.808158874511719, 117.90924835205078, 89.76869201660156, 136.9430694580078, 122.2435302734375, -10.498756408691406, 1.825235366821289, 21.1999568939209, 59.84370422363281, -52.25634002685547, -134.32005310058594, 22.038619995117188, 189.2908935546875, 187.47592163085938, -10.4857177734375, 215.41407775878906, 8.24368667602539, 176.1468048095703, 234.49989318847656, 12.259260177612305, 13.453268051147461, 146.7088623046875, -20.886550903320312, 218.563232421875, -4.6663055419921875, 0.4759864807128906, -185.47235107421875, 99.84507751464844, 70.3501968383789, 85.08572387695312, -18.717308044433594, -4.314693450927734, 70.12601470947266, 0.0046100616455078125, 150.60975646972656, -82.08299255371094, 302.2733459472656, 196.24337768554688, 3.9400177001953125, 177.4463653564453, 22.736087799072266, -23.862253189086914, 34.5010986328125, 16.996177673339844, 81.65837860107422, 2.276002883911133, -5.860013961791992, 182.21023559570312, 112.57715606689453, 9.322860717773438, 15.81881332397461, 24.223970413208008, 135.27011108398438, 186.6755828857422, 95.2346420288086, 73.07156372070312, 52.68103790283203, 191.90089416503906, -20.894004821777344, -38.50391387939453, 26.9390869140625, -0.5963630676269531, 22.415176391601562, -33.699676513671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000485.npy"}
|
||||
{"epoch": 0.7331821617535903, "step": 486, "batch_size": 64, "mean": 55.09309005737305, "std": 118.09007263183594, "min": -201.96583557128906, "p10": -80.75713806152343, "median": 42.957313537597656, "p90": 210.82178344726566, "max": 386.5334167480469, "pos_frac": 0.6875, "sample": [25.806785583496094, -12.164535522460938, 292.941162109375, 4.027368545532227, -23.89141845703125, -3.3710670471191406, 43.00498962402344, 386.5334167480469, 23.132164001464844, -7.7531890869140625, 4.00050163269043, 188.94598388671875, 118.6050033569336, 91.76837921142578, 63.729286193847656, 39.27137756347656, -17.394920349121094, 199.56521606445312, -1.4505767822265625, 219.04861450195312, 122.52970123291016, 53.434661865234375, -66.87484741210938, -80.96163940429688, 215.64602661132812, -157.83460998535156, 42.909637451171875, -80.27996826171875, 57.091468811035156, -201.96583557128906, 230.02676391601562, 35.02561950683594, 55.9332275390625, -134.77317810058594, 0.9225711822509766, 147.73483276367188, 76.48693084716797, 161.30831909179688, -145.85595703125, 44.95199203491211, 0.24715042114257812, 180.19021606445312, 73.15487670898438, 188.3596649169922, 192.05197143554688, -76.35503387451172, 127.65608215332031, 156.3972625732422, 71.9356689453125, 182.00653076171875, 106.92166137695312, -38.53538513183594, 24.354293823242188, 235.9979248046875, -1.5151901245117188, -101.78810119628906, 1.2242851257324219, 113.05235290527344, 247.84779357910156, -21.664918899536133, 28.24951171875, -186.54800415039062, 59.74702072143555, -46.84027862548828], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000486.npy"}
|
||||
{"epoch": 0.7346938775510204, "step": 487, "batch_size": 64, "mean": 68.5578384399414, "std": 109.72547149658203, "min": -165.9564666748047, "p10": -54.682049560546865, "median": 49.01375198364258, "p90": 195.60007934570316, "max": 446.5953063964844, "pos_frac": 0.765625, "sample": [105.59968566894531, -64.91964721679688, 171.683349609375, 116.27144622802734, 54.01898193359375, 7.32099723815918, -60.49644470214844, 14.130775451660156, 92.2944564819336, 166.96372985839844, -59.935546875, -26.577133178710938, 44.10332489013672, -67.74870300292969, -11.008293151855469, 187.03244018554688, 118.26329040527344, 23.380634307861328, 375.371337890625, 77.88882446289062, -1.5104293823242188, 222.85845947265625, 44.50505065917969, 198.76425170898438, 136.11170959472656, -17.730159759521484, 43.287696838378906, -144.6832275390625, 17.352272033691406, 68.99581909179688, 155.86239624023438, 72.45215606689453, 90.40974426269531, 0.06991004943847656, 188.21701049804688, 7.755500793457031, 134.22341918945312, -41.907684326171875, 53.52245330810547, 111.33607482910156, 204.3961181640625, 21.709964752197266, 81.79434204101562, 219.65872192382812, -138.33509826660156, 139.53164672851562, 1.2348785400390625, -42.42388916015625, 87.35758209228516, -165.9564666748047, -11.681047439575195, 185.35491943359375, 28.201026916503906, 25.044952392578125, 7.3745574951171875, 161.3946075439453, 31.46010971069336, 24.76806640625, -21.525146484375, 214.47776794433594, 110.83222198486328, 28.611825942993164, 446.5953063964844, 144.29464721679688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000487.npy"}
|
||||
{"epoch": 0.7362055933484505, "step": 488, "batch_size": 64, "mean": 71.7989501953125, "std": 102.26532745361328, "min": -172.44337463378906, "p10": -29.704135894775387, "median": 52.82960891723633, "p90": 202.3036911010742, "max": 337.95501708984375, "pos_frac": 0.703125, "sample": [35.371337890625, 2.4241867065429688, -4.773469924926758, -7.685644149780273, 29.35887908935547, -78.14801788330078, -13.156562805175781, 194.0225372314453, 202.4613494873047, 88.50852966308594, 0.26862144470214844, -5.4650726318359375, 192.0474395751953, 19.2747802734375, 12.598716735839844, 236.12554931640625, -31.243606567382812, 179.40174865722656, 1.3769149780273438, 95.80792236328125, 126.9229507446289, 48.793907165527344, 4.448799133300781, -19.534103393554688, -57.31371307373047, 66.14391326904297, -101.37332153320312, 199.33316040039062, -7.000640869140625, 195.21939086914062, -17.653457641601562, -66.57087707519531, -14.923208236694336, 169.1802215576172, 216.26731872558594, 120.47421264648438, 183.9033660888672, -26.112037658691406, 227.73931884765625, 238.84483337402344, 230.8520050048828, 164.44961547851562, 173.46376037597656, -9.538238525390625, 99.22061920166016, 118.88391876220703, -10.530563354492188, 92.55244445800781, -49.22923278808594, 92.01312255859375, 65.2823486328125, 104.00210571289062, -172.44337463378906, 99.59938049316406, 8.505523681640625, 76.93502044677734, 337.95501708984375, 44.548641204833984, 200.3112030029297, 20.279003143310547, 14.709228515625, 56.86531066894531, -0.8562545776367188, 201.93582153320312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000488.npy"}
|
||||
{"epoch": 0.7377173091458806, "step": 489, "batch_size": 64, "mean": 44.51079559326172, "std": 109.57472229003906, "min": -209.8380126953125, "p10": -72.9920425415039, "median": 23.869540214538574, "p90": 178.68763122558593, "max": 280.9082336425781, "pos_frac": 0.640625, "sample": [-167.5682830810547, 166.5333251953125, 22.490280151367188, 170.75784301757812, 148.4816436767578, 179.4954833984375, -61.7122802734375, 164.6124267578125, 57.11118698120117, 250.91656494140625, 107.94120025634766, 19.462120056152344, 116.36111450195312, -72.95236206054688, -8.415763854980469, -73.00904846191406, -28.547325134277344, -29.33859634399414, 138.84539794921875, 24.9838809967041, 197.0933074951172, 69.77986145019531, 152.7908172607422, 22.755199432373047, 147.79574584960938, 147.47650146484375, -5.810447692871094, -57.30370330810547, -71.15850830078125, 49.24404525756836, -174.89341735839844, -55.0173454284668, 180.6505126953125, 18.73473358154297, 11.289215087890625, -16.15185546875, -30.87236785888672, -3.337465286254883, 176.80264282226562, 155.2452392578125, 51.814422607421875, 137.43499755859375, 18.94585418701172, 280.9082336425781, 66.16385650634766, -116.67507934570312, 198.58914184570312, 18.962158203125, 116.9316177368164, 112.10166931152344, -180.5335693359375, 21.145851135253906, -23.831844329833984, 12.341962814331055, -7.9697265625, -209.8380126953125, 169.28323364257812, 41.21990203857422, -127.96802520751953, 87.10873413085938, -67.30484008789062, 181.00326538085938, -1.8922901153564453, 29.187942504882812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000489.npy"}
|
||||
{"epoch": 0.7392290249433107, "step": 490, "batch_size": 64, "mean": 54.3193359375, "std": 126.72726440429688, "min": -267.0336608886719, "p10": -119.1797409057617, "median": 48.89709663391113, "p90": 203.17235870361327, "max": 356.209716796875, "pos_frac": 0.65625, "sample": [-39.96214294433594, 5.1288604736328125, -267.0336608886719, -5.783012390136719, -4.717859268188477, 94.7672119140625, 46.07676696777344, 185.38038635253906, 209.58737182617188, 264.2099914550781, -168.39971923828125, 196.64212036132812, -196.2149658203125, 8.055891036987305, 40.641082763671875, 99.5705337524414, 148.84698486328125, 112.6919174194336, 296.00396728515625, 24.46527862548828, 28.265106201171875, 203.3104248046875, 148.69007873535156, 219.31951904296875, -132.2044677734375, -49.953216552734375, 182.3084259033203, -3.314769744873047, 49.85342788696289, 20.36164093017578, -170.80300903320312, 122.31658935546875, 94.53097534179688, 99.40083312988281, 1.469919204711914, -18.038482666015625, -23.87964630126953, 202.85020446777344, -126.50375366210938, -16.903831481933594, 128.35781860351562, -66.9232177734375, 92.29792022705078, 156.05148315429688, -5.520326614379883, 137.0821533203125, 57.24249267578125, 6.423158645629883, -74.43299102783203, -20.383392333984375, 356.209716796875, 159.75860595703125, -21.557552337646484, 107.80055236816406, 237.48997497558594, 124.97833251953125, 195.30300903320312, 70.13263702392578, 47.940765380859375, 188.14083862304688, -203.4897918701172, -102.09037780761719, -27.507095336914062, 52.09994125366211], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000490.npy"}
|
||||
{"epoch": 0.7407407407407407, "step": 491, "batch_size": 64, "mean": 59.31254577636719, "std": 97.25348663330078, "min": -219.93292236328125, "p10": -39.63824157714843, "median": 28.607462882995605, "p90": 189.99620361328127, "max": 344.6865234375, "pos_frac": 0.703125, "sample": [158.6387481689453, -28.196014404296875, 121.80885314941406, 25.99212646484375, 160.10678100585938, 89.24417877197266, -6.628900527954102, 137.859375, 176.90924072265625, 58.184818267822266, -10.3060302734375, -98.76712799072266, 194.68777465820312, 40.813751220703125, 169.69570922851562, -74.01744842529297, 16.661792755126953, 131.12457275390625, 14.17752456665039, 98.74864196777344, -55.017059326171875, 185.66244506835938, 194.43023681640625, -10.077800750732422, 31.22279930114746, 146.10203552246094, 15.180351257324219, -23.730789184570312, -12.826065063476562, 3.8202552795410156, 193.32373046875, 113.90666198730469, -44.54205322265625, 14.199333190917969, 130.20285034179688, 17.93564224243164, 36.16809844970703, 344.6865234375, 50.394805908203125, -56.505027770996094, 189.1163330078125, 14.611419677734375, 14.361671447753906, -16.58159065246582, -0.3679847717285156, 108.27857971191406, -17.31304931640625, -59.18449401855469, 103.0859146118164, 193.94056701660156, -6.242820739746094, 138.0792694091797, 225.8563995361328, 175.76473999023438, 8.449945449829102, 5.947296142578125, 3.7924880981445312, 70.61212921142578, 63.377403259277344, -219.93292236328125, -27.473722457885742, 6.663454055786133, -20.486698150634766, 190.373291015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000491.npy"}
|
||||
{"epoch": 0.7422524565381708, "step": 492, "batch_size": 64, "mean": 49.850067138671875, "std": 114.1339111328125, "min": -273.55584716796875, "p10": -69.15263290405274, "median": 23.153959274291992, "p90": 205.7348876953125, "max": 280.54547119140625, "pos_frac": 0.6875, "sample": [280.54547119140625, 271.783203125, -119.6107177734375, 1.6179580688476562, 211.87847900390625, 9.999929428100586, 119.4814453125, 93.06924438476562, 87.26910400390625, -3.301767349243164, 167.91539001464844, 205.12661743164062, 70.3922119140625, -40.62621307373047, 17.265777587890625, 160.25985717773438, -198.83221435546875, 95.10643005371094, 4.0039215087890625, 136.2769317626953, 163.20323181152344, -70.74317169189453, -7.264778137207031, -27.38544464111328, 11.833793640136719, 90.67106628417969, 118.67487335205078, 227.32522583007812, -65.44137573242188, -10.286247253417969, 182.2310791015625, 9.577348709106445, 6.632293701171875, 77.51690673828125, -1.5806598663330078, 25.548019409179688, -273.55584716796875, 226.09136962890625, -115.178955078125, -1.4776840209960938, 153.20033264160156, 2.3984603881835938, 95.35137176513672, 8.575504302978516, -49.99049377441406, 128.11483764648438, 20.387142181396484, 60.68772888183594, -65.14847564697266, 146.1716766357422, -196.63031005859375, -41.888526916503906, -13.588165283203125, 155.82408142089844, 216.91326904296875, 20.759899139404297, 135.59295654296875, 78.0477294921875, 118.01237487792969, 32.46220397949219, 4.166589736938477, -108.74630737304688, -52.277557373046875, 205.99557495117188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000492.npy"}
|
||||
{"epoch": 0.7437641723356009, "step": 493, "batch_size": 64, "mean": 75.25961303710938, "std": 107.75605010986328, "min": -187.49505615234375, "p10": -62.924151611328114, "median": 66.23205184936523, "p90": 210.18735961914064, "max": 312.5135192871094, "pos_frac": 0.71875, "sample": [-1.1766510009765625, 15.296154022216797, -32.38714599609375, 170.3273468017578, -36.55097198486328, 72.90786743164062, -145.9998779296875, 12.907222747802734, 1.132638931274414, -68.7267837524414, 70.355224609375, 252.34048461914062, 195.25335693359375, 205.187255859375, 69.01783752441406, -68.59666442871094, 176.58639526367188, -2.2140121459960938, 134.12066650390625, 157.793212890625, 41.207611083984375, 41.08568572998047, -1.0037784576416016, 7.1412353515625, 63.446266174316406, -67.52415466308594, 154.8735809326172, -12.994583129882812, 203.07957458496094, 153.7162322998047, 144.91708374023438, 194.53326416015625, -109.96551513671875, -28.781970977783203, -80.87449645996094, 141.59640502929688, 188.1064453125, -27.958251953125, 218.765380859375, 1.951120376586914, 247.69854736328125, 124.99607849121094, 135.70851135253906, 312.5135192871094, 78.7038345336914, 212.33026123046875, -52.19081115722656, 212.33795166015625, 143.4724884033203, -9.376304626464844, -14.318023681640625, 172.22396850585938, 63.402732849121094, 21.695205688476562, 222.11862182617188, 157.9217529296875, 48.11585998535156, 53.2060546875, 124.3690185546875, 192.27658081054688, 10.554695129394531, 103.73895263671875, 39.720314025878906, -187.49505615234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000493.npy"}
|
||||
{"epoch": 0.745275888133031, "step": 494, "batch_size": 64, "mean": 63.656883239746094, "std": 119.19791412353516, "min": -253.67416381835938, "p10": -60.92870254516601, "median": 55.79199028015137, "p90": 191.18228607177736, "max": 379.40191650390625, "pos_frac": 0.65625, "sample": [134.2174530029297, 56.459739685058594, 71.30144500732422, 49.06413269042969, 188.93649291992188, 7.8826446533203125, -11.519981384277344, 162.18763732910156, 130.419921875, -125.3306884765625, 11.881889343261719, -42.79285430908203, -0.6274871826171875, 107.41352081298828, 136.2515106201172, 81.79776000976562, 180.83734130859375, -0.6293258666992188, 55.12424087524414, -109.94143676757812, 25.060314178466797, 379.40191650390625, 135.99534606933594, -28.0155029296875, 46.601295471191406, 186.8992156982422, 196.53656005859375, -7.0420989990234375, 154.13140869140625, 191.2362518310547, 175.544189453125, -190.94219970703125, -208.20254516601562, 177.4142303466797, -6.635568618774414, -103.26240539550781, -62.86455535888672, 173.22366333007812, 33.78211975097656, 153.31887817382812, 70.19673156738281, -2.382944107055664, -253.67416381835938, -4.402645111083984, 24.118633270263672, 50.31407928466797, 234.60191345214844, -5.102560043334961, -36.12127685546875, 51.312652587890625, 191.05636596679688, 75.59722900390625, 76.77580261230469, 264.7760009765625, 83.37609100341797, 91.3526382446289, -56.411712646484375, -9.91054916381836, 151.20135498046875, 207.73712158203125, 272.7392883300781, -45.349212646484375, -47.65397262573242, 184.779296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000494.npy"}
|
||||
{"epoch": 0.7467876039304611, "step": 495, "batch_size": 64, "mean": 52.02519607543945, "std": 118.28262329101562, "min": -209.3919219970703, "p10": -79.9395751953125, "median": 24.51388931274414, "p90": 191.62130432128907, "max": 429.965087890625, "pos_frac": 0.6875, "sample": [-77.39147186279297, -86.32500457763672, -209.3919219970703, -156.79678344726562, 165.7814178466797, -6.6968994140625, -137.73854064941406, 19.931015014648438, 97.56155395507812, -81.02717590332031, 2.9667205810546875, -151.58267211914062, -17.359848022460938, 81.84605407714844, 74.47689056396484, 190.837158203125, -33.24480438232422, 21.679779052734375, 126.51419830322266, -7.223997116088867, 34.4680290222168, 143.7870635986328, 159.63272094726562, 3.8293704986572266, 172.44302368164062, 191.95736694335938, -13.337604522705078, 98.64281463623047, -0.6764869689941406, 46.54337692260742, 429.965087890625, 5.460916519165039, 33.22614288330078, -77.40184020996094, 15.036520004272461, 41.926483154296875, 2.74493408203125, 141.39256286621094, 316.8414611816406, 161.19720458984375, -97.39886474609375, -12.380485534667969, 50.68825149536133, 62.16845703125, 218.42221069335938, 27.347999572753906, 80.13373565673828, -6.318572998046875, 5.120506286621094, 193.34625244140625, 7.582075119018555, 166.7946319580078, 52.70098114013672, -23.93243408203125, -72.60445404052734, 329.37811279296875, 92.5230712890625, -6.6935577392578125, 7.161109924316406, 79.14993286132812, 148.31314086914062, 19.437171936035156, 277.66314697265625, 6.515430450439453], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000495.npy"}
|
||||
{"epoch": 0.7482993197278912, "step": 496, "batch_size": 64, "mean": 65.399658203125, "std": 130.87255859375, "min": -238.07687377929688, "p10": -80.31299743652343, "median": 42.52373123168945, "p90": 216.55214385986332, "max": 600.2198486328125, "pos_frac": 0.6875, "sample": [129.1246795654297, 166.96310424804688, 209.63739013671875, 179.4761505126953, -95.36957550048828, 45.56500244140625, 85.21401977539062, 316.8348693847656, 17.16733741760254, 121.93588256835938, -81.66242980957031, 37.549285888671875, 53.831748962402344, 45.319305419921875, 4.70881462097168, -10.538551330566406, -7.794193267822266, 14.156023025512695, 170.97679138183594, 39.72815704345703, -55.274147033691406, 600.2198486328125, -4.793094635009766, 201.00643920898438, 58.86993408203125, -238.07687377929688, 322.5223693847656, 163.520263671875, -1.1769981384277344, 260.82586669921875, -90.20935821533203, 10.19418716430664, 197.68763732910156, 223.6446533203125, 2.9928131103515625, 0.8830432891845703, -122.47418212890625, -8.222795486450195, 71.32205200195312, 117.1619644165039, 21.295318603515625, 62.83207702636719, 224.68685913085938, 95.6965560913086, 148.5750732421875, 48.10929870605469, -47.72114562988281, 91.36149597167969, -20.166107177734375, -2.269977569580078, 219.51560974121094, -175.2965087890625, -46.46717834472656, 4.057867050170898, 151.92080688476562, 181.10813903808594, 7.695671081542969, 141.11172485351562, -77.16432189941406, -3.47564697265625, -86.05210876464844, 82.82911682128906, 18.59124755859375, -8.643638610839844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000496.npy"}
|
||||
{"epoch": 0.7498110355253212, "step": 497, "batch_size": 64, "mean": 73.43358612060547, "std": 105.10853576660156, "min": -195.97271728515625, "p10": -28.990818786621087, "median": 70.18764114379883, "p90": 204.81545562744142, "max": 261.7701416015625, "pos_frac": 0.703125, "sample": [183.2209930419922, 35.31317901611328, 261.7701416015625, 162.05712890625, 240.43231201171875, 28.017616271972656, -21.178848266601562, -7.4857330322265625, 14.696100234985352, 103.01625061035156, 138.82791137695312, 138.84292602539062, 110.63604736328125, -11.426902770996094, 138.07577514648438, 4.403388977050781, 138.29421997070312, -181.590576171875, 179.14613342285156, 216.73770141601562, -3.0918655395507812, -3.2827110290527344, -5.809822082519531, -6.640281677246094, 62.10508728027344, -42.16078186035156, -13.765533447265625, 202.20347595214844, 172.23243713378906, 194.01455688476562, -5.904573440551758, 103.85456848144531, 110.27545166015625, 213.0276641845703, -188.04660034179688, 174.10958862304688, 205.93487548828125, 83.01129913330078, 72.15809631347656, -32.9686279296875, 176.13845825195312, 130.72132873535156, -9.84201431274414, 82.52133178710938, 96.39057922363281, -32.33880615234375, 231.46759033203125, 125.47913360595703, -9.948707580566406, 34.18242645263672, 46.57651901245117, 60.0770263671875, 61.75837707519531, 144.75515747070312, 68.2171859741211, 234.75340270996094, -195.97271728515625, 4.835716247558594, -20.078739166259766, 189.50465393066406, 53.180572509765625, 52.420082092285156, -137.37989807128906, 149.26852416992188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000497.npy"}
|
||||
{"epoch": 0.7513227513227513, "step": 498, "batch_size": 64, "mean": 72.49751281738281, "std": 98.82685089111328, "min": -287.3888244628906, "p10": -30.128607940673827, "median": 56.187564849853516, "p90": 196.5524444580078, "max": 261.13189697265625, "pos_frac": 0.828125, "sample": [-30.528732299804688, 116.60234832763672, 159.31190490722656, 85.48609924316406, 84.5028076171875, -4.7306060791015625, 173.14202880859375, 168.42486572265625, -17.03403091430664, -82.45759582519531, 191.26490783691406, 63.6274528503418, 82.29751586914062, -33.52196502685547, 161.91397094726562, 88.38064575195312, 10.691169738769531, -49.283302307128906, 107.30467987060547, 145.81472778320312, 261.13189697265625, 42.007606506347656, 194.54908752441406, 22.593914031982422, 50.945213317871094, 44.83604431152344, -33.363792419433594, 175.43638610839844, 176.62545776367188, 52.430763244628906, 214.7432861328125, 179.1821746826172, 3.550952911376953, 18.457870483398438, 49.032630920410156, -90.8646240234375, -8.757545471191406, 59.944366455078125, 1.9781475067138672, 6.000040054321289, 114.13221740722656, 193.6383056640625, -287.3888244628906, 151.17442321777344, 105.8534927368164, 35.81610870361328, 4.149059295654297, 62.21405792236328, 2.332691192626953, 62.334659576416016, 228.3828125, 197.41102600097656, 104.08977508544922, 20.23461151123047, 220.1106414794922, 4.21574592590332, 260.697998046875, 25.616485595703125, 11.631006240844727, 18.129562377929688, 40.39007568359375, -29.194984436035156, 22.143150329589844, 230.0583038330078], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000498.npy"}
|
||||
{"epoch": 0.7528344671201814, "step": 499, "batch_size": 64, "mean": 71.96763610839844, "std": 128.60296630859375, "min": -191.93966674804688, "p10": -113.57125320434568, "median": 86.11226654052734, "p90": 225.21645660400392, "max": 405.71014404296875, "pos_frac": 0.765625, "sample": [122.7661361694336, 219.34234619140625, -187.74276733398438, 275.73101806640625, 22.798751831054688, 66.57827758789062, 29.247514724731445, -191.93966674804688, 25.002674102783203, 115.08689880371094, 78.78511810302734, 164.23085021972656, 147.8411102294922, -156.33343505859375, 210.78082275390625, 6.820182800292969, 405.71014404296875, -124.47384643554688, 203.10414123535156, 187.09921264648438, 86.13435363769531, -61.794952392578125, -34.558265686035156, -120.61470031738281, -97.13654327392578, 137.75347900390625, 86.09017944335938, 227.7339324951172, -70.77427673339844, 86.61956024169922, -49.46835708618164, 29.369770050048828, 126.15423583984375, 169.88783264160156, 84.47001647949219, -39.34660339355469, 25.021286010742188, 130.52841186523438, 9.121511459350586, 87.34600830078125, 228.29714965820312, 9.551521301269531, 205.87783813476562, 52.159915924072266, -190.62425231933594, 88.39791107177734, 100.35608673095703, 95.44271850585938, 241.27688598632812, 254.06739807128906, 203.17962646484375, 22.820846557617188, 56.44110107421875, -75.25640869140625, -172.9364776611328, 192.03561401367188, 137.685791015625, 150.46408081054688, -52.57110595703125, 264.019775390625, 18.98542022705078, 172.94085693359375, 154.66104125976562, 15.682937622070312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000499.npy"}
|
||||
{"epoch": 0.7543461829176115, "step": 500, "batch_size": 64, "mean": 63.2963981628418, "std": 120.2086410522461, "min": -345.27386474609375, "p10": -90.62222824096679, "median": 74.76952362060547, "p90": 197.2190673828125, "max": 259.97918701171875, "pos_frac": 0.765625, "sample": [-88.29515075683594, 132.108154296875, 12.358924865722656, 8.960538864135742, 144.12799072265625, 90.56581115722656, 109.92041015625, 162.0921630859375, 112.49853515625, 199.29408264160156, 49.98091125488281, 182.93435668945312, 185.16433715820312, -90.14942169189453, 196.3129425048828, 13.655715942382812, 180.5266571044922, 59.81797790527344, 31.201332092285156, -17.4432373046875, 212.93893432617188, 174.92630004882812, 24.003517150878906, 156.74407958984375, -10.788497924804688, 197.60740661621094, 78.71908569335938, 12.019237518310547, -45.63501739501953, -134.8663330078125, -152.07333374023438, 106.44108581542969, 22.626445770263672, 57.822601318359375, -147.5894317626953, 8.598440170288086, 87.330810546875, 259.97918701171875, 216.96267700195312, 0.5148544311523438, 131.2162628173828, 16.414846420288086, 195.352294921875, 53.48105239868164, -35.8445930480957, 145.84146118164062, 81.48370361328125, -345.27386474609375, 181.94659423828125, -200.27645874023438, 226.40074157714844, -90.82485961914062, 3.2947463989257812, 168.24017333984375, 157.34999084472656, 10.271381378173828, -7.334510803222656, 70.81996154785156, 121.78504180908203, -52.89048767089844, -127.22376251220703, 151.01641845703125, 189.45086669921875, 204.35739135742188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000500.npy"}
|
||||
{"epoch": 0.7558578987150416, "step": 501, "batch_size": 64, "mean": 57.29954528808594, "std": 80.4402847290039, "min": -153.5958709716797, "p10": -17.709339904785157, "median": 42.19590377807617, "p90": 173.17309570312503, "max": 226.96571350097656, "pos_frac": 0.71875, "sample": [23.37128448486328, 198.67135620117188, -20.879226684570312, 50.431522369384766, 127.22575378417969, 141.57798767089844, -36.17311096191406, 42.166175842285156, 10.045879364013672, 138.46853637695312, 149.94993591308594, 25.010908126831055, 78.65255737304688, -3.5349273681640625, -14.233320236206055, -9.71345329284668, 46.881256103515625, 176.91357421875, -6.523231506347656, -9.552591323852539, 63.762001037597656, -5.7195281982421875, 1.1932029724121094, 226.96571350097656, 184.9490966796875, 164.4453125, 162.20962524414062, 224.2601318359375, 1.6644477844238281, 15.097930908203125, 48.38677215576172, -86.65875244140625, -4.7838897705078125, 3.065143585205078, 86.808837890625, 86.71588134765625, 118.1579360961914, 1.4861831665039062, -0.5695457458496094, 92.51106262207031, 66.05783081054688, 156.63796997070312, 23.39431381225586, 130.38623046875, -21.226173400878906, -153.5958709716797, 60.79857635498047, 146.8675079345703, -61.24532699584961, 35.90691375732422, 42.22563171386719, 51.395042419433594, 16.51380157470703, -16.992324829101562, 85.69505310058594, -6.176294326782227, 24.8914794921875, 0.8154163360595703, 201.58364868164062, 184.7170867919922, 65.74180603027344, -0.2750663757324219, 158.36569213867188, -18.016632080078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000501.npy"}
|
||||
{"epoch": 0.7573696145124716, "step": 502, "batch_size": 64, "mean": 61.60658264160156, "std": 95.7365493774414, "min": -187.5530242919922, "p10": -49.70869369506834, "median": 55.223602294921875, "p90": 195.38229064941407, "max": 264.15093994140625, "pos_frac": 0.75, "sample": [-30.47089385986328, 8.33047866821289, 2.894378662109375, -56.26795959472656, 199.983154296875, 134.1229705810547, 30.13524627685547, 59.41644287109375, -106.67342376708984, 54.95794677734375, 29.555530548095703, 197.073974609375, -11.573829650878906, -60.34680938720703, 159.87442016601562, 264.15093994140625, 49.03289794921875, -187.5530242919922, -34.40373992919922, 113.94273376464844, 30.500411987304688, -17.739803314208984, 27.9438533782959, 166.54576110839844, 206.2625732421875, 88.44451141357422, 139.18624877929688, -18.587604522705078, -1.9084453582763672, 55.4892578125, 30.279747009277344, 9.639120101928711, 98.89054870605469, 7.308095932006836, 114.18382263183594, 86.82853698730469, 191.43502807617188, 218.48875427246094, -73.93045043945312, 1.4562416076660156, -11.460044860839844, 29.858543395996094, 78.77500915527344, 6.569366455078125, 97.59464263916016, -15.833572387695312, 37.747222900390625, 204.65170288085938, 83.6025161743164, -107.9324951171875, 70.09402465820312, 96.32122802734375, 168.02658081054688, 122.59100341796875, 246.03048706054688, -113.81912231445312, 63.30084228515625, 186.8836669921875, -22.909149169921875, 186.74533081054688, 1.4331398010253906, 81.74144744873047, 186.53369140625, 89.37761688232422], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000502.npy"}
|
||||
{"epoch": 0.7588813303099018, "step": 503, "batch_size": 64, "mean": 64.46528625488281, "std": 104.28274536132812, "min": -221.70001220703125, "p10": -35.73982391357422, "median": 44.53240203857422, "p90": 191.8161590576172, "max": 231.2807159423828, "pos_frac": 0.765625, "sample": [162.24029541015625, 7.615573883056641, -33.005218505859375, 71.11302185058594, 130.484375, 40.14899444580078, 223.15402221679688, -221.70001220703125, -1.4478988647460938, 137.60464477539062, 166.95831298828125, -24.563339233398438, 26.3414306640625, 189.60728454589844, -36.16194152832031, 91.03274536132812, -34.7548828125, 5.6854248046875, 231.2807159423828, -54.692787170410156, 23.371910095214844, 134.61602783203125, 183.3628387451172, 41.50776672363281, 7.9698486328125, 47.557037353515625, 149.12152099609375, 205.54559326171875, -4.602275848388672, 144.58206176757812, -17.33102035522461, 80.49771881103516, 200.21522521972656, 192.61378479003906, 194.4656982421875, -20.691810607910156, 133.59152221679688, 0.8227920532226562, -150.95335388183594, 25.97650909423828, 1.2798576354980469, 26.051513671875, 5.611909866333008, 105.36331939697266, -3.044574737548828, 16.040481567382812, 55.84271240234375, 199.29397583007812, 138.00331115722656, 11.841766357421875, 187.80088806152344, 175.71124267578125, -162.6161346435547, 9.319435119628906, 171.7135772705078, 155.8080596923828, 4.280801773071289, -90.05965423583984, 163.4246826171875, 189.9550323486328, 14.277542114257812, 139.04959106445312, 128.25428771972656, -166.6056671142578], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000503.npy"}
|
||||
{"epoch": 0.7603930461073318, "step": 504, "batch_size": 64, "mean": 84.18635559082031, "std": 92.57777404785156, "min": -154.44992065429688, "p10": -11.286505126953124, "median": 82.8190689086914, "p90": 204.6721206665039, "max": 291.5130615234375, "pos_frac": 0.84375, "sample": [16.09051513671875, 184.53721618652344, 1.7071075439453125, 133.82838439941406, 243.5267791748047, -16.970809936523438, 127.07318115234375, 24.937490463256836, 202.7352752685547, 15.280961990356445, 66.06698608398438, 37.936492919921875, 4.2418060302734375, 205.502197265625, 38.54082107543945, 174.65792846679688, 170.75967407226562, -110.08887481689453, 117.54183959960938, 145.75921630859375, -13.15298843383789, 101.03614807128906, 3.7575302124023438, 182.6283416748047, 209.43016052246094, -27.154449462890625, 78.29480743408203, 198.9412384033203, 103.17448425292969, 291.5130615234375, 233.10507202148438, 247.01657104492188, -7.032451629638672, 64.70039367675781, 89.94061279296875, -10.457305908203125, 6.221214294433594, -130.50250244140625, 173.85833740234375, 75.93124389648438, 171.45504760742188, -11.641876220703125, -3.161426544189453, 37.860443115234375, 85.45196533203125, 169.5293426513672, 109.00302124023438, 57.818397521972656, 11.737579345703125, 138.50369262695312, 122.89659118652344, 86.7259521484375, -154.44992065429688, 135.03109741210938, 126.9599609375, 31.216148376464844, 14.40553092956543, 80.18617248535156, 124.50283813476562, 31.85186195373535, 8.613351821899414, 214.87158203125, 104.90272521972656, 38.742835998535156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000504.npy"}
|
||||
{"epoch": 0.7619047619047619, "step": 505, "batch_size": 64, "mean": 55.31865692138672, "std": 116.62345886230469, "min": -255.08560180664062, "p10": -66.36237335205077, "median": 27.21999168395996, "p90": 195.2275863647461, "max": 356.3974609375, "pos_frac": 0.703125, "sample": [129.70098876953125, -11.242656707763672, 98.48570251464844, 27.21957015991211, 1.6983985900878906, 356.3974609375, -155.4432373046875, 266.53778076171875, 169.8888702392578, 189.56979370117188, 194.3617401123047, -18.492454528808594, 51.77501678466797, 4.8702545166015625, 21.04418182373047, -39.4197998046875, -32.62147521972656, -16.751934051513672, 139.4678497314453, 7.391513824462891, -93.91265869140625, 181.49302673339844, 149.95755004882812, 32.66037368774414, 70.0231704711914, 257.87994384765625, 3.4952316284179688, -70.88752746582031, 27.220413208007812, 0.3987236022949219, -55.803680419921875, 14.976524353027344, 195.59866333007812, -160.29498291015625, 207.95187377929688, 166.93426513671875, 137.87725830078125, 194.0601043701172, -6.361509323120117, -16.664840698242188, 40.63385009765625, 25.33324432373047, -12.815956115722656, -137.29446411132812, 34.577392578125, 72.38308715820312, 98.95758819580078, -15.268625259399414, 220.28065490722656, -150.1211395263672, -9.803733825683594, 176.87954711914062, 186.20738220214844, 41.58775329589844, 177.89797973632812, 44.51811218261719, 166.32745361328125, 21.820846557617188, 12.996261596679688, 208.11642456054688, 23.56032371520996, 1.0856552124023438, -53.41950225830078, -255.08560180664062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000505.npy"}
|
||||
{"epoch": 0.763416477702192, "step": 506, "batch_size": 64, "mean": 60.59484100341797, "std": 114.32213592529297, "min": -283.81048583984375, "p10": -47.75558967590331, "median": 51.51841354370117, "p90": 189.26802978515624, "max": 274.8984375, "pos_frac": 0.75, "sample": [27.932287216186523, 156.81265258789062, 8.406681060791016, -26.792442321777344, 179.6219024658203, 186.41490173339844, 82.61241149902344, -53.4327507019043, 159.35580444335938, -34.508880615234375, 4.229978561401367, 90.70255279541016, 53.50408172607422, 0.08466339111328125, 118.24073791503906, 127.74905395507812, -188.03305053710938, 238.2372589111328, 251.21121215820312, 25.162513732910156, 190.77487182617188, -157.2818603515625, -12.555770874023438, -283.81048583984375, -153.19671630859375, -185.02392578125, 42.69419860839844, 201.9116973876953, 253.01666259765625, 189.57522583007812, 186.5795135498047, -16.353591918945312, 274.8984375, -17.28338623046875, 185.94503784179688, 25.20348358154297, 169.55929565429688, 90.78092193603516, 138.65408325195312, 3.794952392578125, -108.88470458984375, 15.66131591796875, 11.972549438476562, 11.945327758789062, 100.43161010742188, 84.02743530273438, 0.5554084777832031, 65.55963897705078, 131.9950408935547, 56.74115753173828, 37.84490966796875, 188.55123901367188, -6.041252136230469, 152.23110961914062, 131.39791870117188, -3.5440444946289062, 11.251792907714844, 132.98599243164062, -13.310890197753906, -8.710468292236328, 49.532745361328125, 188.10546875, 111.4156494140625, 0.9607696533203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000506.npy"}
|
||||
{"epoch": 0.764928193499622, "step": 507, "batch_size": 64, "mean": 65.41321563720703, "std": 121.59778594970703, "min": -250.78387451171875, "p10": -70.41196289062499, "median": 47.8504638671875, "p90": 225.67467041015632, "max": 377.07354736328125, "pos_frac": 0.6875, "sample": [177.5556640625, 9.46099853515625, -9.555854797363281, 45.9315185546875, 358.67144775390625, -13.487709045410156, 297.6909484863281, -250.78387451171875, -86.78608703613281, 130.9517822265625, 94.70115661621094, 55.01219177246094, -61.3856201171875, 132.52200317382812, -51.273193359375, -98.11517333984375, 88.98233032226562, -25.827781677246094, 188.86053466796875, 30.19699478149414, 55.73857116699219, 49.7694091796875, 180.2562713623047, -27.7493839263916, 208.25375366210938, -15.155847549438477, 70.54430389404297, 254.12326049804688, 100.58146667480469, 5.92132568359375, 68.60040283203125, 9.80413818359375, 294.2091064453125, -50.392967224121094, 202.330322265625, 140.34915161132812, -74.2803955078125, 207.6790008544922, -94.06114959716797, 2.9582748413085938, -38.824188232421875, 5.362518310546875, -5.6691436767578125, 156.3945770263672, 132.61293029785156, 19.481002807617188, 31.577293395996094, 18.628952026367188, 377.07354736328125, 237.6944580078125, -57.95265197753906, 116.70919799804688, -41.78534698486328, 8.237953186035156, 116.08767700195312, -1.8765182495117188, 117.48455810546875, -106.03437805175781, 56.353904724121094, -108.977294921875, 21.2877197265625, 233.14077758789062, 123.98974609375, 172.64703369140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000507.npy"}
|
||||
{"epoch": 0.7664399092970522, "step": 508, "batch_size": 64, "mean": 75.23462677001953, "std": 113.32268524169922, "min": -175.4059295654297, "p10": -62.90342636108399, "median": 64.81293487548828, "p90": 220.63682250976564, "max": 299.1429443359375, "pos_frac": 0.734375, "sample": [155.10850524902344, 30.12249755859375, 159.62562561035156, 13.327407836914062, 45.978736877441406, -59.17449951171875, 237.6700439453125, -2.6837234497070312, 149.9956512451172, 217.99981689453125, 259.69207763671875, 171.04490661621094, 34.66139221191406, 5.118896484375, 28.25787353515625, 132.34512329101562, -175.4059295654297, 48.80084228515625, 23.542938232421875, -76.38824462890625, 11.5128173828125, 13.140289306640625, 153.04531860351562, -65.43242645263672, -62.38642120361328, -169.82858276367188, 81.80731201171875, 0.5173664093017578, 299.1429443359375, 93.86749267578125, -63.125, 278.40423583984375, 98.73509216308594, 85.52165985107422, 150.77099609375, 123.74844360351562, 108.95890808105469, 114.53662109375, 189.03048706054688, -2.7646255493164062, -0.6916351318359375, -27.74597930908203, 171.66903686523438, 204.08895874023438, -11.865837097167969, -99.17390441894531, 146.53005981445312, 251.03631591796875, 221.74893188476562, 6.947404861450195, 200.0869140625, -3.2142333984375, 66.00216674804688, -1.973318099975586, 6.0059051513671875, -156.30935668945312, -18.606246948242188, 201.87655639648438, 218.04190063476562, 226.81121826171875, 84.77884674072266, 63.62370300292969, 10.492889404296875, 216.0128173828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000508.npy"}
|
||||
{"epoch": 0.7679516250944822, "step": 509, "batch_size": 64, "mean": 60.04317855834961, "std": 106.41165924072266, "min": -193.713134765625, "p10": -91.207218170166, "median": 63.51507568359375, "p90": 198.46633300781252, "max": 236.34259033203125, "pos_frac": 0.703125, "sample": [53.55412292480469, -45.59829330444336, 88.16639709472656, 182.59588623046875, 71.22248840332031, -2.4837398529052734, 9.040252685546875, -18.465105056762695, -98.23429107666016, 114.47623443603516, 22.87743377685547, 71.58346557617188, -152.13204956054688, 88.62820434570312, 177.94497680664062, -133.7729034423828, 173.89892578125, 152.58059692382812, -38.72853088378906, 24.473541259765625, 157.0414581298828, -111.86646270751953, 23.3619384765625, -22.04950714111328, 236.34259033203125, 35.74101257324219, -37.134849548339844, 40.74580001831055, 75.75621032714844, -7.04449462890625, 231.85501098632812, 27.136066436767578, 199.76229858398438, 150.01364135742188, 27.81304931640625, -2.1382274627685547, 178.36285400390625, 91.85688781738281, 152.16287231445312, 179.90426635742188, 15.484672546386719, 0.0213165283203125, 213.91024780273438, -74.81071472167969, 132.21954345703125, 213.13389587402344, 55.80766296386719, 89.81929016113281, 210.899658203125, 165.10733032226562, 195.44241333007812, -30.509803771972656, -50.864830017089844, -193.713134765625, -6.1906585693359375, 165.73341369628906, 113.3458023071289, -138.26107788085938, -111.97199249267578, 80.50594329833984, 9.762882232666016, 75.3038101196289, 201.7287139892578, 141.60899353027344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000509.npy"}
|
||||
{"epoch": 0.7694633408919124, "step": 510, "batch_size": 64, "mean": 75.83061981201172, "std": 127.90099334716797, "min": -198.25814819335938, "p10": -82.50048370361324, "median": 51.796512603759766, "p90": 244.05356140136723, "max": 402.0762023925781, "pos_frac": 0.765625, "sample": [73.84632873535156, 188.17555236816406, 200.25070190429688, 164.18409729003906, 17.825775146484375, 18.439239501953125, 20.211467742919922, 225.05413818359375, -4.893653869628906, 10.85162353515625, -45.93627166748047, 42.30608367919922, 20.563457489013672, 47.980552673339844, -159.4605255126953, -15.7685546875, 9.377090454101562, 53.28910827636719, -33.229400634765625, 12.98149299621582, 74.76318359375, 188.8168487548828, 208.96107482910156, -98.17086029052734, 6.646003723144531, 189.90892028808594, 40.962425231933594, -34.44694519042969, -0.3110237121582031, 154.112548828125, 49.551971435546875, 123.03408813476562, 284.37091064453125, 80.90901947021484, 11.65176010131836, 162.7120361328125, 92.76415252685547, 262.47735595703125, 69.86029052734375, 5.70928955078125, 6.695951461791992, -166.0035400390625, 185.86660766601562, 65.02293395996094, 270.8672180175781, 23.34223175048828, 164.73013305664062, 247.503662109375, 379.8708190917969, -107.33570098876953, 50.303916931152344, 78.43757629394531, 184.15158081054688, 172.54299926757812, -140.85870361328125, -0.8916664123535156, 253.39633178710938, 236.00332641601562, -13.486099243164062, 130.25833129882812, 402.0762023925781, 73.61766052246094, -165.02516174316406, -198.25814819335938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000510.npy"}
|
||||
{"epoch": 0.7709750566893424, "step": 511, "batch_size": 64, "mean": 86.90606689453125, "std": 104.15306854248047, "min": -146.2301025390625, "p10": -29.468530654907227, "median": 75.78649520874023, "p90": 209.65808563232423, "max": 265.4393310546875, "pos_frac": 0.765625, "sample": [211.1859130859375, 206.09315490722656, 86.14412689208984, 193.8124237060547, 26.677188873291016, -21.485488891601562, 46.376102447509766, -6.803947448730469, 160.70689392089844, 205.23880004882812, 110.868408203125, -71.66349029541016, 17.548019409179688, 70.55254364013672, 187.68157958984375, 242.8761749267578, 172.00489807128906, -29.555191040039062, 148.39993286132812, 119.18212890625, 196.4632568359375, -1.21575927734375, 67.9645767211914, -99.13428497314453, -130.46766662597656, -21.856353759765625, 216.51527404785156, 146.94361877441406, 134.8341064453125, 10.06231689453125, 37.292449951171875, 181.18362426757812, 37.89166259765625, 64.78994750976562, 179.48837280273438, 144.07301330566406, 173.22189331054688, 30.459774017333984, -15.614677429199219, 18.174039840698242, 55.07603454589844, 77.65348815917969, 193.1643524169922, 226.79376220703125, 265.4393310546875, 161.5657958984375, -146.2301025390625, 62.394683837890625, -17.560518264770508, 188.8314666748047, 37.931190490722656, 0.25946044921875, 147.14691162109375, 175.38677978515625, 9.891067504882812, 73.91950225830078, -107.24372863769531, 238.7845458984375, 223.30397033691406, 161.26815795898438, 204.32321166992188, -4.268482208251953, -83.48580932617188, -29.26632308959961], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000511.npy"}
|
||||
{"epoch": 0.7724867724867724, "step": 512, "batch_size": 64, "mean": 55.7491340637207, "std": 109.60863494873047, "min": -293.53009033203125, "p10": -68.55424232482909, "median": 53.32527542114258, "p90": 182.33264465332033, "max": 242.3016815185547, "pos_frac": 0.734375, "sample": [176.59634399414062, 84.80081176757812, 20.622316360473633, -165.55645751953125, -73.0872802734375, 185.3546142578125, 62.418697357177734, 11.883245468139648, 65.96752166748047, 136.6129608154297, 157.5239715576172, 210.47889709472656, -293.53009033203125, -57.97715377807617, 10.988616943359375, -23.76220703125, -16.01630401611328, 2.151744842529297, 171.20108032226562, 6.087610244750977, 203.78155517578125, 182.8970947265625, 8.509544372558594, 171.86209106445312, -32.68055725097656, 147.23040771484375, -98.67340087890625, 178.65274047851562, -48.15100860595703, 157.9318084716797, 2.4230384826660156, 168.96844482421875, 167.731201171875, 5.939611434936523, 146.10421752929688, 38.448699951171875, 6.0954132080078125, 104.77220153808594, 3.542764663696289, 191.01226806640625, -2.4665374755859375, -154.14002990722656, -11.15721321105957, -37.41764831542969, 92.36201477050781, 242.3016815185547, 52.309669494628906, 23.693523406982422, 66.0167236328125, 4.843898773193359, 171.5411376953125, -6.287330627441406, 54.34088134765625, -144.458740234375, 55.391632080078125, 161.8735809326172, 210.3594970703125, 92.71006774902344, 181.01559448242188, 166.4129638671875, 44.78356170654297, -42.496116638183594, 88.95124816894531, -121.69669342041016], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000512.npy"}
|
||||
{"epoch": 0.7739984882842026, "step": 513, "batch_size": 64, "mean": 72.06602478027344, "std": 95.55028533935547, "min": -172.9080810546875, "p10": -27.622324752807618, "median": 62.18646812438965, "p90": 201.11419830322265, "max": 333.2655029296875, "pos_frac": 0.78125, "sample": [294.902587890625, 153.0142059326172, 184.53431701660156, 113.39790344238281, 151.6427001953125, 62.95753860473633, 157.16299438476562, -1.7985038757324219, -1.4429969787597656, -10.245597839355469, 67.94469451904297, -82.85943603515625, 172.8405303955078, 127.0849609375, 111.54899597167969, 114.89027404785156, -27.93930435180664, 11.640434265136719, 199.22637939453125, 4.014610290527344, 31.289382934570312, 217.1340789794922, 130.51528930664062, 90.20944213867188, 214.43609619140625, 5.43585205078125, -26.882705688476562, 120.58477783203125, 28.236513137817383, 92.05008697509766, 18.311622619628906, 88.66415405273438, 125.37743377685547, -172.9080810546875, 69.48238372802734, 13.995141983032227, -50.40380096435547, 187.44854736328125, 333.2655029296875, 23.485126495361328, 1.6662178039550781, -1.497091293334961, 232.8065185546875, 13.1092529296875, 151.84442138671875, 201.9232635498047, 220.84066772460938, 13.001502990722656, 150.39657592773438, 99.46630859375, 61.41539764404297, -5.4835662841796875, 30.837608337402344, 66.56828308105469, -35.24737548828125, -72.9013671875, 40.715797424316406, 18.734249114990234, -3.232013702392578, 32.89891052246094, 98.52264404296875, -59.77092742919922, 9.377151489257812, 3.9886741638183594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000513.npy"}
|
||||
{"epoch": 0.7755102040816326, "step": 514, "batch_size": 64, "mean": 79.54997253417969, "std": 112.23475646972656, "min": -211.15493774414062, "p10": -66.24002342224118, "median": 90.76052474975586, "p90": 198.9321960449219, "max": 395.1710205078125, "pos_frac": 0.765625, "sample": [5.224851608276367, 145.954345703125, 116.29920959472656, 112.65180969238281, 100.98078918457031, 90.0583267211914, 183.424560546875, -25.145278930664062, 68.67919921875, -15.87289810180664, 26.785099029541016, 207.0349884033203, 139.08445739746094, 90.71560668945312, -6.880426406860352, 85.3451919555664, -129.44400024414062, 146.46807861328125, 11.35858154296875, 64.20189666748047, -191.20277404785156, 1.1685867309570312, 180.70535278320312, 148.3387451171875, 211.49566650390625, 12.059036254882812, 68.93850708007812, 165.69293212890625, 130.4592742919922, 232.25531005859375, -78.0404052734375, -83.02645111083984, 160.93490600585938, 201.2177734375, 13.718437194824219, 93.32465362548828, 122.19296264648438, 171.69964599609375, -15.650436401367188, 135.8403778076172, 55.870262145996094, -96.46769714355469, -38.7057991027832, 14.588104248046875, 179.48121643066406, 90.8054428100586, 180.3431854248047, -31.602706909179688, 22.587738037109375, 114.25312042236328, -16.690818786621094, 52.86371994018555, -13.039894104003906, 190.28277587890625, -211.15493774414062, 138.48117065429688, 221.1887664794922, 182.65945434570312, 262.6496887207031, 6.583288192749023, 395.1710205078125, 190.27783203125, -91.87263488769531, 193.59918212890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000514.npy"}
|
||||
{"epoch": 0.7770219198790628, "step": 515, "batch_size": 64, "mean": 62.067771911621094, "std": 107.71669006347656, "min": -254.31680297851562, "p10": -80.78687591552735, "median": 66.72040176391602, "p90": 202.63451080322267, "max": 236.66358947753906, "pos_frac": 0.703125, "sample": [-16.075634002685547, 125.52436065673828, 1.0976181030273438, 46.47663116455078, 236.66358947753906, 171.98675537109375, 13.59005355834961, 187.7550811767578, 8.3763427734375, 205.20993041992188, 75.04108428955078, 44.274330139160156, 100.8568344116211, -9.61870002746582, 128.82847595214844, 90.9486083984375, -105.76272583007812, 210.44032287597656, -91.22545623779297, -127.8760986328125, 91.48163604736328, 3.0625457763671875, -26.9322509765625, 47.91468811035156, -254.31680297851562, 136.78501892089844, 193.31625366210938, 66.97100830078125, -34.84613800048828, -49.61620330810547, 127.5529556274414, 164.97335815429688, -1.5739631652832031, -0.7513942718505859, 52.15149688720703, 172.94906616210938, -111.529541015625, 173.87648010253906, -17.720983505249023, 130.7155303955078, -82.15257263183594, 99.93084716796875, 225.0087432861328, -147.79043579101562, 96.34318542480469, 217.01478576660156, 28.034135818481445, -23.6362361907959, 66.46979522705078, 193.51406860351562, 21.78685760498047, 146.03619384765625, 121.54251098632812, 196.6251983642578, 225.7693634033203, -77.60025024414062, 147.86380004882812, 3.4710731506347656, 211.12062072753906, -51.699119567871094, 86.8622055053711, 103.14407348632812, -2.6408939361572266, 6.345237731933594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000515.npy"}
|
||||
{"epoch": 0.7785336356764928, "step": 516, "batch_size": 64, "mean": 61.22477722167969, "std": 107.70653533935547, "min": -231.08657836914062, "p10": -64.30815963745117, "median": 62.52565383911133, "p90": 181.73559722900393, "max": 231.60203552246094, "pos_frac": 0.75, "sample": [146.62002563476562, 206.67686462402344, 160.95455932617188, -90.20329284667969, -64.40818786621094, 10.09476089477539, -60.488990783691406, 151.53318786621094, 182.7322998046875, 139.5374755859375, 59.28441619873047, 29.69841194152832, 192.2666015625, 226.5453338623047, 1.1740760803222656, 6.91520881652832, 110.66156005859375, 177.83380126953125, 231.60203552246094, 167.35035705566406, -64.07476043701172, -199.4357452392578, 82.75680541992188, 93.67928314208984, 160.23150634765625, 65.76689147949219, 175.89390563964844, 8.401473999023438, 175.7160186767578, 134.45040893554688, -231.08657836914062, 24.090240478515625, 0.3895587921142578, -14.408760070800781, -14.639595031738281, 172.7213134765625, 177.62005615234375, -6.729192733764648, 3.4881019592285156, 14.40219497680664, 147.2227020263672, -198.62313842773438, 125.03539276123047, 196.89889526367188, 55.036834716796875, 52.32635498046875, 179.4099578857422, 53.979732513427734, 83.42970275878906, 118.84593200683594, 4.9783172607421875, 160.76678466796875, 13.2093505859375, -61.853424072265625, -123.49105834960938, 119.22886657714844, -36.39158248901367, 99.26573181152344, -8.173652648925781, 16.259090423583984, 70.48490905761719, -2.8700523376464844, -82.98751068115234, 190.78411865234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000516.npy"}
|
||||
{"epoch": 0.780045351473923, "step": 517, "batch_size": 64, "mean": 42.78327178955078, "std": 102.11988067626953, "min": -175.05313110351562, "p10": -98.67407531738282, "median": 23.29499626159668, "p90": 186.32250823974613, "max": 272.3854064941406, "pos_frac": 0.65625, "sample": [16.50454330444336, -18.575119018554688, -1.8997230529785156, 101.88151550292969, -57.432533264160156, 19.095417022705078, -67.17277526855469, 108.03347778320312, 222.9332275390625, 37.28403091430664, 4.4779815673828125, -15.627082824707031, -74.68035888671875, -9.739006042480469, 20.65301513671875, -50.497825622558594, -5.133806228637695, -8.758811950683594, 37.017364501953125, 18.064178466796875, 112.04023742675781, -129.46604919433594, 272.3854064941406, 162.81954956054688, -175.05313110351562, 54.79346466064453, -15.506439208984375, 94.66027069091797, 63.489044189453125, 169.56983947753906, 57.89618682861328, 165.46702575683594, 5.182893753051758, 67.95709991455078, -104.05572509765625, -99.45035552978516, 26.040130615234375, 170.76747131347656, -40.122772216796875, -124.25896453857422, 72.3398208618164, 200.12808227539062, 257.3671875, 50.09284591674805, 111.59544372558594, 167.78271484375, 8.293338775634766, 74.24140930175781, -115.2492904663086, -28.664085388183594, -96.86275482177734, 99.05123901367188, 19.201431274414062, 179.82455444335938, 224.6962890625, 195.9539794921875, 56.08189392089844, -11.195735931396484, 23.872154235839844, 22.717838287353516, 189.1073455810547, -114.60490417480469, 120.47434997558594, 20.30133056640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000517.npy"}
|
||||
{"epoch": 0.781557067271353, "step": 518, "batch_size": 64, "mean": 64.81591033935547, "std": 105.97953033447266, "min": -178.45118713378906, "p10": -60.36960220336913, "median": 35.646244049072266, "p90": 210.47751770019534, "max": 324.40185546875, "pos_frac": 0.75, "sample": [104.11810302734375, -2.521942138671875, -2.5337600708007812, 206.76583862304688, -27.4144287109375, 8.573768615722656, 26.577171325683594, 116.56245422363281, -47.860172271728516, 32.436241149902344, 81.04150390625, -70.95779418945312, 57.773597717285156, 95.90586853027344, 115.10478973388672, -90.40049743652344, 49.40635681152344, 132.8939208984375, -89.2757568359375, 191.7751922607422, 309.57958984375, -178.45118713378906, 0.6276016235351562, 123.40020751953125, 147.04202270507812, 175.7378692626953, -63.550933837890625, 147.20570373535156, -78.36537170410156, 141.49176025390625, 8.933277130126953, -46.389671325683594, 4.025815963745117, 109.25001525878906, 36.74755859375, 1.03271484375, 3.0862998962402344, 2.268157958984375, 96.32756042480469, 281.43414306640625, 19.263259887695312, 59.019508361816406, 0.15119361877441406, 212.0682373046875, 131.70982360839844, 324.40185546875, -68.37307739257812, 173.4991912841797, 243.35806274414062, -52.946495056152344, -10.949792861938477, -16.81243324279785, 232.99093627929688, 0.4597129821777344, 34.54492950439453, 174.45333862304688, 155.808837890625, 51.803466796875, -2.150789260864258, 1.0492401123046875, 4.214210510253906, 7.304357528686523, 237.2237548828125, 126.72340393066406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000518.npy"}
|
||||
{"epoch": 0.783068783068783, "step": 519, "batch_size": 64, "mean": 37.75080108642578, "std": 129.95037841796875, "min": -329.89801025390625, "p10": -122.76853485107421, "median": 41.00959396362305, "p90": 195.95724792480468, "max": 238.156494140625, "pos_frac": 0.609375, "sample": [-156.73695373535156, 175.98583984375, 117.59890747070312, -200.2821502685547, -49.75752639770508, 192.3871612548828, -9.133464813232422, 238.156494140625, 22.853225708007812, 163.86227416992188, 53.300384521484375, 32.79224395751953, 194.12521362304688, -51.206077575683594, 73.3794937133789, 107.01718139648438, 192.56668090820312, 21.32132911682129, -177.87802124023438, -49.54248046875, 185.95176696777344, 197.6890106201172, -181.3837127685547, 230.5287322998047, 162.4968719482422, 196.19256591796875, -87.45472717285156, -1.9911880493164062, 61.72947692871094, 106.67398071289062, -65.45252990722656, 170.22569274902344, 40.93003845214844, -74.552001953125, 92.21686553955078, -67.1971435546875, -85.26692199707031, 4.263652801513672, 225.3575897216797, -110.84475708007812, 109.5758056640625, 6.6533660888671875, 187.80593872070312, 68.94591522216797, 41.089149475097656, -329.89801025390625, 62.58106994628906, -55.273712158203125, 42.752349853515625, -126.35839080810547, 187.32994079589844, -76.62509155273438, -131.47740173339844, 231.96115112304688, 63.60447692871094, 25.717750549316406, 220.11463928222656, 120.3947982788086, -1.3211174011230469, -114.39220428466797, -71.81416320800781, -29.317529678344727, 195.40817260742188, -102.32872009277344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000519.npy"}
|
||||
{"epoch": 0.7845804988662132, "step": 520, "batch_size": 64, "mean": 55.13550567626953, "std": 100.93441009521484, "min": -170.52037048339844, "p10": -56.520948791503905, "median": 36.23203659057617, "p90": 182.84978637695312, "max": 346.1098327636719, "pos_frac": 0.703125, "sample": [160.07130432128906, -19.39776039123535, -3.1178855895996094, -17.77170181274414, 180.58465576171875, 27.894683837890625, 87.30220031738281, 8.595359802246094, -4.154308319091797, 346.1098327636719, 146.67526245117188, 6.849969863891602, -23.86750030517578, -54.191314697265625, 93.55767822265625, 86.52513885498047, 21.880149841308594, -57.51936340332031, 11.897884368896484, 179.36366271972656, 280.51397705078125, 86.63626098632812, 33.82624053955078, 201.89649963378906, 81.400390625, 54.08155059814453, 22.497756958007812, -91.16529846191406, -115.24542999267578, 144.18722534179688, 66.91038513183594, -2.7567977905273438, -7.7950592041015625, 21.55992889404297, 5.4221649169921875, -1.1808509826660156, 194.1129150390625, 110.9836654663086, 25.963272094726562, -107.84107208251953, -170.52037048339844, 44.61247253417969, 58.96360778808594, -110.48646545410156, 81.77413940429688, 75.16609954833984, 13.005691528320312, 135.06068420410156, 125.42149353027344, 38.63783264160156, 44.57273864746094, 199.76834106445312, 167.47703552246094, 70.14884948730469, 260.3052978515625, 33.100563049316406, 81.47244262695312, 2.881378173828125, -12.662220001220703, -0.5364227294921875, -5.820442199707031, 183.73948669433594, 180.77381896972656, -149.4795379638672], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000520.npy"}
|
||||
{"epoch": 0.7860922146636432, "step": 521, "batch_size": 64, "mean": 82.13256072998047, "std": 105.06463623046875, "min": -206.87933349609375, "p10": -16.136951446533203, "median": 77.84992218017578, "p90": 193.21112976074218, "max": 385.7184143066406, "pos_frac": 0.734375, "sample": [178.93228149414062, 64.95782470703125, 163.34088134765625, 67.4691162109375, 177.65615844726562, 122.5188217163086, 178.9296875, -6.8211517333984375, -5.1754608154296875, 45.611419677734375, 61.096893310546875, -31.160320281982422, 193.48825073242188, 72.60970306396484, 154.02713012695312, 97.2826919555664, 13.70207405090332, 23.16461944580078, -6.9772796630859375, 183.5803680419922, -13.478675842285156, 192.56451416015625, 44.62178039550781, 15.15997314453125, -9.32628059387207, 142.0604248046875, -110.54844665527344, 190.46511840820312, -13.939208984375, 185.65069580078125, 242.8700408935547, 385.7184143066406, -114.98357391357422, 22.413738250732422, -10.790260314941406, -16.34215545654297, -12.815437316894531, 101.00395202636719, 2.4432220458984375, 242.5479736328125, 244.6948699951172, 16.171154022216797, -61.442604064941406, 97.8180923461914, -2.3001155853271484, 23.921781539916992, 165.4598388671875, 104.62043762207031, 8.68368148803711, 110.86558532714844, 162.6725311279297, 110.0654525756836, -206.87933349609375, 277.4491882324219, 181.45187377929688, 115.16496276855469, 56.70707702636719, -35.748199462890625, 93.14140319824219, -15.65814208984375, 123.49349975585938, 83.09014129638672, 204.99163818359375, 184.51947021484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000521.npy"}
|
||||
{"epoch": 0.7876039304610734, "step": 522, "batch_size": 64, "mean": 50.66620635986328, "std": 123.8101577758789, "min": -280.7487487792969, "p10": -92.03233566284179, "median": 45.406124114990234, "p90": 210.4472412109375, "max": 282.57464599609375, "pos_frac": 0.65625, "sample": [-60.7569580078125, 81.13933563232422, 5.153116226196289, 81.85736846923828, 102.05689239501953, 189.7128448486328, 64.57059478759766, 236.88583374023438, 184.5748291015625, 75.5060806274414, -168.07435607910156, -131.15725708007812, 72.44544982910156, 212.34234619140625, 41.18684387207031, 171.4515380859375, -59.917991638183594, 82.43096923828125, 157.5789031982422, 13.806028366088867, 209.6322479248047, -2.735157012939453, 61.83391571044922, -253.40167236328125, 281.6438293457031, 2.1292343139648438, -4.098079681396484, -87.33818817138672, -280.7487487792969, 19.296571731567383, -20.936874389648438, 197.50033569335938, -43.57603454589844, 224.11009216308594, 158.26025390625, 88.31541442871094, -10.433609008789062, 63.26484680175781, 13.594524383544922, 49.625404357910156, 35.34723663330078, -11.716575622558594, 8.788734436035156, -35.55200958251953, 123.80186462402344, -0.38545989990234375, 127.62350463867188, -197.1523895263672, 210.79652404785156, -63.19874572753906, -19.797752380371094, -127.07838439941406, 170.74343872070312, 282.57464599609375, -13.75067138671875, 16.746971130371094, -94.04411315917969, 255.5054473876953, 151.14608764648438, 136.83917236328125, 3.138986587524414, -29.181339263916016, 106.25436401367188, 186.45687866210938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000522.npy"}
|
||||
{"epoch": 0.7891156462585034, "step": 523, "batch_size": 64, "mean": 59.72750473022461, "std": 111.90266418457031, "min": -304.899658203125, "p10": -85.91635360717773, "median": 72.92779159545898, "p90": 186.19492645263676, "max": 234.60650634765625, "pos_frac": 0.71875, "sample": [32.14847183227539, 216.42254638671875, -5.065364837646484, -24.75531768798828, 130.53448486328125, 99.1269302368164, 102.23731994628906, 39.638145446777344, 108.28134155273438, 30.089218139648438, -0.17506980895996094, 93.60591125488281, 221.30413818359375, -132.822265625, 66.65660858154297, 79.198974609375, 119.19839477539062, -10.614194869995117, 58.069915771484375, -161.18374633789062, 13.198455810546875, 189.93234252929688, -61.41304016113281, 64.77193450927734, -88.25616455078125, 168.84190368652344, -55.29161834716797, 32.23453903198242, 6.383979797363281, 169.2453155517578, 25.827993392944336, 144.54348754882812, 151.97879028320312, 129.96348571777344, 163.2422637939453, 173.99566650390625, 177.4742889404297, 40.008056640625, 17.486900329589844, -304.899658203125, 89.29483032226562, -148.76748657226562, 231.59085083007812, 116.4358901977539, 172.6514129638672, -32.21967315673828, -0.4789237976074219, 150.19976806640625, 106.26863098144531, -80.45679473876953, 231.1447296142578, 45.92350387573242, 135.64300537109375, -117.9406509399414, 151.4432373046875, 191.537353515625, 100.58140563964844, 234.60650634765625, -10.4886474609375, -172.54063415527344, -50.596702575683594, 93.09700012207031, 27.108001708984375, 137.35833740234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000523.npy"}
|
||||
{"epoch": 0.7906273620559335, "step": 524, "batch_size": 64, "mean": 63.550968170166016, "std": 97.16644287109375, "min": -192.327880859375, "p10": -47.116268920898435, "median": 52.82728576660156, "p90": 184.57815399169922, "max": 306.78656005859375, "pos_frac": 0.78125, "sample": [169.9861602783203, 36.291587829589844, 233.17276000976562, 71.24481201171875, -25.204132080078125, -13.288429260253906, -72.75330352783203, -45.585723876953125, 99.30726623535156, 87.62519836425781, 211.0572509765625, 0.9209060668945312, -72.1761245727539, 74.02975463867188, 53.54638671875, 219.4349365234375, -183.81192016601562, -47.772216796875, 21.582744598388672, 186.19235229492188, 173.79180908203125, -192.327880859375, 173.1845245361328, -1.0697174072265625, 20.107940673828125, 14.178993225097656, 177.9482421875, 106.8820571899414, 115.92546081542969, 10.368865966796875, -11.285385131835938, 100.78301239013672, 39.7423095703125, 93.2575912475586, 115.3701171875, 47.62276840209961, 23.740148544311523, 160.33932495117188, 24.332717895507812, -67.85270690917969, 52.108184814453125, 8.19610595703125, 5.676155090332031, 306.78656005859375, 43.95037078857422, 134.60504150390625, 180.8116912841797, 5.987728118896484, 46.58424377441406, 57.302276611328125, 110.99053192138672, 38.532752990722656, 57.37169647216797, 205.791259765625, 83.29344177246094, 147.5392303466797, 186.90853881835938, 108.3195571899414, -125.06396484375, -4.271537780761719, 175.58267211914062, -3.4396591186523438, 106.25627136230469, 8.602325439453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000524.npy"}
|
||||
{"epoch": 0.7921390778533636, "step": 525, "batch_size": 64, "mean": 52.26536560058594, "std": 110.68328857421875, "min": -241.12274169921875, "p10": -70.58736114501953, "median": 27.24135971069336, "p90": 200.2474304199219, "max": 325.4239501953125, "pos_frac": 0.625, "sample": [-30.702606201171875, -197.8973846435547, 3.964080810546875, -29.510128021240234, -117.3179702758789, 52.529541015625, 14.644920349121094, 62.2012939453125, 244.96743774414062, -8.066383361816406, 149.5977020263672, -12.435699462890625, 151.28399658203125, 27.155120849609375, 144.72802734375, 206.64212036132812, 110.76727294921875, -22.726341247558594, 203.3710174560547, -71.25051879882812, 84.4551010131836, -1.3767509460449219, 224.230712890625, 172.09188842773438, -21.375038146972656, 101.18208312988281, -14.194091796875, 104.79573059082031, 50.650550842285156, 112.27427673339844, 6.236328125, 215.55270385742188, 27.327598571777344, 108.57470703125, 82.31632995605469, -36.31977844238281, 46.51642990112305, -0.509307861328125, 325.4239501953125, -7.276216506958008, -69.03999328613281, 58.43158721923828, 0.6700096130371094, -241.12274169921875, -21.962730407714844, 192.9590606689453, -1.6643791198730469, 26.503089904785156, 25.608184814453125, 149.3641357421875, -80.36614227294922, 187.56222534179688, 5.260734558105469, -27.863525390625, 143.13449096679688, 285.3003234863281, -122.48773193359375, 73.55125427246094, 120.73008728027344, 129.91104125976562, -15.995697021484375, 179.23521423339844, -3.199735641479492, -112.05799865722656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000525.npy"}
|
||||
{"epoch": 0.7936507936507936, "step": 526, "batch_size": 64, "mean": 45.412139892578125, "std": 103.16321563720703, "min": -186.033203125, "p10": -79.28284378051758, "median": 46.295854568481445, "p90": 167.28943634033206, "max": 298.0430603027344, "pos_frac": 0.734375, "sample": [217.1687774658203, 8.745189666748047, -36.99134063720703, 5.199552536010742, -100.4170150756836, 8.940162658691406, -0.23023605346679688, 286.90533447265625, -173.22108459472656, -158.88307189941406, 43.653564453125, 79.644775390625, -161.17678833007812, 19.975555419921875, 161.6121368408203, -36.12061309814453, 59.75163650512695, 153.96788024902344, 12.185874938964844, 141.85487365722656, 80.05233764648438, 207.66824340820312, 35.14222717285156, 110.65310668945312, 182.83953857421875, 8.660465240478516, 70.12955474853516, 144.5775146484375, -79.55280303955078, 143.65948486328125, -4.838136672973633, 64.35704803466797, 51.216697692871094, -23.684412002563477, 169.72256469726562, 1.2164897918701172, 121.57756805419922, 20.997901916503906, 30.792633056640625, 28.075035095214844, 77.14568328857422, 27.849132537841797, -33.28080749511719, 136.32676696777344, 60.59675979614258, -179.08279418945312, 0.22206497192382812, 154.18572998046875, 112.35604858398438, -30.484344482421875, 88.21121215820312, -11.877513885498047, 298.0430603027344, 48.93814468383789, 188.44097900390625, 4.5098419189453125, -78.65293884277344, 67.47584533691406, 71.94991302490234, 66.58470153808594, 84.18353271484375, -40.180694580078125, 83.12130737304688, -186.033203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000526.npy"}
|
||||
{"epoch": 0.7951625094482238, "step": 527, "batch_size": 64, "mean": 41.600921630859375, "std": 110.15955352783203, "min": -208.99588012695312, "p10": -90.60811691284178, "median": 18.571941375732422, "p90": 197.46582183837893, "max": 292.06787109375, "pos_frac": 0.640625, "sample": [0.04540061950683594, 15.865585327148438, 50.017173767089844, 110.37786865234375, 22.759262084960938, 2.6734886169433594, 231.29489135742188, 93.30119323730469, 174.88059997558594, 292.06787109375, 175.37435913085938, -29.586761474609375, 194.69969177246094, 8.083229064941406, 44.28578186035156, -97.36052703857422, 228.4124755859375, 0.06379127502441406, 106.35153198242188, -74.85249328613281, -143.87228393554688, -62.61786651611328, -21.266067504882812, -116.40348815917969, 77.14476013183594, 11.483367919921875, 247.81491088867188, 73.00254821777344, 15.067546844482422, 13.0152587890625, 103.98103332519531, 21.278297424316406, -59.664398193359375, 40.07186508178711, 38.259342193603516, 198.65130615234375, 47.675926208496094, -21.626373291015625, -14.494077682495117, 185.44378662109375, 73.42506408691406, 14.62057876586914, 202.05209350585938, -5.356128692626953, -32.420867919921875, -23.323951721191406, -144.5489959716797, 186.92408752441406, -6.6024932861328125, -2.6773853302001953, -13.602752685546875, -0.04638099670410156, 167.76751708984375, 134.585205078125, -208.99588012695312, -31.871253967285156, 109.89474487304688, -18.361839294433594, 102.4171142578125, 213.71084594726562, -162.45907592773438, -193.22779846191406, 96.43363952636719, 22.423248291015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000527.npy"}
|
||||
{"epoch": 0.7966742252456538, "step": 528, "batch_size": 64, "mean": 46.65412139892578, "std": 118.32176208496094, "min": -245.87319946289062, "p10": -64.39219284057617, "median": 23.55760097503662, "p90": 206.44230499267582, "max": 299.975830078125, "pos_frac": 0.703125, "sample": [130.4028778076172, -245.87319946289062, 200.10446166992188, -4.742265701293945, 48.306251525878906, 133.84793090820312, 69.45365905761719, -6.332122802734375, 12.524826049804688, -6.400054931640625, -5.7518157958984375, -59.6282958984375, 209.1585235595703, -182.80990600585938, 4.91172981262207, -2.217132568359375, 174.2123565673828, 13.406850814819336, 16.890562057495117, -66.0521240234375, -60.519020080566406, 295.4197998046875, -1.0514450073242188, -227.11404418945312, -7.209251403808594, 33.03662872314453, -59.303688049316406, -24.05425262451172, 21.246397018432617, 170.65206909179688, 81.74163818359375, 25.868804931640625, 35.115962982177734, 228.79470825195312, 131.24795532226562, 15.002958297729492, 224.1444549560547, 0.7322311401367188, 94.92686462402344, 112.70225524902344, 199.86990356445312, 213.1428680419922, 29.713361740112305, 17.406003952026367, -182.11114501953125, 105.72386169433594, 13.005847930908203, 14.925201416015625, 163.64053344726562, 0.553924560546875, 59.533203125, -167.4483642578125, -185.8500518798828, 299.975830078125, 227.56564331054688, 68.91389465332031, 4.932697296142578, -7.412254333496094, 175.50091552734375, 6.329568862915039, 146.81039428710938, 70.1777114868164, 134.28945922851562, 51.88053894042969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000528.npy"}
|
||||
{"epoch": 0.7981859410430839, "step": 529, "batch_size": 64, "mean": 94.36181640625, "std": 98.57301330566406, "min": -166.52133178710938, "p10": -9.795011901855466, "median": 99.5583724975586, "p90": 209.75982360839845, "max": 273.3290710449219, "pos_frac": 0.84375, "sample": [120.33847045898438, -64.80685424804688, 90.25996398925781, -6.524284362792969, 51.6561164855957, 1.7265167236328125, 273.3290710449219, 201.59344482421875, -55.60116195678711, 18.400100708007812, 12.689291000366211, 1.830587387084961, 23.1051025390625, 98.14640045166016, 175.86769104003906, 143.79434204101562, -75.92438507080078, 181.5583953857422, -6.823333740234375, 164.88592529296875, 211.67645263671875, -11.068588256835938, -16.6505126953125, 198.3056640625, 83.95207977294922, 179.568359375, 2.5645065307617188, 20.257240295410156, 182.90797424316406, 29.236221313476562, 225.52117919921875, 172.1601104736328, 108.29777526855469, 188.70855712890625, 115.52799987792969, 110.2778091430664, 185.91607666015625, 196.71701049804688, 34.91477584838867, 230.62319946289062, 219.3338623046875, 190.14974975585938, 244.4386749267578, 29.646133422851562, 158.17587280273438, 184.95614624023438, 70.48121643066406, -2.5931968688964844, 104.92304229736328, 37.176116943359375, 36.69904327392578, 76.5196762084961, 47.917724609375, 202.0572967529297, -132.55979919433594, 205.28768920898438, 100.97034454345703, 196.99588012695312, 127.81275939941406, 74.02037048339844, -166.52133178710938, 8.117652893066406, 12.587394714355469, 213.64910888671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000529.npy"}
|
||||
{"epoch": 0.799697656840514, "step": 530, "batch_size": 64, "mean": 58.45021057128906, "std": 101.71440887451172, "min": -197.09539794921875, "p10": -30.510904693603514, "median": 35.71017837524414, "p90": 188.18270568847657, "max": 322.9999694824219, "pos_frac": 0.71875, "sample": [293.1734619140625, 67.1080322265625, 22.785646438598633, -9.911479949951172, 6.873100280761719, 4.435760498046875, 268.21514892578125, -12.538398742675781, 159.9881134033203, 154.04661560058594, 14.967853546142578, 217.99765014648438, -15.474512100219727, 37.92902374267578, 116.58271789550781, 63.26782989501953, 76.30430603027344, 104.91169738769531, 156.5382843017578, 30.169029235839844, 1.4485893249511719, 3.4458999633789062, -197.09539794921875, 165.909912109375, 138.86810302734375, -0.2128276824951172, 99.32435607910156, 67.75032043457031, 19.77143096923828, 188.43655395507812, 16.979087829589844, 20.03551483154297, 235.18359375, 322.9999694824219, 52.11631774902344, 51.63768768310547, 47.83388137817383, 93.2231216430664, -76.12391662597656, 9.640033721923828, -30.922271728515625, 92.42982482910156, -20.21630096435547, 60.495269775390625, -90.5295181274414, -23.46831512451172, 184.43804931640625, -19.164291381835938, -146.34715270996094, -98.96078491210938, -9.263986587524414, -69.3959732055664, 187.59039306640625, -15.993968963623047, 199.4560089111328, 102.38624572753906, 141.6813507080078, -29.551048278808594, -9.548049926757812, 16.499252319335938, 178.87457275390625, 86.01579284667969, 2.2747154235839844, 33.4913330078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000530.npy"}
|
||||
{"epoch": 0.8012093726379441, "step": 531, "batch_size": 64, "mean": 61.76841735839844, "std": 101.35540771484375, "min": -208.86048889160156, "p10": -49.86268157958984, "median": 46.12603950500488, "p90": 198.92458648681645, "max": 280.70928955078125, "pos_frac": 0.71875, "sample": [-52.98674011230469, 124.16012573242188, 230.9349365234375, 138.1405029296875, 99.18875885009766, 96.15750122070312, 3.3428268432617188, 280.70928955078125, 58.59068298339844, -15.865001678466797, 60.83778381347656, 0.1951885223388672, 258.9012451171875, 1.6368560791015625, 139.98638916015625, 172.21322631835938, -144.91256713867188, -61.88697052001953, 24.57225799560547, 187.9814910888672, 94.19056701660156, 235.38902282714844, -72.26885223388672, -14.333183288574219, -42.573211669921875, 18.257165908813477, 96.91231536865234, 74.844482421875, 17.807411193847656, 0.7710113525390625, 57.88776397705078, -12.142892837524414, -4.450590133666992, 31.232452392578125, -208.86048889160156, -5.550228118896484, 113.78610229492188, 148.05621337890625, 1.2717666625976562, -23.281723022460938, 28.844879150390625, 89.06725311279297, 188.08078002929688, 189.35427856445312, -84.28276062011719, -85.5205307006836, 189.45883178710938, 163.81683349609375, 105.55778503417969, 25.4901123046875, -20.92668914794922, 34.364315032958984, 86.23074340820312, 202.98133850097656, 97.51284790039062, -31.250736236572266, 86.29428100585938, -34.31093978881836, 184.96498107910156, 208.99905395507812, -3.6837692260742188, 1.227865219116211, 203.4639892578125, 18.60106658935547], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000531.npy"}
|
||||
{"epoch": 0.8027210884353742, "step": 532, "batch_size": 64, "mean": 65.69293212890625, "std": 104.639892578125, "min": -173.15841674804688, "p10": -76.17063674926757, "median": 59.964250564575195, "p90": 200.77471466064455, "max": 256.430908203125, "pos_frac": 0.71875, "sample": [109.00503540039062, 79.46862030029297, 211.85430908203125, 194.69937133789062, -30.6982421875, -30.7396240234375, 138.49374389648438, 193.15457153320312, -1.8239021301269531, 156.24485778808594, -100.4768295288086, 147.0769805908203, 157.79164123535156, 62.0447998046875, 56.63414001464844, 33.9674072265625, 57.88370132446289, 105.83648681640625, 1.1429271697998047, -120.52122497558594, -49.68421936035156, 11.649332046508789, 188.2562255859375, 41.29100036621094, -18.505889892578125, 68.08451843261719, 105.42532348632812, 256.430908203125, 120.46615600585938, 237.41159057617188, 182.0865020751953, 11.698482513427734, -64.42697143554688, 205.50039672851562, 26.155601501464844, 76.38145446777344, 154.76373291015625, -81.2036361694336, 17.942474365234375, 34.837188720703125, -119.3536376953125, -13.735641479492188, 31.814373016357422, -34.67206954956055, 205.60379028320312, 43.10314178466797, 185.25918579101562, -61.40625762939453, 46.502471923828125, 203.37843322753906, 156.42320251464844, 110.43872833251953, -25.967422485351562, 207.15345764160156, -173.15841674804688, 94.14051818847656, -131.38165283203125, 139.56887817382812, 184.00137329101562, -87.2896499633789, 4.32615852355957, -13.015275955200195, 126.18110656738281, 180.83412170410156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000532.npy"}
|
||||
{"epoch": 0.8042328042328042, "step": 533, "batch_size": 64, "mean": 69.84703826904297, "std": 101.81482696533203, "min": -187.86077880859375, "p10": -45.718721771240226, "median": 66.5691032409668, "p90": 189.30201721191406, "max": 288.293212890625, "pos_frac": 0.71875, "sample": [73.72306060791016, 181.26516723632812, 20.70484161376953, 35.106956481933594, 113.61244201660156, 84.12104034423828, 14.464675903320312, -34.1370735168457, 182.81544494628906, 135.32113647460938, -70.3052978515625, -2.3500938415527344, -5.669879913330078, -0.14798545837402344, 100.64387512207031, 288.293212890625, 15.242341995239258, 140.57196044921875, -55.426177978515625, -4.899497985839844, 11.766654968261719, -3.0499629974365234, 124.64796447753906, 8.911529541015625, -61.620399475097656, 145.41659545898438, 168.50350952148438, 59.41514587402344, 0.8816757202148438, -174.99839782714844, -33.25340270996094, -15.55521011352539, 187.1897735595703, 154.10400390625, -2.545602798461914, 233.25450134277344, 76.6904296875, 282.1768798828125, 45.71123504638672, -50.68228530883789, 23.477935791015625, 168.60888671875, 143.25437927246094, 126.67037200927734, 189.28097534179688, 193.96490478515625, 80.57429504394531, -87.92840576171875, 21.25360870361328, -0.46411895751953125, 8.804235458374023, 175.43695068359375, 155.48822021484375, -28.280990600585938, 1.3708972930908203, 80.7610092163086, 187.96153259277344, 145.53079223632812, 21.773956298828125, 189.31103515625, 189.75704956054688, 212.94151306152344, -187.86077880859375, 88.607421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000533.npy"}
|
||||
{"epoch": 0.8057445200302343, "step": 534, "batch_size": 64, "mean": 61.96025085449219, "std": 114.0582046508789, "min": -209.31350708007812, "p10": -74.1390167236328, "median": 48.716796875, "p90": 205.93677062988283, "max": 326.02960205078125, "pos_frac": 0.65625, "sample": [48.39122772216797, 6.666799545288086, 218.72528076171875, 17.236804962158203, -29.614530563354492, 83.66216278076172, 5.6859283447265625, -209.31350708007812, 109.371337890625, -106.82408142089844, -8.011041641235352, 39.487701416015625, 191.3732147216797, 167.91281127929688, -107.9982681274414, 326.02960205078125, 129.03500366210938, 35.77377700805664, 52.348052978515625, -27.75048828125, 154.01092529296875, 259.37249755859375, 107.40155029296875, 74.19815063476562, -11.788902282714844, -165.88433837890625, -168.6077880859375, -9.682943344116211, 225.61328125, 9.929840087890625, 133.0626678466797, -81.51429748535156, 223.21682739257812, 189.89195251464844, 196.782958984375, 107.13470458984375, -76.834228515625, 49.04236602783203, 277.01593017578125, 190.76742553710938, 74.08686828613281, 204.5463409423828, -53.70183563232422, -7.652679443359375, 168.2762908935547, 124.39361572265625, -0.7730712890625, 129.04771423339844, 30.201637268066406, -27.118148803710938, -8.319229125976562, 206.5326690673828, 100.92548370361328, -12.151073455810547, -25.643783569335938, 12.059829711914062, 157.83148193359375, -67.85018920898438, 110.89976501464844, -36.91429138183594, 57.58551788330078, 41.16474151611328, 186.47174072265625, -23.759441375732422], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000534.npy"}
|
||||
{"epoch": 0.8072562358276644, "step": 535, "batch_size": 64, "mean": 54.59990310668945, "std": 106.66319274902344, "min": -203.46875, "p10": -57.28538742065428, "median": 33.36092758178711, "p90": 197.31446838378912, "max": 261.27911376953125, "pos_frac": 0.625, "sample": [34.331748962402344, -37.7526741027832, 129.7349395751953, 45.469417572021484, 20.203514099121094, 23.949649810791016, -190.8900604248047, -64.18612670898438, -114.35747528076172, -0.1204833984375, 156.52142333984375, -106.03730773925781, 45.98455047607422, 157.3564453125, 3.7956085205078125, -15.108177185058594, -16.139617919921875, 64.07796478271484, -0.5719051361083984, -34.19206237792969, 30.31787872314453, 83.83970642089844, 174.61148071289062, -69.18203735351562, 147.47250366210938, 19.913002014160156, 217.15130615234375, -137.76324462890625, -41.18366241455078, 2.9677371978759766, 158.1767120361328, 220.9705810546875, 114.85116577148438, -7.281684875488281, 96.29893493652344, 182.1897735595703, 32.390106201171875, 261.27911376953125, -6.579833984375, 127.81230926513672, 46.64398956298828, -38.251800537109375, 254.02716064453125, 170.6686248779297, -18.674636840820312, -203.46875, -0.6168975830078125, 141.92022705078125, -11.19906997680664, 111.80115509033203, 91.15074920654297, -10.883064270019531, 44.658443450927734, 213.72930908203125, 4.930274963378906, 174.6951904296875, -1.0799407958984375, 129.3050537109375, 185.56381225585938, -38.968017578125, 103.31890869140625, -18.272911071777344, 250.72409057617188, 202.3504638671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000535.npy"}
|
||||
{"epoch": 0.8087679516250945, "step": 536, "batch_size": 64, "mean": 87.3644790649414, "std": 95.9031982421875, "min": -74.12467956542969, "p10": -15.269719123840327, "median": 68.4739761352539, "p90": 220.78829040527344, "max": 297.2413024902344, "pos_frac": 0.8125, "sample": [68.89120483398438, 24.876991271972656, -53.291236877441406, 190.7432098388672, 167.5601806640625, 24.514389038085938, 28.2972412109375, -27.955398559570312, 113.83792114257812, 12.627473831176758, -74.12467956542969, 150.53921508789062, 113.3314437866211, 225.97402954101562, 180.17222595214844, 151.55612182617188, 94.60238647460938, 197.38467407226562, 272.17059326171875, 66.16380310058594, 3.2494029998779297, 7.663057327270508, 194.37612915039062, 196.14016723632812, 38.54389953613281, 209.00872802734375, 160.01878356933594, 5.301713943481445, 85.65750885009766, 133.9070281982422, 220.67269897460938, -0.5309562683105469, 3.214824676513672, 90.75837707519531, -25.0301513671875, -27.149261474609375, 196.4141387939453, 7.8426971435546875, 226.81227111816406, 2.6111507415771484, 220.83782958984375, 206.6421661376953, 32.6036376953125, -4.229318618774414, 13.284526824951172, 11.883285522460938, 10.100807189941406, 71.04097747802734, 73.23078918457031, 297.2413024902344, -3.229055404663086, 186.66592407226562, 68.05674743652344, 236.4295654296875, 111.64768981933594, 58.10945129394531, 21.07384490966797, 255.2512664794922, -9.717168807983398, -73.67008972167969, 33.37749481201172, 137.78895568847656, -17.649383544921875, -2.7985687255859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000536.npy"}
|
||||
{"epoch": 0.8102796674225246, "step": 537, "batch_size": 64, "mean": 69.3580322265625, "std": 110.57966613769531, "min": -156.09408569335938, "p10": -58.621600341796864, "median": 59.61408615112305, "p90": 198.28935699462892, "max": 326.51068115234375, "pos_frac": 0.6875, "sample": [158.577392578125, 106.05121612548828, -16.7908935546875, 154.217529296875, 248.60601806640625, 188.22035217285156, 125.93122863769531, 0.15673255920410156, 149.4761962890625, 85.91864013671875, 94.1369857788086, -5.688604354858398, 197.3441925048828, 198.69442749023438, 131.85105895996094, 169.1741943359375, 193.8622283935547, 4.617605209350586, 0.4285430908203125, -6.561758041381836, -0.5872611999511719, 74.66949462890625, 20.075973510742188, 9.821708679199219, 0.13132286071777344, 233.912109375, 136.1231689453125, 19.800392150878906, -118.42903900146484, -8.17384147644043, 44.558677673339844, 32.111881256103516, 137.53712463378906, -47.751487731933594, -125.58949279785156, 22.753536224365234, 288.22052001953125, 106.24452209472656, -1.944427490234375, -17.873790740966797, 19.734619140625, 194.56643676757812, -147.05149841308594, 104.52821350097656, -32.70233154296875, -14.754049301147461, 139.53387451171875, -24.415321350097656, 130.9912567138672, 81.30720520019531, -10.38714599609375, 139.68746948242188, -29.27770233154297, 177.4264678955078, 188.63003540039062, 326.51068115234375, -110.39350891113281, 145.0817413330078, -95.32875061035156, 34.03303527832031, 205.97509765625, 250.75836181640625, -156.09408569335938, -63.28022003173828], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000537.npy"}
|
||||
{"epoch": 0.8117913832199547, "step": 538, "batch_size": 64, "mean": 73.21534729003906, "std": 98.36906433105469, "min": -186.17816162109375, "p10": -34.94748306274414, "median": 57.86605453491211, "p90": 185.5406951904297, "max": 341.31048583984375, "pos_frac": 0.78125, "sample": [-16.51372528076172, 183.30075073242188, 37.60907745361328, 16.44186019897461, 94.68336486816406, -186.17816162109375, 74.60708618164062, -35.05067443847656, 111.10755920410156, 160.5150909423828, 86.31549835205078, 186.50067138671875, 36.723907470703125, 160.89820861816406, 14.021406173706055, 43.931793212890625, 161.1498565673828, 170.87889099121094, 20.637168884277344, 34.90572738647461, 45.38873291015625, 2.6488265991210938, 194.4925994873047, 156.64083862304688, 44.892127990722656, -70.92269897460938, 160.3651580810547, -46.280128479003906, 227.6964111328125, -3.1710586547851562, -34.706703186035156, 182.80422973632812, -19.136474609375, -13.281585693359375, 7.431983947753906, 43.89836883544922, 40.484710693359375, 66.09939575195312, 3.8069915771484375, 44.81028747558594, 280.3827209472656, 5.964813232421875, 232.13955688476562, 100.16522216796875, -109.2562484741211, 137.0637664794922, -48.907920837402344, 182.93612670898438, 163.72482299804688, 341.31048583984375, 181.50143432617188, 53.68318176269531, 75.12492370605469, -111.7574462890625, 138.67132568359375, 106.59089660644531, 214.64675903320312, -0.9918785095214844, 111.24903869628906, 42.679229736328125, 62.048927307128906, 64.31578063964844, 89.14624786376953, -17.096782684326172], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000538.npy"}
|
||||
{"epoch": 0.8133030990173847, "step": 539, "batch_size": 64, "mean": 56.014408111572266, "std": 100.44036865234375, "min": -152.34103393554688, "p10": -34.87076416015624, "median": 25.211605072021484, "p90": 201.52527465820313, "max": 332.86798095703125, "pos_frac": 0.703125, "sample": [64.1229476928711, 97.23295593261719, 2.27618408203125, 219.12033081054688, 11.609518051147461, -26.30524444580078, 201.99169921875, 185.61257934570312, 135.11505126953125, 10.662055969238281, 48.05998229980469, 25.20782470703125, 99.84207153320312, -0.440948486328125, -37.7535400390625, -11.499229431152344, -7.6947479248046875, -3.3292388916015625, 207.43251037597656, 132.86000061035156, 1.4271869659423828, -52.706783294677734, 45.67059326171875, -116.90484619140625, 10.711814880371094, -152.34103393554688, -18.847402572631836, 155.62408447265625, 261.19580078125, 28.36224365234375, 25.21538543701172, -11.579681396484375, -28.144287109375, 27.26262664794922, 289.2535095214844, 63.67438507080078, 8.953315734863281, 73.74227905273438, 200.43695068359375, -1.809835433959961, -16.434890747070312, 68.20664978027344, 119.49093627929688, 22.441123962402344, 90.492431640625, 82.32685852050781, -99.87535858154297, 147.032470703125, 8.418510437011719, -21.291473388671875, 187.36976623535156, 64.35771942138672, 16.764812469482422, -50.364341735839844, 170.68455505371094, 70.54086303710938, -8.761856079101562, 15.456737518310547, -151.0068817138672, 104.40216827392578, 12.480318069458008, 235.9630126953125, 332.86798095703125, 20.04095458984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000539.npy"}
|
||||
{"epoch": 0.8148148148148148, "step": 540, "batch_size": 64, "mean": 84.75627136230469, "std": 100.54325866699219, "min": -148.25538635253906, "p10": -40.06115112304686, "median": 68.67847061157227, "p90": 214.361296081543, "max": 282.1188049316406, "pos_frac": 0.765625, "sample": [-4.274261474609375, -2.51300048828125, 282.1188049316406, 194.67303466796875, 4.749715805053711, -148.25538635253906, 142.53871154785156, 133.62144470214844, 78.31652069091797, -2.152873992919922, 42.93612289428711, 174.99337768554688, 185.5958251953125, 43.018211364746094, -106.76551055908203, 59.04042053222656, 168.42474365234375, 142.1083984375, 15.537700653076172, 141.2306365966797, 26.203201293945312, -47.25968933105469, 128.406982421875, 34.571380615234375, 235.54833984375, 161.02081298828125, -46.07805633544922, 152.6907958984375, -75.02450561523438, 27.698410034179688, 108.0975341796875, 193.86521911621094, 237.42498779296875, 162.4250030517578, 220.11135864257812, -7.626335144042969, -26.021705627441406, 193.651611328125, -47.80442810058594, 2.2732486724853516, -0.6992149353027344, 217.16224670410156, 177.25119018554688, 207.82574462890625, 202.6405792236328, 33.18049621582031, 227.35340881347656, 39.306514739990234, 90.10630798339844, -92.39702606201172, 37.989418029785156, 122.32803344726562, 52.871002197265625, 8.020088195800781, -1.4375534057617188, 45.980804443359375, -8.649765014648438, 243.80140686035156, 37.11576843261719, 104.01992797851562, 203.23361206054688, 13.630681991577148, 84.29275512695312, 200.35806274414062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000540.npy"}
|
||||
{"epoch": 0.8163265306122449, "step": 541, "batch_size": 64, "mean": 73.00025939941406, "std": 95.63221740722656, "min": -182.3061981201172, "p10": -40.011634826660156, "median": 73.3428955078125, "p90": 190.6874969482422, "max": 240.96334838867188, "pos_frac": 0.6875, "sample": [175.14605712890625, 76.19783020019531, 98.72488403320312, 188.6483154296875, -12.012496948242188, 146.97225952148438, -73.75570678710938, 188.01976013183594, 180.9322509765625, 8.68243408203125, 25.151077270507812, -41.242218017578125, -12.26373291015625, 139.83746337890625, -8.992477416992188, -182.3061981201172, 175.98155212402344, 176.30191040039062, 70.48796081542969, 191.56143188476562, -62.77143096923828, 37.058807373046875, 195.43064880371094, 198.12228393554688, 171.82662963867188, 119.30958557128906, 230.48489379882812, -37.14027404785156, -68.4732666015625, -5.295345306396484, 118.9283218383789, 25.07213020324707, 192.1659698486328, 240.96334838867188, 11.749717712402344, 126.53868103027344, 113.66383361816406, -11.0203857421875, 137.64212036132812, -1.910684585571289, -4.565435409545898, -95.04318237304688, 166.29393005371094, 175.7310028076172, 15.411066055297852, 102.10334777832031, 155.74488830566406, 171.66165161132812, -83.70097351074219, 34.26545715332031, -0.2258167266845703, 167.3613739013672, 66.00548553466797, 59.59867858886719, -18.015518188476562, -2.70849609375, 79.01654052734375, 86.29722595214844, -21.282602310180664, 32.491310119628906, 126.96340942382812, -6.30906867980957, 195.15687561035156, 25.34729766845703], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000541.npy"}
|
||||
{"epoch": 0.817838246409675, "step": 542, "batch_size": 64, "mean": 81.22418212890625, "std": 112.80807495117188, "min": -239.02015686035156, "p10": -73.9563552856445, "median": 80.09470748901367, "p90": 210.5596862792969, "max": 289.75518798828125, "pos_frac": 0.78125, "sample": [156.78274536132812, 181.39788818359375, 16.100303649902344, 274.14617919921875, 87.57097625732422, 17.640289306640625, 128.09368896484375, 62.50904846191406, 178.72372436523438, 92.06624603271484, 2.808177947998047, -18.735641479492188, -93.74579620361328, 208.107421875, 16.720457077026367, 169.1114501953125, 54.35381317138672, 289.75518798828125, -147.67527770996094, 189.21725463867188, 188.42437744140625, 248.7271728515625, 55.76866149902344, -0.6374130249023438, -137.1987762451172, -239.02015686035156, 135.253662109375, 55.736328125, 148.08355712890625, -50.77899169921875, 188.26846313476562, 198.15757751464844, 0.9915599822998047, 211.61065673828125, -110.03417205810547, -8.708568572998047, -1.745330810546875, 116.88746643066406, 1.9298248291015625, 132.545654296875, 9.31646728515625, 22.222808837890625, 198.08346557617188, -83.88951110839844, 87.00608825683594, -0.5472946166992188, 197.22286987304688, 29.782127380371094, 80.55768585205078, 216.72003173828125, 149.3890380859375, 214.43994140625, -90.28700256347656, 46.750709533691406, -7.511072158813477, 5.412494659423828, 19.586654663085938, 192.82470703125, 171.90496826171875, 69.5842514038086, 79.63172912597656, 234.39736938476562, 158.84359741210938, 197.69546508789062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000542.npy"}
|
||||
{"epoch": 0.8193499622071051, "step": 543, "batch_size": 64, "mean": 78.20692443847656, "std": 103.87948608398438, "min": -216.23727416992188, "p10": -30.59868698120117, "median": 77.37621688842773, "p90": 207.34549407958986, "max": 272.4300231933594, "pos_frac": 0.8125, "sample": [13.390426635742188, 29.618648529052734, 75.72149658203125, 0.5465831756591797, 127.00077819824219, 10.995426177978516, 153.98013305664062, 199.26107788085938, -181.4714813232422, 247.591796875, 94.60195922851562, 167.15708923339844, 156.2630157470703, 205.14710998535156, 238.83963012695312, 120.17036437988281, 183.09439086914062, -127.30335998535156, 75.47927856445312, 208.28765869140625, 15.824041366577148, -12.158653259277344, 0.09697341918945312, 54.5118408203125, 7.295635223388672, 186.73538208007812, -216.23727416992188, 1.3375244140625, -39.06449508666992, -56.60736083984375, 39.68074035644531, 174.02880859375, 236.25421142578125, 2.023029327392578, 177.8466033935547, 88.50553894042969, 193.08636474609375, -3.757913589477539, -27.964698791503906, -7.395851135253906, 88.31643676757812, 130.5202178955078, 210.50552368164062, 33.29864501953125, 187.14361572265625, 24.67444610595703, 122.43852996826172, 272.4300231933594, 34.1612663269043, 125.2486572265625, -16.636302947998047, 79.03093719482422, 108.81036376953125, -31.7275390625, 133.74012756347656, 51.575164794921875, 119.00863647460938, 36.394134521484375, 12.804244995117188, -65.94802856445312, 169.21664428710938, 237.30914306640625, 4.882534027099609, 125.63352966308594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000543.npy"}
|
||||
{"epoch": 0.8208616780045351, "step": 544, "batch_size": 64, "mean": 85.57197570800781, "std": 116.69859313964844, "min": -153.94747924804688, "p10": -53.23086853027343, "median": 87.99333953857422, "p90": 244.0223602294922, "max": 299.7696533203125, "pos_frac": 0.75, "sample": [8.9014892578125, -153.93649291992188, 70.1798095703125, 28.155351638793945, 136.54437255859375, 85.87078857421875, -109.07878112792969, 31.81833839416504, -46.76593017578125, -6.422019958496094, 46.099639892578125, 90.11589050292969, -29.272857666015625, 244.62615966796875, 112.77373504638672, -7.562387466430664, -153.94747924804688, 122.37749481201172, 123.6529312133789, 75.44357299804688, 139.4990692138672, 150.0955810546875, 168.74591064453125, 179.51193237304688, 240.77835083007812, 42.65699005126953, -2.7502899169921875, -11.129890441894531, 1.3509349822998047, -153.22225952148438, 299.7696533203125, -148.87413024902344, 185.0338897705078, -8.424285888671875, 197.40895080566406, 175.17930603027344, 253.5352783203125, -34.046417236328125, 42.539207458496094, 94.20177459716797, 183.0216827392578, 41.870811462402344, 249.85186767578125, 69.35462951660156, 75.5356674194336, 99.77301025390625, 125.53306579589844, 159.54428100585938, 242.61349487304688, -56.001556396484375, 35.047279357910156, 167.95318603515625, 289.9385986328125, 274.676513671875, -84.73487091064453, -21.892227172851562, 219.71749877929688, 33.69622802734375, 197.4219512939453, 158.29965209960938, 2.9311065673828125, 98.32272338867188, 134.81283569335938, 297.88592529296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000544.npy"}
|
||||
{"epoch": 0.8223733938019653, "step": 545, "batch_size": 64, "mean": 51.16197967529297, "std": 105.5416030883789, "min": -209.7698516845703, "p10": -75.04078216552733, "median": 30.494722366333008, "p90": 197.6060256958008, "max": 243.62339782714844, "pos_frac": 0.6875, "sample": [130.69810485839844, 110.26641082763672, -68.86882019042969, 209.82872009277344, 16.872264862060547, -7.272926330566406, -64.90763092041016, -77.70415496826172, -43.198951721191406, 39.25092697143555, 147.16989135742188, -6.721710205078125, 191.13233947753906, 70.51512145996094, 66.06475067138672, 2.9782791137695312, 71.99394226074219, 204.07061767578125, -69.29324340820312, 107.29620361328125, -24.50464630126953, 215.9384765625, -25.32373809814453, 154.90560913085938, 108.11151123046875, -27.747825622558594, 91.6407699584961, 28.653268814086914, -31.062423706054688, 3.589872360229492, -3.8374595642089844, -77.50401306152344, -109.45828247070312, 149.47598266601562, -84.797607421875, 7.517877578735352, 188.0421142578125, 200.38046264648438, 158.0803985595703, 7.133872985839844, -10.589374542236328, 243.35940551757812, 168.57899475097656, 11.596397399902344, 25.14159393310547, 88.41519927978516, -47.877952575683594, 129.39309692382812, 75.66952514648438, -209.7698516845703, 31.9708251953125, 79.673583984375, 21.20355224609375, 243.62339782714844, 99.68321228027344, -129.51272583007812, 5.037174224853516, 146.174072265625, -196.7918701171875, 1.772745132446289, 99.38556671142578, 180.15069580078125, 229.65640258789062, 29.018619537353516], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000545.npy"}
|
||||
{"epoch": 0.8238851095993953, "step": 546, "batch_size": 64, "mean": 56.60765075683594, "std": 113.63751220703125, "min": -242.58477783203125, "p10": -114.91747741699218, "median": 50.54178237915039, "p90": 187.38251037597658, "max": 312.639404296875, "pos_frac": 0.765625, "sample": [193.93023681640625, -53.18206787109375, 183.79644775390625, 68.1317367553711, 106.71879577636719, 163.5875244140625, -13.341278076171875, -54.82295227050781, 45.224456787109375, -111.64163208007812, -242.58477783203125, 160.0977020263672, 6.700275421142578, 29.413055419921875, -178.79086303710938, 260.5311584472656, 11.29017448425293, -12.200820922851562, -61.64508819580078, 27.398399353027344, -164.395751953125, 55.859107971191406, 116.4647216796875, 132.0566864013672, 11.200363159179688, 85.08645629882812, 158.46249389648438, -53.31549072265625, 147.6513214111328, 211.94906616210938, 156.94679260253906, 18.31287384033203, 166.04421997070312, 43.26106262207031, 24.685909271240234, -116.654052734375, 36.196937561035156, 312.639404296875, 134.3497772216797, 103.6641845703125, 1.8950080871582031, 32.183204650878906, -116.3214111328125, 125.38774871826172, 26.001611709594727, 116.7256088256836, -19.446731567382812, 18.893474578857422, 97.35070037841797, 103.18939208984375, 163.67874145507812, 37.50373840332031, 227.61181640625, -133.78515625, 12.881553649902344, 77.42298889160156, 158.0199737548828, 93.70146179199219, 188.3924560546875, 73.63282012939453, 202.33084106445312, -166.4364471435547, 185.02597045898438, 7.973791122436523], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000546.npy"}
|
||||
{"epoch": 0.8253968253968254, "step": 547, "batch_size": 64, "mean": 81.24012756347656, "std": 109.72623443603516, "min": -223.16961669921875, "p10": -8.668854331970211, "median": 73.44515228271484, "p90": 213.3566131591797, "max": 346.21295166015625, "pos_frac": 0.828125, "sample": [3.8133163452148438, 35.094940185546875, 10.562398910522461, 14.266189575195312, 258.010009765625, 94.48377990722656, -83.6477279663086, 238.52389526367188, 111.54823303222656, 157.3240509033203, 214.03533935546875, 147.38925170898438, 41.407981872558594, 82.6784439086914, 74.84121704101562, -10.601921081542969, 126.45512390136719, 183.0754852294922, 30.694149017333984, 26.182235717773438, 220.5764923095703, 187.78822326660156, 186.86831665039062, 7.5074462890625, 260.5992736816406, 161.43313598632812, -2.2044525146484375, 99.19950103759766, 70.13153076171875, 7.257293701171875, 110.18305969238281, -3.2969837188720703, -219.05718994140625, 112.91201782226562, 14.903861999511719, 72.04908752441406, 200.00872802734375, 12.723094940185547, 211.77291870117188, 67.58282470703125, 135.77017211914062, 185.8837127685547, 125.04725646972656, 160.7081756591797, -163.4306640625, 75.11228942871094, 14.463432312011719, -223.16961669921875, 46.130950927734375, 23.991867065429688, 211.2932891845703, 195.9617462158203, 346.21295166015625, -1.1063461303710938, 88.09210968017578, -18.49969482421875, 18.780155181884766, 197.467041015625, -4.158365249633789, 43.261558532714844, 11.866933822631836, 8.079879760742188, 229.16635131835938, -42.6317138671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000547.npy"}
|
||||
{"epoch": 0.8269085411942555, "step": 548, "batch_size": 64, "mean": 51.61526107788086, "std": 102.85539245605469, "min": -147.0353240966797, "p10": -82.03423156738282, "median": 41.92956352233887, "p90": 198.5491729736328, "max": 252.97128295898438, "pos_frac": 0.65625, "sample": [154.99256896972656, 164.51123046875, -48.604705810546875, -62.291107177734375, -147.0353240966797, 77.17720031738281, -9.564453125, 235.77273559570312, 167.7545166015625, 47.86261749267578, -36.130157470703125, 197.0558624267578, 0.3383445739746094, 15.008506774902344, 53.53550720214844, -3.9277172088623047, -82.60200500488281, 39.77489471435547, -8.516304016113281, 125.144287109375, 240.98904418945312, 59.09342956542969, -89.41294860839844, 18.31873321533203, 148.7388916015625, 42.95637130737305, 225.093017578125, 200.3557586669922, 21.968292236328125, 111.30125427246094, -64.16154479980469, 118.643310546875, -15.2330322265625, 135.21775817871094, 32.86622619628906, -57.35926818847656, 212.7882843017578, 40.90275573730469, -63.78599548339844, 132.56954956054688, 64.0013656616211, -5.876550674438477, 46.33092498779297, -144.203369140625, 252.97128295898438, -6.041656494140625, -13.295692443847656, 77.71366882324219, -127.83377838134766, 129.69195556640625, 83.47457885742188, -115.93492126464844, 44.14075469970703, 155.4005889892578, 103.72325897216797, 11.365058898925781, 165.58729553222656, -14.388702392578125, 24.006072998046875, -82.87214660644531, 199.1891632080078, 179.36334228515625, -80.70942687988281, 25.467239379882812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000548.npy"}
|
||||
{"epoch": 0.8284202569916855, "step": 549, "batch_size": 64, "mean": 62.981529235839844, "std": 119.12715148925781, "min": -244.2254638671875, "p10": -82.94537277221677, "median": 58.96900749206543, "p90": 200.02432556152345, "max": 324.107666015625, "pos_frac": 0.671875, "sample": [-1.818603515625, 23.629920959472656, -59.49292755126953, 87.53873443603516, 7.952766418457031, -93.88475036621094, 98.59709930419922, 4.188295364379883, 66.52474212646484, 217.2274169921875, 254.27748107910156, 2.519500732421875, 89.95134735107422, 196.79393005371094, -4.604339599609375, -51.277931213378906, -109.59326171875, 81.54978942871094, 148.7673797607422, 175.8867950439453, 185.4295654296875, 147.1328582763672, 269.6960144042969, -112.03349304199219, 72.77725219726562, 14.817962646484375, 35.02444076538086, 200.57339477539062, 163.2423095703125, -63.685508728027344, 198.7431640625, 187.6951904296875, 324.107666015625, -29.457124710083008, 168.0478515625, 274.3061218261719, -14.222036361694336, -167.98568725585938, -17.785552978515625, 159.0355224609375, -244.2254638671875, 33.93370056152344, -13.181838989257812, 54.922359466552734, 56.143672943115234, 153.157958984375, -38.6705322265625, 141.4271697998047, 243.01394653320312, 61.794342041015625, -1.111724853515625, 149.36077880859375, 160.36773681640625, 96.34733581542969, 76.39413452148438, -15.9459228515625, 5.368535995483398, -5.211460113525391, -31.29910659790039, 114.59117889404297, 188.61285400390625, -201.5321807861328, -91.19960021972656, 7.5664215087890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000549.npy"}
|
||||
{"epoch": 0.8299319727891157, "step": 550, "batch_size": 64, "mean": 41.31805419921875, "std": 102.05292510986328, "min": -201.31712341308594, "p10": -87.51206054687499, "median": 25.27721881866455, "p90": 178.533219909668, "max": 290.9773864746094, "pos_frac": 0.71875, "sample": [89.62802124023438, 213.6195068359375, 194.17063903808594, 17.17047119140625, -117.71817016601562, 11.921913146972656, 1.1244678497314453, -33.866455078125, -22.700523376464844, 93.89830017089844, 175.59747314453125, 49.172080993652344, 12.790458679199219, 1.5841293334960938, 14.35893440246582, 220.79830932617188, 212.54623413085938, 86.35553741455078, 26.62908935546875, 27.86886215209961, 156.28521728515625, -18.434017181396484, 133.73306274414062, 99.67889404296875, 165.9856414794922, -46.25971984863281, 290.9773864746094, -10.937908172607422, 12.9227294921875, -113.57685852050781, -132.5167694091797, 29.745864868164062, -58.3271484375, 94.72138977050781, 5.053070068359375, 65.4216079711914, 77.53997039794922, 19.154388427734375, -60.72388458251953, 13.565544128417969, -93.32814025878906, 94.57771301269531, 31.537662506103516, 70.01312255859375, -17.058334350585938, 138.2800750732422, 15.497711181640625, 14.59697151184082, 59.35157775878906, 135.4913787841797, -143.05862426757812, 111.27680969238281, 194.92291259765625, 52.895225524902344, 162.4650115966797, 23.92534828186035, -201.31712341308594, 11.685789108276367, 179.79139709472656, -176.74435424804688, 134.3409881591797, -60.23767852783203, -19.566593170166016, -73.94120788574219], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000550.npy"}
|
||||
{"epoch": 0.8314436885865457, "step": 551, "batch_size": 64, "mean": 53.629493713378906, "std": 96.5870132446289, "min": -228.29632568359375, "p10": -23.551815795898435, "median": 22.79295539855957, "p90": 185.2851425170899, "max": 426.33111572265625, "pos_frac": 0.734375, "sample": [-20.223098754882812, 77.24366760253906, 99.953857421875, 47.931304931640625, 108.0009765625, 39.81134796142578, 426.33111572265625, 5.127971649169922, 35.187164306640625, 22.41332244873047, -19.78177261352539, 28.04601287841797, 170.85006713867188, -3.1428050994873047, 45.7569580078125, 23.172588348388672, 165.68853759765625, 21.835655212402344, -228.29632568359375, -17.40428924560547, -12.67576789855957, 7.216819763183594, 114.30878448486328, 39.038116455078125, 97.64228057861328, 191.55795288085938, -24.978408813476562, 89.60272979736328, 43.55137634277344, 164.6520233154297, 19.870203018188477, -48.246009826660156, -0.40375518798828125, -27.18359375, 38.3670654296875, 256.679931640625, 6.7140350341796875, 152.35159301757812, -19.87140464782715, 4.7770233154296875, 10.338424682617188, 30.527305603027344, -6.705818176269531, 1.2621803283691406, 171.99871826171875, 240.60662841796875, 190.9793243408203, 17.143878936767578, 18.82817840576172, 2.9409523010253906, 66.74109649658203, 111.4852066040039, 39.620269775390625, -45.062984466552734, 20.566383361816406, -5.3018646240234375, -11.31387710571289, 220.94834899902344, 15.952850341796875, 11.008007049560547, -86.97769165039062, -30.305866241455078, 192.20249938964844, 133.33212280273438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000551.npy"}
|
||||
{"epoch": 0.8329554043839759, "step": 552, "batch_size": 64, "mean": 62.658329010009766, "std": 94.27972412109375, "min": -206.656005859375, "p10": -28.236582183837886, "median": 64.85116195678711, "p90": 183.89151763916016, "max": 262.97052001953125, "pos_frac": 0.796875, "sample": [12.193925857543945, 38.907470703125, 24.21198272705078, -24.187843322753906, 137.34762573242188, 49.59151840209961, 2.649534225463867, 41.57102966308594, 37.37739562988281, 143.89166259765625, 229.09945678710938, 262.97052001953125, 121.19808959960938, 63.86265563964844, -17.373764038085938, -78.06000518798828, 16.733718872070312, 132.15769958496094, 61.09088897705078, -29.971755981445312, 54.15431213378906, 115.27439880371094, 117.94569396972656, 155.83973693847656, 147.1671142578125, -17.744384765625, 69.08910369873047, 136.47203063964844, 189.1511688232422, -206.656005859375, 4.03944206237793, -2.3310317993164062, 70.90536499023438, -187.3629150390625, 84.77689361572266, -0.11313629150390625, 172.45416259765625, 36.96669006347656, 11.708549499511719, -7.2154388427734375, 196.05398559570312, 19.039764404296875, -45.218849182128906, 101.64497375488281, 183.62924194335938, 16.622833251953125, 71.80754089355469, 65.83966827392578, -205.60569763183594, 199.7195281982422, 198.1878662109375, 105.25418853759766, 12.91291618347168, -58.90113067626953, 114.06272888183594, 89.2278060913086, 78.48332977294922, 95.29931640625, 184.00392150878906, 98.9544677734375, 57.069847106933594, 30.117698669433594, 162.2677764892578, 69.8758316040039], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000552.npy"}
|
||||
{"epoch": 0.8344671201814059, "step": 553, "batch_size": 64, "mean": 62.072757720947266, "std": 114.12794494628906, "min": -203.95782470703125, "p10": -62.15536689758301, "median": 47.22665023803711, "p90": 229.1072219848633, "max": 322.54388427734375, "pos_frac": 0.671875, "sample": [251.678466796875, -26.738616943359375, -5.1392669677734375, 94.6861343383789, 8.836479187011719, -11.848398208618164, 166.1108856201172, 249.71270751953125, -9.963783264160156, -17.09064483642578, 45.55493927001953, 132.95272827148438, 225.1050262451172, 279.7349853515625, -26.854225158691406, 129.40261840820312, -11.892026901245117, 57.923072814941406, 88.66452026367188, -203.95782470703125, -3.185873031616211, 131.4190216064453, -20.026466369628906, 153.37936401367188, 192.27684020996094, 34.818321228027344, 39.56787872314453, 48.89836120605469, 3.741943359375, -56.013824462890625, 271.615234375, 187.9218292236328, 88.28551483154297, 322.54388427734375, 29.302658081054688, 129.96395874023438, 191.90106201171875, 151.73973083496094, 95.08834838867188, 90.06026458740234, 36.395484924316406, 74.82060241699219, 27.129194259643555, -9.576595306396484, -69.3873291015625, 55.116615295410156, -1.4081306457519531, 230.82244873046875, -179.42633056640625, -117.40618896484375, 55.594818115234375, 169.09974670410156, 128.61288452148438, -72.8922348022461, -60.61467742919922, 21.502960205078125, 7.704547882080078, -183.2624053955078, 117.6895751953125, 77.77103424072266, -43.681922912597656, -62.8156623840332, 251.01068115234375, 19.681594848632812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000553.npy"}
|
||||
{"epoch": 0.8359788359788359, "step": 554, "batch_size": 64, "mean": 73.58736419677734, "std": 103.18988037109375, "min": -196.517822265625, "p10": -41.97103118896483, "median": 66.94636535644531, "p90": 194.93787689208983, "max": 262.9522705078125, "pos_frac": 0.765625, "sample": [87.88522338867188, 232.01124572753906, 195.2161102294922, 251.0889892578125, -9.054336547851562, 95.04756164550781, 9.37278938293457, 8.406452178955078, 67.96917724609375, 167.77139282226562, -11.686210632324219, -5.195381164550781, -4.7026519775390625, 139.86605834960938, -196.517822265625, 38.54087829589844, 119.3211669921875, -24.8001708984375, 145.45037841796875, 38.41279602050781, -13.055931091308594, 170.55279541015625, 42.44569396972656, -83.24908447265625, 152.29759216308594, 150.77011108398438, 198.67428588867188, 43.528263092041016, -2.171123504638672, 39.97447967529297, 194.28866577148438, 0.6656036376953125, 155.62286376953125, 77.23211669921875, 6.535285949707031, -92.54698944091797, 187.65818786621094, 31.096330642700195, 140.51528930664062, 4.191070556640625, -193.24009704589844, 166.9214630126953, 65.92355346679688, 104.39764404296875, -49.32997131347656, 129.64962768554688, 19.5997371673584, 145.6009063720703, 48.812347412109375, 13.17877197265625, 163.34445190429688, -13.915840148925781, -50.25394058227539, 173.7228240966797, 207.04100036621094, 18.021827697753906, 188.84384155273438, 178.07659912109375, 122.04979705810547, 195.8275909423828, 262.9522705078125, -132.97088623046875, 179.67291259765625, 16.235641479492188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000554.npy"}
|
||||
{"epoch": 0.8374905517762661, "step": 555, "batch_size": 64, "mean": 59.735477447509766, "std": 115.2296371459961, "min": -237.24000549316406, "p10": -94.68007736206053, "median": 61.8268928527832, "p90": 199.29911193847659, "max": 246.7166290283203, "pos_frac": 0.734375, "sample": [-0.7538700103759766, -215.248046875, 68.87000274658203, 65.14500427246094, 201.655029296875, 19.951622009277344, -237.24000549316406, -19.0146484375, -128.07235717773438, 111.73881530761719, 212.20834350585938, -49.04505920410156, 134.59881591796875, 41.552608489990234, 137.1614227294922, 223.38876342773438, 49.36815643310547, -5.881378173828125, 8.363723754882812, 210.66574096679688, 98.51789093017578, 6.0310211181640625, 211.76414489746094, 60.913787841796875, 187.30947875976562, 50.22129821777344, 113.81001281738281, 188.74038696289062, 39.35343933105469, 193.80197143554688, 164.88427734375, 168.92111206054688, 65.6038589477539, 160.10479736328125, -122.82111358642578, 208.65029907226562, -217.3209228515625, 168.2648468017578, -84.25354766845703, 48.531131744384766, 76.97146606445312, 50.812286376953125, -28.66204071044922, 33.262306213378906, -73.17811584472656, -11.169605255126953, 104.32351684570312, 113.5439224243164, 177.4031524658203, -22.856979370117188, 167.49020385742188, 62.73999786376953, -200.13697814941406, -0.21405792236328125, 175.59454345703125, 49.57417297363281, 95.20501708984375, 48.6082763671875, 112.55162048339844, -99.14859008789062, 246.7166290283203, 26.039419174194336, 166.93051147460938, 10.229070663452148], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000555.npy"}
|
||||
{"epoch": 0.8390022675736961, "step": 556, "batch_size": 64, "mean": 53.17656326293945, "std": 114.0173568725586, "min": -216.80601501464844, "p10": -90.61827392578125, "median": 28.824644088745117, "p90": 203.44151000976564, "max": 303.94921875, "pos_frac": 0.734375, "sample": [172.8524169921875, 22.395050048828125, 44.339080810546875, -10.084415435791016, 16.585189819335938, 181.90106201171875, 209.5056610107422, 68.83045959472656, -8.708818435668945, -92.25656127929688, 18.84739875793457, 0.5142135620117188, 94.3490219116211, 197.30462646484375, 29.70296859741211, 1.9062042236328125, 40.161407470703125, -6.420127868652344, 110.30198669433594, 174.24246215820312, -26.23101043701172, 158.86541748046875, 204.34835815429688, 37.30998992919922, -49.829505920410156, 24.901527404785156, -216.80601501464844, 25.51030158996582, 144.26126098632812, -149.88351440429688, 1.923898696899414, -190.35433959960938, 97.619873046875, 23.841758728027344, 36.30882263183594, 2.4297542572021484, 158.2904815673828, 173.98751831054688, 9.376472473144531, 2.469907760620117, 188.65684509277344, 303.94921875, 32.616825103759766, 3.58056640625, 244.2436065673828, 241.4410400390625, -52.013763427734375, 27.946319580078125, -147.63430786132812, -3.6699981689453125, 211.9675750732422, -93.46833801269531, -57.297264099121094, 209.60980224609375, 7.974796295166016, -90.81491088867188, 66.38204956054688, -90.15945434570312, 82.57136535644531, 186.42184448242188, 201.32553100585938, 201.28054809570312, 80.05259704589844, -86.27250671386719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000556.npy"}
|
||||
{"epoch": 0.8405139833711263, "step": 557, "batch_size": 64, "mean": 81.75762176513672, "std": 96.13499450683594, "min": -128.19723510742188, "p10": -19.29576644897461, "median": 82.37438201904297, "p90": 212.59469757080078, "max": 261.5810852050781, "pos_frac": 0.75, "sample": [207.08465576171875, 132.29908752441406, 104.5684814453125, 22.724090576171875, -5.625743865966797, -33.35197448730469, 198.58224487304688, 39.191978454589844, 197.17189025878906, 96.84895324707031, -1.7588958740234375, -10.173690795898438, -4.264106750488281, 84.3327407836914, 8.579883575439453, 88.6212158203125, 55.827144622802734, 1.5270309448242188, 190.84121704101562, 101.28241729736328, 19.91408920288086, 16.220855712890625, 224.78750610351562, 86.29257202148438, -66.35336303710938, -61.6778564453125, 180.58120727539062, 222.97210693359375, 37.47079086303711, -59.78827667236328, 261.5810852050781, 142.7286834716797, 177.87118530273438, -11.213367462158203, -18.933670043945312, 199.58055114746094, 119.13953399658203, 132.29104614257812, 41.34126281738281, 213.19561767578125, -18.886215209960938, 18.606294631958008, -19.450950622558594, 3.10565185546875, -0.6757373809814453, -128.19723510742188, 250.36183166503906, 247.57992553710938, 184.55648803710938, 26.49011993408203, 15.686882019042969, 80.41602325439453, 29.947250366210938, 178.34381103515625, 168.09107971191406, 87.16079711914062, 211.1925506591797, 89.93717956542969, 43.866390228271484, 88.47979736328125, -53.36830139160156, -4.9433135986328125, 220.3197479248047, 181.5576934814453], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000557.npy"}
|
||||
{"epoch": 0.8420256991685563, "step": 558, "batch_size": 64, "mean": 87.41616821289062, "std": 106.9792251586914, "min": -159.00933837890625, "p10": -38.8027561187744, "median": 81.79254913330078, "p90": 221.99877777099613, "max": 365.8407287597656, "pos_frac": 0.78125, "sample": [183.14865112304688, 225.1780242919922, 214.58053588867188, -19.56055450439453, 199.52549743652344, 9.242141723632812, 190.1510009765625, -44.9224739074707, 2.2925186157226562, 104.85243225097656, -24.523414611816406, 173.695556640625, -54.67082214355469, 23.79010772705078, -4.957977294921875, 164.45797729492188, 190.7053985595703, 0.01898193359375, 120.58927917480469, 86.32806396484375, 365.8407287597656, 161.4441375732422, 0.19133377075195312, 189.52120971679688, 144.99493408203125, -85.72445678710938, 193.98095703125, 27.03081512451172, 228.10882568359375, 67.3783950805664, 175.98895263671875, 36.346710205078125, 98.841552734375, 227.8815460205078, 13.140281677246094, 3.474996566772461, 114.93074035644531, 11.4776611328125, 78.83836364746094, 259.7421875, 159.36558532714844, -51.544403076171875, 276.79296875, 32.05308532714844, 196.191650390625, 193.28012084960938, -16.44910430908203, -47.485572814941406, 249.8171844482422, -11.546035766601562, 134.49789428710938, -159.00933837890625, 63.527069091796875, -61.216552734375, 84.74673461914062, -9.889335632324219, 134.17605590820312, 8.173561096191406, -20.940399169921875, 4.869354248046875, 210.61587524414062, 39.916297912597656, 107.63030242919922, 23.711273193359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000558.npy"}
|
||||
{"epoch": 0.8435374149659864, "step": 559, "batch_size": 64, "mean": 76.57566833496094, "std": 120.74654388427734, "min": -173.24996948242188, "p10": -46.53871459960938, "median": 50.32258605957031, "p90": 216.61794738769532, "max": 455.24072265625, "pos_frac": 0.6875, "sample": [25.937911987304688, -42.159019470214844, -13.573833465576172, -18.114471435546875, 113.17443084716797, 455.24072265625, 259.06634521484375, 292.6663818359375, 53.668128967285156, -6.835163116455078, -148.8681640625, 189.44183349609375, 72.8691635131836, 134.5356903076172, 7.294462203979492, 198.78851318359375, 184.28005981445312, 80.25873565673828, 165.9158935546875, 99.30337524414062, 210.29641723632812, 21.43896484375, -42.46470642089844, 167.18173217773438, -10.023033142089844, 31.316139221191406, -125.39988708496094, 189.47357177734375, 67.09268188476562, 195.31887817382812, 46.97704315185547, 83.80827331542969, -33.70709991455078, 305.55133056640625, 219.2073211669922, -50.33403778076172, 110.93319702148438, 11.776327133178711, -45.94066619873047, -19.765159606933594, -11.341072082519531, 3.8468551635742188, -20.772029876708984, 22.63336181640625, 161.08685302734375, 209.35531616210938, 167.45936584472656, -46.795021057128906, 166.88394165039062, -20.801925659179688, 32.13536834716797, 215.85206604003906, -123.8626708984375, 20.026779174804688, 170.5127716064453, 227.05441284179688, 22.987911224365234, 138.67950439453125, -1.2120742797851562, 15.520692825317383, 132.90704345703125, 216.94618225097656, -60.639366149902344, -173.24996948242188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000559.npy"}
|
||||
{"epoch": 0.8450491307634165, "step": 560, "batch_size": 64, "mean": 67.39244842529297, "std": 96.13802337646484, "min": -166.07107543945312, "p10": -44.71722564697265, "median": 48.03474426269531, "p90": 198.7454376220703, "max": 240.97576904296875, "pos_frac": 0.78125, "sample": [52.311256408691406, 53.105316162109375, 205.2119903564453, 33.229835510253906, -166.07107543945312, -23.672096252441406, 200.9942626953125, 18.117568969726562, 212.68124389648438, 0.026979446411132812, 80.37715148925781, 9.478004455566406, 120.41435241699219, 39.635711669921875, 5.2745361328125, 9.652099609375, -3.4367218017578125, 5.320802688598633, 25.989933013916016, 129.544677734375, -88.58934020996094, 150.61083984375, 65.95767974853516, 7.1263427734375, 188.41641235351562, 151.74365234375, -4.557868957519531, 191.8961944580078, 196.8050994873047, 3.5529308319091797, 3.409423828125, -14.728767395019531, -36.86425018310547, 218.759033203125, 170.54852294921875, -77.32852172851562, 43.75823211669922, -71.29765319824219, 190.46839904785156, -48.082786560058594, 199.57701110839844, 142.38442993164062, 54.04845428466797, 97.756103515625, 10.244743347167969, 104.4584732055664, 2.5842056274414062, 192.74734497070312, 43.353328704833984, -24.734649658203125, 24.2694091796875, 240.97576904296875, -67.86894989013672, -0.9311981201171875, 193.22491455078125, 203.53091430664062, -109.13441467285156, 68.93673706054688, 184.34426879882812, 63.983097076416016, 191.4910888671875, 92.27558898925781, 41.866092681884766, 113.9443359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000560.npy"}
|
||||
{"epoch": 0.8465608465608465, "step": 561, "batch_size": 64, "mean": 65.54713439941406, "std": 106.16424560546875, "min": -178.4238739013672, "p10": -52.013757324218744, "median": 32.34391689300537, "p90": 204.9062484741211, "max": 277.6182861328125, "pos_frac": 0.71875, "sample": [3.9424362182617188, 13.470664978027344, 11.81329345703125, -9.724252700805664, 26.97825813293457, 189.05734252929688, 123.35414123535156, -16.862136840820312, 183.65823364257812, 38.86083984375, 23.6473388671875, 146.30494689941406, -1.2823295593261719, 117.39073181152344, 20.126861572265625, -83.03022766113281, 36.30787658691406, 166.45126342773438, 203.7669677734375, -7.9926300048828125, -49.39091491699219, 224.32968139648438, 147.60552978515625, -73.16966247558594, 122.77613830566406, -53.13783264160156, 135.58612060546875, -42.0638313293457, -178.4238739013672, 270.7056579589844, -1.7101020812988281, 29.171003341674805, 205.12298583984375, 216.12545776367188, 35.51683044433594, 28.938217163085938, 50.526649475097656, 36.13684844970703, 9.173635482788086, 270.9993896484375, 277.6182861328125, 239.3826446533203, 198.1147003173828, 204.40052795410156, 160.9220428466797, -120.54512786865234, -69.35840606689453, 12.019088745117188, 97.11112976074219, 11.095869064331055, -47.213958740234375, 63.297325134277344, 8.854133605957031, -12.530111312866211, 97.83755493164062, 197.86386108398438, 2.4043941497802734, 147.8256378173828, 111.65249633789062, -100.98541259765625, -2.5276012420654297, 13.684480667114258, 175.40943908691406, -42.37420654296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000561.npy"}
|
||||
{"epoch": 0.8480725623582767, "step": 562, "batch_size": 64, "mean": 83.55224609375, "std": 109.89083099365234, "min": -122.87005615234375, "p10": -30.915501022338862, "median": 68.25790405273438, "p90": 211.3441375732422, "max": 375.6199951171875, "pos_frac": 0.75, "sample": [13.8140869140625, 193.47100830078125, -102.00860595703125, 287.5796203613281, 225.94996643066406, 205.22457885742188, 47.80858612060547, 347.9639892578125, 171.66265869140625, -15.134422302246094, 94.94203186035156, 69.19107055664062, -122.87005615234375, 20.038066864013672, 54.0899658203125, -2.7792625427246094, 45.74134826660156, 24.415863037109375, 128.78781127929688, -85.33305358886719, 146.51519775390625, 192.63682556152344, -32.911468505859375, -11.560218811035156, 5.825126647949219, 189.6954803466797, -6.916067123413086, 34.057884216308594, 49.025390625, -1.9852256774902344, 183.792236328125, -42.497772216796875, 175.07980346679688, 209.3719482421875, 87.72541809082031, 375.6199951171875, -26.258243560791016, -24.307998657226562, 2.9568824768066406, 232.14105224609375, 0.9435577392578125, 21.57707977294922, 81.93116760253906, 124.94754028320312, 180.68798828125, 197.27169799804688, 212.18936157226562, 118.94631958007812, 146.11477661132812, -24.190155029296875, 179.7276611328125, 67.32473754882812, -25.615236282348633, 71.01412200927734, 111.41896057128906, 85.44168853759766, 185.0150146484375, 231.0795440673828, 149.60519409179688, -116.79771423339844, 6.024749755859375, -72.26911926269531, 14.395774841308594, 59.99784851074219], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000562.npy"}
|
||||
{"epoch": 0.8495842781557067, "step": 563, "batch_size": 64, "mean": 72.69223022460938, "std": 113.01921844482422, "min": -208.28184509277344, "p10": -63.12365112304687, "median": 67.05557250976562, "p90": 206.24766845703127, "max": 249.1797332763672, "pos_frac": 0.765625, "sample": [99.9051284790039, 168.00863647460938, 41.806941986083984, 0.37233734130859375, 197.91224670410156, 157.6739501953125, -208.28184509277344, 218.05758666992188, 193.25135803222656, 12.974708557128906, 18.162853240966797, 194.9728240966797, 21.22167205810547, -66.32890319824219, 190.14622497558594, 50.36073303222656, -165.90579223632812, 102.22478485107422, -48.62712860107422, 249.1797332763672, 44.617340087890625, 234.6939697265625, -55.64472961425781, 219.6048583984375, 169.7781982421875, 112.0240249633789, 126.3798828125, 0.321502685546875, 76.81759643554688, -190.0633544921875, -5.4429473876953125, -24.514705657958984, 12.214042663574219, 41.89927673339844, -1.0917625427246094, 191.35498046875, 65.68060302734375, -71.72116088867188, 117.9677734375, 32.09418869018555, 200.40447998046875, 208.75189208984375, 189.74925231933594, -9.327224731445312, -126.4534912109375, 68.4305419921875, 53.45030212402344, -41.56578826904297, 199.14715576171875, 126.11942291259766, -153.10086059570312, 81.81954956054688, 34.00788497924805, 150.44879150390625, 154.11659240722656, 54.84010314941406, 209.10708618164062, 244.49093627929688, 40.70643615722656, 191.64852905273438, 139.400146484375, -50.24064636230469, 149.17889404296875, 13.115478515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000563.npy"}
|
||||
{"epoch": 0.8510959939531368, "step": 564, "batch_size": 64, "mean": 92.04689025878906, "std": 113.8949203491211, "min": -224.2997589111328, "p10": -50.88452682495116, "median": 96.48964309692383, "p90": 225.85638885498048, "max": 276.3990478515625, "pos_frac": 0.8125, "sample": [189.73558044433594, 14.876441955566406, 221.40084838867188, 194.34219360351562, 89.44307708740234, 252.9974365234375, 128.20384216308594, -201.3849639892578, -40.142005920410156, 11.7449951171875, 211.36441040039062, 178.364013671875, 53.76288986206055, -29.46501350402832, 10.305465698242188, 25.356826782226562, -17.8017578125, 165.05105590820312, 195.9166259765625, 108.25102996826172, 56.0999641418457, 0.7724018096923828, 127.01524353027344, 154.062255859375, 223.9782257080078, 172.43728637695312, 246.02218627929688, -68.26998901367188, -5.817289352416992, 123.6614990234375, 246.70394897460938, 254.7278594970703, 229.60601806640625, 116.45939636230469, 36.38470458984375, -148.98831176757812, 179.40684509277344, 63.051273345947266, 94.11482238769531, 8.210807800292969, 186.9419403076172, 47.69624328613281, 81.04966735839844, -23.459718704223633, -90.83529663085938, 80.2200927734375, 76.2027816772461, 276.3990478515625, 98.86446380615234, -64.57209777832031, 82.07748413085938, 150.0608367919922, 219.70166015625, 16.59324073791504, 180.73489379882812, 42.727439880371094, 187.51304626464844, 226.66131591796875, -224.2997589111328, 201.01580810546875, -55.48846435546875, 20.466449737548828, 120.36337280273438, 182.4042510986328], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000564.npy"}
|
||||
{"epoch": 0.8526077097505669, "step": 565, "batch_size": 64, "mean": 64.09708404541016, "std": 98.1173095703125, "min": -176.14068603515625, "p10": -60.98951072692871, "median": 56.97293472290039, "p90": 182.1102172851563, "max": 305.5555114746094, "pos_frac": 0.78125, "sample": [6.592704772949219, 19.788578033447266, -4.661964416503906, 24.659393310546875, 63.934059143066406, 55.706451416015625, 132.10934448242188, 57.69927215576172, 13.99285888671875, 33.35144805908203, 49.047080993652344, 110.82034301757812, 281.28924560546875, -176.14068603515625, -26.19708251953125, 164.4796142578125, 80.77627563476562, 108.99580383300781, 305.5555114746094, 222.39797973632812, 129.46401977539062, -91.99427795410156, 9.371673583984375, 11.163719177246094, 56.24659729003906, 135.06842041015625, 94.09178161621094, 19.161643981933594, 89.9698257446289, 91.64698028564453, 186.59619140625, 7.147699356079102, -64.27610778808594, -50.552249908447266, 121.44966125488281, -58.8760871887207, 8.171342849731445, 170.09783935546875, -172.9508056640625, 78.81600952148438, 162.22900390625, 1.9055137634277344, -61.895263671875, 88.56692504882812, -105.89801025390625, 137.45863342285156, 41.49235534667969, 173.15390014648438, 236.47669982910156, 34.670814514160156, 141.40301513671875, -13.228225708007812, -72.48995971679688, -0.6739768981933594, 44.96236038208008, 39.372135162353516, -30.384765625, 164.6986846923828, 94.60282135009766, 99.12957763671875, 194.484375, 141.90444946289062, 110.31356048583984, 185.94863891601562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000565.npy"}
|
||||
{"epoch": 0.854119425547997, "step": 566, "batch_size": 64, "mean": 60.08150100708008, "std": 126.92169952392578, "min": -297.83935546875, "p10": -84.00038986206053, "median": 48.81562423706055, "p90": 209.72557220458987, "max": 340.672607421875, "pos_frac": 0.6875, "sample": [107.9185791015625, 53.517601013183594, -72.78826141357422, 135.09999084472656, -20.37689208984375, 33.67238998413086, 195.1760711669922, 214.5086669921875, 1.538076400756836, 141.49583435058594, 71.45518493652344, 111.54817199707031, 44.1136474609375, 26.18398666381836, 11.860832214355469, 340.672607421875, -297.83935546875, 155.44369506835938, 215.3555145263672, 159.2725067138672, 223.36671447753906, -18.481170654296875, -27.700206756591797, 86.6698989868164, 2.5800113677978516, -117.30570983886719, 104.30119323730469, 174.0833282470703, 200.26663208007812, 1.4170074462890625, 205.6068878173828, 175.61627197265625, 173.88555908203125, -3.780609130859375, -62.044403076171875, -191.09555053710938, -64.07024383544922, 42.777584075927734, 310.5228271484375, -70.77980041503906, 28.608016967773438, -88.80558776855469, 283.904296875, -28.615684509277344, 188.09608459472656, 4.494148254394531, -119.19371032714844, -127.55211639404297, 58.10608673095703, -17.453659057617188, -62.86580276489258, 5.919059753417969, -59.95661926269531, 191.7919464111328, 1.9817314147949219, 82.76582336425781, -67.4531021118164, 211.49072265625, 85.21355438232422, -139.47598266601562, 162.44142150878906, 131.13861083984375, 142.48077392578125, 204.49093627929688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000566.npy"}
|
||||
{"epoch": 0.8556311413454271, "step": 567, "batch_size": 64, "mean": 75.8134536743164, "std": 104.9013671875, "min": -148.09854125976562, "p10": -79.67790908813475, "median": 67.29642486572266, "p90": 199.7608642578125, "max": 293.3656311035156, "pos_frac": 0.78125, "sample": [121.65504455566406, -5.290210723876953, 1.4682540893554688, 183.2373809814453, -99.34828186035156, 185.04412841796875, 209.60519409179688, 173.4322509765625, 183.13272094726562, 48.620140075683594, 195.7486114501953, 60.304534912109375, 124.6943588256836, -29.645828247070312, 129.61239624023438, -23.399253845214844, -7.0615234375, 44.083648681640625, 29.51154327392578, 43.0178108215332, 162.86959838867188, 12.986198425292969, 4.31562614440918, 192.50433349609375, -85.01691436767578, 0.3681926727294922, -143.63064575195312, -115.85223388671875, 111.2780532836914, -134.197998046875, 160.90953063964844, 67.38175964355469, 197.6622314453125, 293.3656311035156, 227.00045776367188, -0.18901443481445312, 131.49134826660156, 58.80540466308594, 140.46385192871094, 201.73963928222656, -118.22062683105469, 109.44048309326172, 67.21109008789062, 109.91581726074219, 181.85414123535156, 168.1480712890625, 4.030357360839844, 2.6923675537109375, 203.048095703125, 179.02488708496094, 66.5823745727539, 36.89609909057617, 200.6602783203125, -1.197509765625, 16.000213623046875, -148.09854125976562, -67.22023010253906, 109.33250427246094, 221.10824584960938, 41.020782470703125, 158.3995819091797, 87.90733337402344, 32.3616943359375, 138.48577880859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000567.npy"}
|
||||
{"epoch": 0.8571428571428571, "step": 568, "batch_size": 64, "mean": 93.97561645507812, "std": 105.2862548828125, "min": -96.8150634765625, "p10": -31.354446411132812, "median": 76.27018356323242, "p90": 225.85551605224612, "max": 389.13409423828125, "pos_frac": 0.828125, "sample": [159.65228271484375, 67.6614990234375, 35.94316482543945, 38.76966857910156, 108.31291198730469, 149.1387481689453, 166.73379516601562, -63.27400207519531, 22.874752044677734, 7.1815185546875, -47.79126739501953, 190.91616821289062, 58.449058532714844, 268.6443786621094, 8.098373413085938, -54.92008972167969, 51.300743103027344, -14.299369812011719, 48.628501892089844, 145.7165069580078, -96.8150634765625, -29.77886962890625, 84.87886810302734, 185.3719482421875, 228.6941680908203, 219.23199462890625, 162.5791015625, 268.2430114746094, -70.11872863769531, 56.24267578125, 179.79075622558594, 114.72600555419922, 99.7022476196289, -21.635154724121094, 168.00550842285156, 10.035371780395508, -6.2826995849609375, 118.07984161376953, 0.5873012542724609, 173.51971435546875, 3.48675537109375, 214.7452392578125, 114.21743774414062, 95.78875732421875, 173.97030639648438, 54.729347229003906, 36.84376525878906, 3.6868896484375, 8.233108520507812, 58.814300537109375, 104.02285766601562, 196.9814453125, -55.442352294921875, 161.5156707763672, 202.5492401123047, 48.081207275390625, 0.43708038330078125, 328.2721862792969, 234.56549072265625, 241.88194274902344, -32.029693603515625, 34.46833801269531, 202.6906280517578, 389.13409423828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000568.npy"}
|
||||
{"epoch": 0.8586545729402872, "step": 569, "batch_size": 64, "mean": 45.95063781738281, "std": 103.08551025390625, "min": -199.309814453125, "p10": -93.41015853881832, "median": 30.072540283203125, "p90": 187.61996307373047, "max": 274.8197021484375, "pos_frac": 0.71875, "sample": [173.29708862304688, -110.28520202636719, 2.6330623626708984, 173.74551391601562, 35.680389404296875, 193.85256958007812, 27.669876098632812, 252.97711181640625, 55.65277099609375, 189.57345581054688, 82.9267578125, -43.98662567138672, 14.95516586303711, 32.47520446777344, -199.309814453125, 61.62028503417969, 17.306835174560547, 274.8197021484375, -31.892194747924805, 86.61956024169922, 144.46392822265625, 161.139892578125, -0.8950996398925781, 224.87759399414062, -22.647674560546875, 180.67495727539062, 105.18685913085938, 15.955501556396484, 51.40925598144531, -137.05056762695312, -145.6540985107422, 88.36384582519531, 7.7132720947265625, 16.161205291748047, -54.035057067871094, -6.5396728515625, 200.69635009765625, 2.1293506622314453, 49.633644104003906, 19.19761085510254, 68.67756652832031, -41.44043731689453, -129.7996063232422, -41.270416259765625, 130.5250701904297, -138.58090209960938, 68.40324401855469, 4.9504852294921875, 24.633953094482422, 111.2508316040039, 58.36025619506836, 79.59898376464844, 183.0618133544922, -6.528541564941406, -35.039146423339844, 33.47571563720703, 91.51521301269531, 26.631881713867188, 203.65460205078125, -43.499267578125, -120.36029815673828, 24.01885223388672, 176.00047302246094, 21.48772430419922], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000569.npy"}
|
||||
{"epoch": 0.8601662887377173, "step": 570, "batch_size": 64, "mean": 91.08104705810547, "std": 105.79183197021484, "min": -143.98606872558594, "p10": -33.24134235382079, "median": 92.55331420898438, "p90": 209.02098541259767, "max": 388.5014953613281, "pos_frac": 0.78125, "sample": [207.556640625, -88.00922393798828, 177.0076904296875, 70.7437744140625, 143.9354248046875, 31.20665740966797, -70.59628295898438, -1.06732177734375, 53.893829345703125, -16.869003295898438, 102.1496810913086, 82.33984375, -1.9998950958251953, 169.7036895751953, -13.265037536621094, 177.40142822265625, 0.45400047302246094, 0.5764045715332031, 99.2537841796875, 123.24905395507812, 109.66981506347656, -136.81967163085938, 38.074851989746094, 185.13270568847656, 207.81541442871094, 3.152273178100586, 42.948238372802734, 185.18690490722656, 226.57083129882812, -38.27404022216797, 7.142646789550781, 127.70441436767578, 181.61019897460938, -79.34455871582031, 0.5547828674316406, 132.61013793945312, 200.70346069335938, 76.97761535644531, 193.93470764160156, 37.163726806640625, 240.18484497070312, 147.96267700195312, 256.5399169921875, -21.498380661010742, 44.660804748535156, 162.7812957763672, 177.12274169921875, 228.3159942626953, 142.4613037109375, -143.98606872558594, 192.63015747070312, -1.7796707153320312, 72.7296371459961, 169.5227813720703, 217.8514404296875, 47.17279052734375, 85.85284423828125, -4.52021598815918, 118.26895141601562, 171.0888671875, 388.5014953613281, 38.84994888305664, 209.53765869140625, -61.24444580078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000570.npy"}
|
||||
{"epoch": 0.8616780045351474, "step": 571, "batch_size": 64, "mean": 51.371917724609375, "std": 109.64021301269531, "min": -196.40603637695312, "p10": -107.47458038330078, "median": 63.68422317504883, "p90": 180.48685455322266, "max": 310.2308349609375, "pos_frac": 0.703125, "sample": [114.39442443847656, 125.2823257446289, 81.97517395019531, 13.625677108764648, -1.3654022216796875, 105.76536560058594, 94.702880859375, 99.07059478759766, 144.63453674316406, 310.2308349609375, 109.37055969238281, 49.774818420410156, -131.95071411132812, -2.962749481201172, -2.7275390625, 132.0858612060547, 11.406906127929688, 173.63528442382812, 25.2327880859375, 112.56977081298828, 174.4461669921875, 19.331092834472656, 66.72386169433594, 90.06745910644531, 213.54010009765625, 85.41957092285156, 111.79811096191406, 141.7415008544922, -196.40603637695312, 15.564750671386719, 2.5898380279541016, -112.21368408203125, -13.201774597167969, -169.96942138671875, 157.98960876464844, -3.1101818084716797, -25.20539093017578, 2.2625770568847656, -96.41667175292969, -46.962982177734375, 33.71715545654297, 70.074951171875, 181.53195190429688, 128.8917236328125, 70.64927673339844, 191.59500122070312, 208.99716186523438, -190.7700958251953, 60.64458465576172, -42.84649658203125, -57.25294494628906, -174.3600616455078, 111.76163482666016, 36.26630401611328, 178.0482940673828, -34.390953063964844, 136.0743408203125, 148.17324829101562, 38.78350067138672, 211.5065460205078, -42.366580963134766, 7.182716369628906, -161.8846893310547, 195.03643798828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000571.npy"}
|
||||
{"epoch": 0.8631897203325775, "step": 572, "batch_size": 64, "mean": 59.242828369140625, "std": 99.17665100097656, "min": -167.2802734375, "p10": -45.412857055664055, "median": 33.75107955932617, "p90": 193.97388916015626, "max": 223.71609497070312, "pos_frac": 0.703125, "sample": [6.887184143066406, -47.39021301269531, 4.051918029785156, 99.33322143554688, 194.97430419921875, 48.383087158203125, 181.49664306640625, 30.164474487304688, -6.184223175048828, 201.62649536132812, -18.073379516601562, 156.9717254638672, 194.61578369140625, 199.86422729492188, -33.50948715209961, 192.47613525390625, -65.68832397460938, 16.576181411743164, 24.18691062927246, 132.1620635986328, 39.57382583618164, -5.97723388671875, 130.48692321777344, 185.0087432861328, 201.16293334960938, 84.47456359863281, -33.275535583496094, 137.1597900390625, -8.078239440917969, 28.989730834960938, 223.71609497070312, 185.4463348388672, 180.1363067626953, -167.2802734375, 131.09523010253906, 12.388858795166016, 55.177825927734375, -61.36860656738281, 8.77950668334961, 123.40318298339844, 109.9427719116211, -14.589933395385742, 132.9552001953125, 151.71356201171875, 28.70953369140625, 157.96987915039062, 9.497512817382812, -144.64735412597656, 211.81588745117188, -3.2826385498046875, 89.81291198730469, 134.176025390625, -32.20646667480469, -10.197395324707031, -160.75772094726562, -40.79902648925781, 37.337684631347656, 119.23709106445312, -20.051666259765625, -117.30056762695312, 168.74969482421875, 0.9420604705810547, 13.524068832397461, 5.045236587524414], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000572.npy"}
|
||||
{"epoch": 0.8647014361300076, "step": 573, "batch_size": 64, "mean": 65.05168151855469, "std": 123.51338195800781, "min": -230.03594970703125, "p10": -85.76209335327147, "median": 58.602054595947266, "p90": 239.34500732421876, "max": 372.24456787109375, "pos_frac": 0.71875, "sample": [44.94459533691406, 130.39923095703125, -186.08763122558594, 187.98788452148438, -91.41545867919922, 58.09832763671875, -5.440286636352539, 21.551048278808594, 88.068115234375, -72.57090759277344, -186.639892578125, -171.4932861328125, -5.685834884643555, -54.34590148925781, -36.69146728515625, 192.36349487304688, -25.629440307617188, 26.258316040039062, 198.02554321289062, -230.03594970703125, 59.10578155517578, 84.94770812988281, 236.31741333007812, 95.40352630615234, -11.26275634765625, 19.436176300048828, 372.24456787109375, 1.3628959655761719, 147.464111328125, 95.69759368896484, 11.262725830078125, 27.7308349609375, 82.54960632324219, 40.22566223144531, 109.34124755859375, -15.950611114501953, 120.72528076171875, 74.953125, 28.66449737548828, 268.98291015625, 173.95089721679688, 172.2551727294922, 179.65843200683594, -5.032381057739258, -47.88850402832031, 106.06051635742188, 250.60426330566406, 3.9374237060546875, 38.810218811035156, 104.43144989013672, 240.64254760742188, 279.88153076171875, 101.70602416992188, 119.66523742675781, 2.3525848388671875, -109.41090393066406, 320.48394775390625, -35.259788513183594, 100.08372497558594, 98.72344970703125, 124.68931579589844, 44.16143035888672, -95.12042236328125, 263.05841064453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000573.npy"}
|
||||
{"epoch": 0.8662131519274376, "step": 574, "batch_size": 64, "mean": 81.73310852050781, "std": 91.13892364501953, "min": -116.24617767333984, "p10": -12.041982269287109, "median": 69.3180923461914, "p90": 197.8892593383789, "max": 392.6213684082031, "pos_frac": 0.796875, "sample": [84.26227569580078, -60.23615264892578, -14.566413879394531, -116.24617767333984, 105.41357421875, 120.41802978515625, 31.570526123046875, 29.962066650390625, 8.266542434692383, -21.75171661376953, -10.368637084960938, 105.86065673828125, 10.733505249023438, 43.47943878173828, -4.396820068359375, 3.3059310913085938, -9.740596771240234, 59.94190979003906, 41.79174041748047, 114.60418701171875, 155.3859405517578, 36.218650817871094, 17.59619140625, 163.41387939453125, -11.769874572753906, 207.9320831298828, 62.656105041503906, 198.2928009033203, -0.9313411712646484, 201.98422241210938, 116.95468139648438, 0.09879302978515625, 83.33047485351562, 48.26837158203125, 138.20333862304688, 151.66339111328125, -93.35601043701172, 142.70887756347656, 196.94766235351562, 9.029573440551758, 117.58694458007812, 26.59234619140625, 183.32461547851562, 215.26736450195312, 144.4296875, 199.07211303710938, 114.11753845214844, 108.75985717773438, 74.53805541992188, 18.073143005371094, -12.158599853515625, 194.76866149902344, -7.0711517333984375, 139.55970764160156, 111.81209564208984, 34.867469787597656, 64.09812927246094, 183.75115966796875, 188.8775634765625, 228.03819274902344, -19.82056427001953, 154.6013641357422, 392.6213684082031, 28.280380249023438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000574.npy"}
|
||||
{"epoch": 0.8677248677248677, "step": 575, "batch_size": 64, "mean": 60.06227493286133, "std": 125.51085662841797, "min": -217.18844604492188, "p10": -96.16421890258786, "median": 35.73284912109375, "p90": 223.42757720947267, "max": 413.85546875, "pos_frac": 0.671875, "sample": [127.06696319580078, -153.28382873535156, -217.18844604492188, -5.489587783813477, 413.85546875, 103.747314453125, -27.117828369140625, 123.47045135498047, 21.452529907226562, -20.937244415283203, 132.74412536621094, -75.69921112060547, 222.32826232910156, -33.378780364990234, -148.0045623779297, 96.79081726074219, -139.4278564453125, 124.45603942871094, -0.7816238403320312, 7.108772277832031, 54.522010803222656, 223.89871215820312, 166.87307739257812, 6.6181793212890625, 0.7349014282226562, 20.580001831054688, -152.10073852539062, 155.92933654785156, 262.7592468261719, -13.838325500488281, 16.382383346557617, 70.2195816040039, 202.36306762695312, 290.368896484375, 181.9054412841797, -74.30859375, 225.5562744140625, -10.11151123046875, 245.4217529296875, 166.72488403320312, -41.60798645019531, 37.720703125, 104.7014389038086, 33.7449951171875, 4.6756744384765625, -13.148292541503906, 54.63783264160156, -178.36648559570312, 123.72120666503906, 297.1161804199219, 220.02174377441406, 125.69712829589844, 21.323837280273438, 47.12228775024414, -104.9349365234375, 27.58197021484375, 90.76304626464844, -7.417461395263672, 14.601160049438477, 97.6800537109375, -16.694602966308594, 183.18304443359375, 149.076416015625, -19.423770904541016], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000575.npy"}
|
||||
{"epoch": 0.8692365835222978, "step": 576, "batch_size": 64, "mean": 84.85408020019531, "std": 109.95771789550781, "min": -221.61595153808594, "p10": -53.395279693603506, "median": 71.60464477539062, "p90": 210.8927444458008, "max": 324.788818359375, "pos_frac": 0.765625, "sample": [17.680999755859375, 175.54722595214844, -13.437614440917969, 71.91909790039062, 208.06500244140625, 49.85270690917969, 197.55885314941406, 13.88517951965332, 71.29019165039062, 145.96995544433594, -61.04975128173828, -82.92947387695312, 197.6857147216797, -64.94912719726562, 113.91910552978516, -127.14395141601562, -43.53917694091797, 176.8921356201172, 324.788818359375, 183.734130859375, 198.01124572753906, 227.91510009765625, 124.22622680664062, 6.478292465209961, 153.1529541015625, 101.08837890625, 12.150774002075195, -68.83729553222656, 277.65252685546875, -57.61932373046875, 41.71607971191406, 48.33740997314453, 67.11033630371094, 190.866943359375, 77.84249877929688, 64.22962951660156, 258.4017333984375, -10.830398559570312, -3.12469482421875, 10.411880493164062, 205.32901000976562, -21.716190338134766, 181.96319580078125, 20.750944137573242, 211.22799682617188, 1.4584197998046875, 210.11048889160156, 189.63331604003906, -4.257009506225586, 211.97769165039062, 203.7273712158203, -221.61595153808594, 228.74156188964844, 145.4314422607422, 78.84021759033203, 197.66302490234375, -11.462892532348633, -3.442474365234375, 31.67316436767578, 87.38756561279297, 0.8940181732177734, 57.172218322753906, 30.823204040527344, 123.43074035644531], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000576.npy"}
|
||||
{"epoch": 0.8707482993197279, "step": 577, "batch_size": 64, "mean": 57.643611907958984, "std": 108.6761703491211, "min": -243.84548950195312, "p10": -65.97652130126953, "median": 44.849618911743164, "p90": 196.9177993774414, "max": 274.27410888671875, "pos_frac": 0.78125, "sample": [18.8402099609375, 9.162994384765625, -16.17577362060547, 53.53179931640625, 7.2505035400390625, 180.070068359375, 18.213214874267578, -21.441682815551758, 237.25607299804688, 12.459785461425781, -58.557350158691406, 197.51731872558594, 163.33114624023438, -53.018829345703125, -94.27203369140625, 205.96917724609375, 211.4840087890625, 183.64181518554688, -159.775390625, 126.72380065917969, 206.5276641845703, 84.03333282470703, 13.822589874267578, 166.4207763671875, 57.67376708984375, -69.15616607666016, -127.22352600097656, 137.36048889160156, 157.2512969970703, 118.62481689453125, -28.54278564453125, 178.39710998535156, -35.31439208984375, 47.06379699707031, 32.376922607421875, 22.478439331054688, 29.557231903076172, 11.441192626953125, 22.594223022460938, 8.802799224853516, 50.08820343017578, 23.62701416015625, 214.0330047607422, 3.0640621185302734, 195.5189208984375, 274.27410888671875, -230.97543334960938, 135.83599853515625, 63.570068359375, 55.757904052734375, 33.04374694824219, 124.81283569335938, 184.40625, 183.02667236328125, 8.644638061523438, 44.533206939697266, -243.84548950195312, -2.686746597290039, -72.31314086914062, 130.93136596679688, 3.7978515625, 106.32235717773438, 102.15727233886719, 45.16603088378906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000577.npy"}
|
||||
{"epoch": 0.872260015117158, "step": 578, "batch_size": 64, "mean": 56.66440963745117, "std": 114.87357330322266, "min": -218.6173553466797, "p10": -65.62986450195312, "median": 37.27638244628906, "p90": 194.37952117919923, "max": 349.17431640625, "pos_frac": 0.65625, "sample": [115.06893920898438, -5.553199768066406, 93.97457885742188, 51.03485107421875, 70.19783782958984, -64.73153686523438, 83.70401000976562, 19.36865997314453, 161.5374755859375, 1.7031269073486328, 9.148529052734375, 349.17431640625, -2.3406143188476562, 137.95071411132812, 180.81785583496094, -218.6173553466797, 164.92123413085938, 192.31903076171875, 161.5269775390625, 115.43057250976562, 174.10043334960938, 108.339599609375, 1.674295425415039, -5.379238128662109, 14.836301803588867, 79.24381256103516, 224.09637451171875, 0.4941253662109375, 95.23934173583984, -5.095481872558594, 262.6598205566406, 187.16375732421875, 195.26258850097656, -66.01486206054688, -7.387031555175781, 34.97154235839844, -3.777822494506836, 93.89765930175781, 71.20734405517578, 78.06703186035156, 188.90345764160156, -195.5753173828125, 39.58122253417969, -54.53855895996094, -18.584518432617188, 13.936080932617188, -46.009422302246094, 182.8601531982422, -104.63365173339844, 216.4484405517578, 114.39813995361328, -110.77294158935547, 303.21893310546875, -26.818063735961914, -58.27009582519531, -78.40196228027344, 27.650360107421875, 77.26679229736328, -42.37885284423828, 18.610525131225586, -29.49142074584961, -5.589790344238281, -155.02932739257812, 219.5062713623047], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000578.npy"}
|
||||
{"epoch": 0.873771730914588, "step": 579, "batch_size": 64, "mean": 81.16737365722656, "std": 95.40596771240234, "min": -186.34852600097656, "p10": -32.28306884765624, "median": 84.04261016845703, "p90": 199.41866302490237, "max": 275.2857666015625, "pos_frac": 0.78125, "sample": [117.57691192626953, 79.53207397460938, 74.27947235107422, 48.91516876220703, 62.300926208496094, -136.3436279296875, 208.3068084716797, 86.40434265136719, 143.82516479492188, 275.2857666015625, 106.97944641113281, 181.393310546875, 143.2625274658203, 202.00318908691406, 127.760498046875, 16.133438110351562, 178.84486389160156, -36.23606872558594, -22.675453186035156, 140.4479217529297, 228.60382080078125, -67.99848937988281, 189.58258056640625, -4.432355880737305, 69.52708435058594, 153.57254028320312, 206.19827270507812, 16.404434204101562, 195.01551818847656, 183.36859130859375, -62.92957305908203, 21.44182586669922, 154.22879028320312, -1.6922130584716797, 105.62976837158203, 120.5467529296875, 44.9136962890625, -11.806272506713867, -64.72633361816406, 188.30946350097656, -0.720123291015625, 4.560625076293945, -12.135101318359375, 15.849143981933594, 81.68087768554688, 5.483888626098633, -23.059402465820312, -186.34852600097656, 201.30572509765625, 142.35244750976562, 92.845458984375, 191.49862670898438, 59.353668212890625, 220.58729553222656, 125.53561401367188, 6.622915267944336, 152.35218811035156, 145.22232055664062, 42.06420135498047, 87.2061767578125, -58.65455627441406, 24.632144927978516, 138.216552734375, 76.50556945800781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000579.npy"}
|
||||
{"epoch": 0.8752834467120182, "step": 580, "batch_size": 64, "mean": 68.21715545654297, "std": 119.710693359375, "min": -199.26893615722656, "p10": -55.452216339111324, "median": 59.30598449707031, "p90": 231.8698394775391, "max": 338.383056640625, "pos_frac": 0.671875, "sample": [-130.3704833984375, -57.760162353515625, -32.47261047363281, 10.988080978393555, -37.477508544921875, 18.525819778442383, 178.0437774658203, 83.24533081054688, 77.802001953125, 39.355079650878906, 223.89366149902344, 81.01091766357422, 29.419265747070312, -9.262935638427734, -41.74646759033203, 73.84346771240234, 170.38357543945312, 135.18112182617188, 3.446920394897461, 45.80680847167969, -2.1672706604003906, 137.8013458251953, 254.04708862304688, 65.70552825927734, 127.03133392333984, -35.68353271484375, 280.89404296875, -7.486724853515625, -11.034826278686523, 235.15463256835938, 224.205322265625, 201.0437774658203, -6.247993469238281, 141.90252685546875, -37.61289978027344, 192.83712768554688, -17.572368621826172, -199.26893615722656, 11.09115219116211, 338.383056640625, -182.2637176513672, 31.915618896484375, -85.3740234375, 124.26467895507812, 287.16656494140625, -17.31071662902832, 54.753562927246094, 63.85840606689453, 256.44720458984375, 151.40289306640625, 142.86404418945312, 152.24342346191406, -50.06700897216797, -153.29330444335938, 20.50467300415039, -43.4737548828125, 72.96531677246094, 142.1988067626953, 129.0260772705078, 290.5019226074219, 41.307647705078125, -104.55244445800781, 144.58206176757812, 141.3520965576172], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000580.npy"}
|
||||
{"epoch": 0.8767951625094482, "step": 581, "batch_size": 64, "mean": 67.42747497558594, "std": 111.4162826538086, "min": -160.12240600585938, "p10": -92.83510894775391, "median": 41.771156311035156, "p90": 201.63425903320314, "max": 371.8711853027344, "pos_frac": 0.71875, "sample": [144.46231079101562, 210.28839111328125, 39.53510284423828, 2.8414459228515625, 93.35401916503906, -8.203201293945312, 11.415023803710938, -89.54263305664062, 103.48617553710938, 111.77991485595703, -99.762939453125, 17.74637222290039, 15.624029159545898, 29.584569931030273, 167.8406982421875, -82.21600341796875, 172.60610961914062, 92.10523223876953, 137.447998046875, 30.026342391967773, 177.39230346679688, 31.437957763671875, 202.36734008789062, -46.39544677734375, 240.77220153808594, 199.92373657226562, 176.5841827392578, -41.28265380859375, 44.00720977783203, 93.66897583007812, -28.915008544921875, 178.34881591796875, 44.675533294677734, 145.56976318359375, 143.41232299804688, 129.7122344970703, 37.91064453125, -160.12240600585938, 263.85137939453125, 242.98794555664062, -41.24987030029297, -124.17489624023438, 119.385009765625, 29.359909057617188, 113.74691772460938, -97.47224426269531, 371.8711853027344, 20.6859130859375, -13.51632308959961, -101.50637817382812, 189.32916259765625, -26.740257263183594, 119.69621276855469, 185.95138549804688, 19.428119659423828, 142.2548065185547, -3.00482177734375, -102.58651733398438, -94.24617004394531, 33.176334381103516, 34.651397705078125, 184.94403076171875, 204.18496704101562, -25.13580322265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000581.npy"}
|
||||
{"epoch": 0.8783068783068783, "step": 582, "batch_size": 64, "mean": 63.176971435546875, "std": 106.87153625488281, "min": -184.7770233154297, "p10": -49.911795043945304, "median": 51.813148498535156, "p90": 193.26396484375002, "max": 312.72662353515625, "pos_frac": 0.734375, "sample": [118.93216705322266, 182.08920288085938, 54.59813690185547, 105.96966552734375, 21.114990234375, 33.33198928833008, 225.70606994628906, 58.91383743286133, 167.4546356201172, 152.29864501953125, 80.70307922363281, 49.028160095214844, 268.98260498046875, 312.72662353515625, -40.690948486328125, 194.64593505859375, -53.86358642578125, -16.665695190429688, 110.5848388671875, 122.79149627685547, -5.377769470214844, 140.759765625, 11.102466583251953, 227.7781982421875, 176.09719848632812, 241.5302276611328, -12.629659652709961, 9.646020889282227, -139.29360961914062, -181.42161560058594, -6.305105209350586, 11.58367919921875, 111.16939544677734, -25.48943328857422, -22.187543869018555, 164.01023864746094, 146.56346130371094, 62.2736930847168, 206.927978515625, 8.81612777709961, 108.35516357421875, 101.29328918457031, 76.51823425292969, 9.323274612426758, 6.05836296081543, -15.301498413085938, -58.02912139892578, 173.83096313476562, 9.987113952636719, -184.7770233154297, 2.933897018432617, 22.025588989257812, 0.7211570739746094, 0.1970062255859375, -109.67160034179688, 70.92056274414062, 170.6385040283203, -37.952369689941406, 182.70909118652344, -33.6107177734375, -83.75250244140625, 11.450019836425781, 155.21389770507812, 190.03936767578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000582.npy"}
|
||||
{"epoch": 0.8798185941043084, "step": 583, "batch_size": 64, "mean": 77.58027648925781, "std": 118.5909652709961, "min": -216.4075469970703, "p10": -45.503845596313475, "median": 87.25904083251953, "p90": 211.47789916992187, "max": 296.58880615234375, "pos_frac": 0.671875, "sample": [85.60635375976562, -25.582786560058594, -15.567634582519531, -2.143901824951172, 177.856689453125, 88.91172790527344, -41.095909118652344, -18.301849365234375, -153.88584899902344, -6.4950714111328125, 209.72984313964844, -15.840938568115234, -43.630977630615234, 160.30010986328125, 95.38327026367188, -55.964927673339844, 71.03395080566406, 1.670114517211914, 207.04995727539062, 176.48846435546875, 39.81208801269531, -163.6859130859375, 178.81272888183594, 7.765678405761719, 156.80055236816406, 140.27296447753906, 274.9828186035156, -216.4075469970703, 25.049610137939453, 183.98736572265625, 49.28934860229492, 56.991477966308594, -18.578201293945312, 26.535385131835938, 209.90432739257812, 227.81468200683594, -7.8118896484375, -46.30650329589844, 210.242431640625, 179.11537170410156, 101.70318603515625, 223.9078369140625, -199.8765106201172, 296.58880615234375, -0.38941001892089844, 183.61175537109375, 149.26385498046875, 160.4222412109375, 117.23641204833984, -17.187400817871094, 255.04937744140625, 98.98322296142578, 199.7762451171875, 14.921548843383789, 117.19912719726562, 197.6136474609375, -106.46379852294922, 105.98441314697266, -1.3548507690429688, 189.3333282470703, 212.00738525390625, 221.12664794921875, 40.49952697753906, -4.9264373779296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000583.npy"}
|
||||
{"epoch": 0.8813303099017384, "step": 584, "batch_size": 64, "mean": 68.2786865234375, "std": 123.4284439086914, "min": -185.2158203125, "p10": -77.9887596130371, "median": 41.69529724121094, "p90": 242.16078338623055, "max": 328.9433288574219, "pos_frac": 0.671875, "sample": [212.98257446289062, 255.7869873046875, 55.390377044677734, -7.034889221191406, 71.51829528808594, -5.029611587524414, 254.33538818359375, 223.1533203125, 165.5787353515625, 204.2556915283203, 111.50299072265625, -1.9912586212158203, -13.322122573852539, -77.0105209350586, -145.25741577148438, 124.18817901611328, 121.06322479248047, 202.76336669921875, 186.01779174804688, -7.409778594970703, 185.06118774414062, 250.3068389892578, -1.1110458374023438, 179.8477325439453, 328.9433288574219, 182.937255859375, 95.8031005859375, 8.525787353515625, 41.28623962402344, 21.749786376953125, 195.40313720703125, -78.40800476074219, 144.73263549804688, 14.33349609375, -129.49197387695312, 188.4600830078125, -14.817214965820312, -22.82269287109375, -21.59705352783203, -51.254241943359375, 3.257152557373047, 97.30066680908203, -65.50181579589844, 42.10435485839844, 98.17884063720703, 285.6136474609375, 3.0647830963134766, 66.62611389160156, 94.89656829833984, -103.38152313232422, -185.2158203125, 29.273784637451172, -45.009971618652344, 181.08419799804688, 1.3931961059570312, 291.92529296875, 257.6241149902344, 11.578302383422852, 12.284360885620117, 39.33683776855469, -19.280113220214844, 132.49551391601562, -129.63441467285156, -179.54759216308594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000584.npy"}
|
||||
{"epoch": 0.8828420256991686, "step": 585, "batch_size": 64, "mean": 47.877723693847656, "std": 105.96646118164062, "min": -245.482666015625, "p10": -42.07696456909179, "median": 37.46782302856445, "p90": 194.2931335449219, "max": 328.1932678222656, "pos_frac": 0.71875, "sample": [22.828083038330078, 137.15931701660156, 83.73208618164062, -4.1386566162109375, 67.30878448486328, 78.47409057617188, 49.86671447753906, 218.44244384765625, -11.843231201171875, 7.5375213623046875, -184.74427795410156, 23.707408905029297, 136.93246459960938, 35.72718811035156, -0.3385887145996094, 328.1932678222656, -145.33253479003906, -10.835014343261719, 67.18473815917969, 110.75212097167969, 129.6343231201172, 8.078611373901367, -7.094953536987305, -144.6849822998047, -8.69337272644043, 8.135427474975586, -39.10858154296875, 8.359062194824219, 219.25572204589844, 159.66448974609375, 8.483295440673828, 18.123565673828125, 38.3141975402832, -6.178241729736328, -223.5758514404297, 4.637855529785156, 15.2550048828125, 149.44122314453125, 203.59054565429688, 114.02825927734375, 42.453800201416016, 156.6461181640625, 89.14486694335938, -50.35631561279297, -245.482666015625, 204.766357421875, 208.1095733642578, 194.94308471679688, -43.34912872314453, -17.24504852294922, 36.78895568847656, 38.146690368652344, -27.92340087890625, 6.173585891723633, 57.48792266845703, 94.37667083740234, 60.3226318359375, 191.8918914794922, 192.77658081054688, 92.76580810546875, 74.2951889038086, 1.8274612426757812, 67.07032012939453, -27.736122131347656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000585.npy"}
|
||||
{"epoch": 0.8843537414965986, "step": 586, "batch_size": 64, "mean": 68.22322845458984, "std": 81.63485717773438, "min": -90.06100463867188, "p10": -12.327544021606444, "median": 47.92987060546875, "p90": 192.63423919677734, "max": 257.52777099609375, "pos_frac": 0.796875, "sample": [89.73089599609375, 20.538063049316406, 163.45704650878906, 49.25641632080078, 48.79548645019531, 143.2139892578125, 24.715574264526367, 8.362396240234375, -10.224342346191406, -16.01008415222168, 124.56807708740234, -17.059532165527344, 3.9288787841796875, 235.53640747070312, 204.8867950439453, -11.10008430480957, 116.30020141601562, 116.57597351074219, 78.49456024169922, 136.59783935546875, 22.568416595458984, 67.78660583496094, 24.287525177001953, 190.7960662841797, 21.983123779296875, -3.3914241790771484, 14.491249084472656, 18.9249267578125, 163.7886505126953, 28.475818634033203, 155.06704711914062, 164.4267578125, 66.89900970458984, 60.8957633972168, 237.6890411376953, 2.4923858642578125, -90.06100463867188, 228.51138305664062, -3.0572166442871094, 6.973257064819336, 4.640443801879883, 29.60833740234375, -34.72074890136719, 129.8649444580078, 90.75761413574219, -12.54934310913086, 1.0515670776367188, 72.6343994140625, -4.658622741699219, 126.25503540039062, 257.52777099609375, -11.810012817382812, 12.723831176757812, 49.501243591308594, 193.42202758789062, 44.18671417236328, -52.54332733154297, 195.30868530273438, 47.06425476074219, -34.61393737792969, 181.51461791992188, 11.792583465576172, 94.50788116455078, 84.70864868164062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000586.npy"}
|
||||
{"epoch": 0.8858654572940288, "step": 587, "batch_size": 64, "mean": 66.93518829345703, "std": 134.68746948242188, "min": -514.984130859375, "p10": -74.00084991455077, "median": 76.17851638793945, "p90": 208.75170745849613, "max": 324.5198669433594, "pos_frac": 0.796875, "sample": [27.036060333251953, -125.71025085449219, -3.4074478149414062, 143.48965454101562, 148.4673614501953, -137.87452697753906, 49.9211311340332, -48.080413818359375, 182.00588989257812, 162.45382690429688, 17.939983367919922, 79.014892578125, -85.85981750488281, 6.1964874267578125, 0.1645965576171875, 88.31819152832031, 151.78155517578125, -5.221828460693359, -38.09843444824219, 223.8289337158203, 212.1594696044922, 150.58285522460938, 0.26955413818359375, 200.80026245117188, 122.18606567382812, 23.33924102783203, 212.52099609375, 14.217544555664062, -514.984130859375, 20.506237030029297, 123.08084869384766, 139.93373107910156, 127.10151672363281, 137.2628936767578, 14.419971466064453, 79.578125, 93.40028381347656, -294.38641357421875, -77.91473388671875, 324.5198669433594, 167.66134643554688, 5.553928375244141, 191.93051147460938, 121.96859741210938, 2.262937545776367, 167.57125854492188, 12.028701782226562, 185.54690551757812, 178.14065551757812, 116.475830078125, 223.34095764160156, -112.72354888916016, 155.896484375, 12.913887023925781, 66.20501708984375, 259.51947021484375, 27.691322326660156, -14.754074096679688, -64.86845397949219, 55.280174255371094, 73.3421401977539, 306.4444580078125, 192.6455841064453, 8.817756652832031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000587.npy"}
|
||||
{"epoch": 0.8873771730914588, "step": 588, "batch_size": 64, "mean": 76.5220947265625, "std": 122.91536712646484, "min": -231.08273315429688, "p10": -84.60802612304687, "median": 67.7966537475586, "p90": 214.37485961914064, "max": 360.54241943359375, "pos_frac": 0.78125, "sample": [78.70146179199219, -99.06594848632812, -50.005897521972656, 198.69418334960938, 11.036605834960938, 153.55726623535156, 180.42770385742188, 30.562088012695312, 213.76043701171875, 190.41119384765625, 341.33929443359375, 48.46171569824219, 12.552289962768555, 138.5607147216797, 51.81670379638672, 130.0272979736328, 214.63818359375, 328.0962829589844, 20.927610397338867, 360.54241943359375, 85.31220245361328, 172.80801391601562, -32.24715805053711, 190.67733764648438, 2.0523853302001953, 74.57513427734375, 33.94383239746094, -7.337379455566406, 5.2631683349609375, -86.55557250976562, 4.844202041625977, 121.62845611572266, -139.23483276367188, 139.3285369873047, 133.43179321289062, 241.6907958984375, 20.5489444732666, 130.40101623535156, 85.17292785644531, -100.00959014892578, -31.59825897216797, -62.24713134765625, 88.27176666259766, 219.2245330810547, 138.17111206054688, 93.19953918457031, 181.0252227783203, -8.64493179321289, 32.96354675292969, 174.35646057128906, 34.69636535644531, -165.70498657226562, 55.75544738769531, 45.994972229003906, 142.62521362304688, 55.95088195800781, -80.06375122070312, 21.651145935058594, 61.01817321777344, -158.81845092773438, 178.2952423095703, 274.7916259765625, 206.2469482421875, -231.08273315429688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000588.npy"}
|
||||
{"epoch": 0.8888888888888888, "step": 589, "batch_size": 64, "mean": 75.4565200805664, "std": 90.48625183105469, "min": -124.06661987304688, "p10": -37.641791534423824, "median": 79.0855712890625, "p90": 187.59858093261718, "max": 299.65283203125, "pos_frac": 0.78125, "sample": [43.577728271484375, 42.736854553222656, -119.68325805664062, 197.66769409179688, 104.27664947509766, -0.46197509765625, -78.59130096435547, -10.20562744140625, 32.611732482910156, -6.5861053466796875, -45.03462219238281, 42.95635223388672, 299.65283203125, 2.779712677001953, 161.6482391357422, 84.15204620361328, 93.52840423583984, 149.23687744140625, 151.7030487060547, 149.11849975585938, 87.53730773925781, 102.74154663085938, 178.5189208984375, -97.44744873046875, 233.0854949951172, 123.75515747070312, -38.63666534423828, 118.75867462158203, 150.07057189941406, 145.99456787109375, 55.09367370605469, -6.747585296630859, 188.1758575439453, 70.36997985839844, -35.32041931152344, 83.50701141357422, 74.66413116455078, 35.91094207763672, 86.74446868896484, 195.01431274414062, 40.218841552734375, 112.2669906616211, 196.36834716796875, 113.65782928466797, 64.56739807128906, -59.383941650390625, 136.21286010742188, -25.827411651611328, -9.15230941772461, 26.538848876953125, -124.06661987304688, 173.83065795898438, 170.88055419921875, 19.711708068847656, 57.609413146972656, 186.25160217285156, 129.44407653808594, 152.62469482421875, 22.96080780029297, 27.21429443359375, 92.63092041015625, 31.947338104248047, 244.12847900390625, 1.707803726196289], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000589.npy"}
|
||||
{"epoch": 0.890400604686319, "step": 590, "batch_size": 64, "mean": 70.28041076660156, "std": 102.27523040771484, "min": -186.4287872314453, "p10": -48.668305969238276, "median": 65.80617141723633, "p90": 201.29828186035158, "max": 254.08102416992188, "pos_frac": 0.71875, "sample": [121.21440887451172, 40.95954132080078, 145.431884765625, 6.794975280761719, 197.13430786132812, 180.08135986328125, 109.4454345703125, -168.1118927001953, 34.26475143432617, -35.31884002685547, 58.23529052734375, 1.9526252746582031, 9.14276123046875, 29.072364807128906, -186.4287872314453, 187.1309814453125, 176.12586975097656, 53.72222900390625, -17.900676727294922, -1.3876190185546875, -60.17511749267578, 90.61199951171875, 4.465400695800781, 201.87307739257812, 199.95709228515625, -14.198585510253906, -43.887176513671875, 108.65278625488281, 254.08102416992188, 83.02981567382812, -42.83478546142578, 248.92642211914062, 14.578386306762695, -3.4119873046875, 99.38668823242188, 74.33457946777344, 93.20818328857422, 27.633316040039062, 77.04652404785156, 11.095687866210938, 215.77999877929688, 117.6429443359375, 248.53326416015625, 176.79287719726562, 187.56216430664062, 111.77235412597656, 99.79852294921875, 209.0102996826172, -55.35694885253906, 34.19050598144531, -63.457176208496094, 202.89254760742188, 136.04840087890625, -25.606674194335938, -1.7552642822265625, 191.2872314453125, -1.7389163970947266, 73.3770523071289, 193.9470977783203, -3.855792999267578, 45.61465835571289, -50.71736145019531, 188.5583953857422, -98.30865478515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000590.npy"}
|
||||
{"epoch": 0.891912320483749, "step": 591, "batch_size": 64, "mean": 88.06401062011719, "std": 132.9715118408203, "min": -203.8090057373047, "p10": -85.0497001647949, "median": 79.96005249023438, "p90": 228.45464019775392, "max": 449.36956787109375, "pos_frac": 0.765625, "sample": [2.965608596801758, 203.0108642578125, 175.08811950683594, 179.5203857421875, 176.28811645507812, 46.61817169189453, 196.500732421875, 100.23435974121094, 83.087158203125, 99.11125183105469, 58.48210144042969, -146.29098510742188, 212.0103302001953, -96.24451446533203, 108.9699478149414, -23.678634643554688, 32.64056396484375, 76.83294677734375, 106.62591552734375, 4.595043182373047, 229.71792602539062, 21.963563919067383, 322.3041076660156, -174.55615234375, 188.6955108642578, -166.55723571777344, 199.6519775390625, 110.40438842773438, -6.69171142578125, 15.774993896484375, -203.8090057373047, -178.6565704345703, 283.3029479980469, 225.50697326660156, -3.2905654907226562, 212.29994201660156, -5.613651275634766, 65.53677368164062, 190.42742919921875, 23.955432891845703, 8.824974060058594, -9.107002258300781, 60.6500244140625, 173.4635772705078, 0.9455432891845703, 261.814697265625, -10.868257522583008, -99.73893737792969, -6.378913879394531, 449.36956787109375, -58.928466796875, 19.429176330566406, 273.6402587890625, 29.514793395996094, 13.734737396240234, 208.7171630859375, 212.31546020507812, 146.74557495117188, 171.31553649902344, 135.11375427246094, 219.90428161621094, 283.75146484375, 4.401006698608398, 200.73214721679688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000591.npy"}
|
||||
{"epoch": 0.8934240362811792, "step": 592, "batch_size": 64, "mean": 66.00066375732422, "std": 105.29450225830078, "min": -123.33518981933594, "p10": -50.418979263305644, "median": 27.538074493408203, "p90": 200.3012145996094, "max": 362.1050720214844, "pos_frac": 0.734375, "sample": [72.74607849121094, 221.05783081054688, 202.50379943847656, 18.48345375061035, 189.56021118164062, 193.13864135742188, 241.58883666992188, 12.59896469116211, 6.47613525390625, 35.63878631591797, -28.089637756347656, 132.2556610107422, 65.92802429199219, 20.480331420898438, -61.64068603515625, 95.2458724975586, 195.16184997558594, 114.35580444335938, 5.649269104003906, 23.594844818115234, -105.82002258300781, 187.40777587890625, 101.22697448730469, 267.1914367675781, 20.978641510009766, 143.4128875732422, 125.15247344970703, 186.41900634765625, 193.5220947265625, 20.088485717773438, -97.63574981689453, 135.74542236328125, -19.895782470703125, 162.82266235351562, 31.444129943847656, 79.72471618652344, -123.33518981933594, 111.7884521484375, -115.28026580810547, -57.54795837402344, -5.883182525634766, -3.647581100463867, 30.28417205810547, -9.564285278320312, -68.42349243164062, 0.368011474609375, 30.070236206054688, 298.48565673828125, 236.45480346679688, 7.2603759765625, 362.1050720214844, -0.1147613525390625, 24.915477752685547, -0.17360687255859375, 15.751220703125, 5.025938034057617, -27.987701416015625, -14.028358459472656, -33.78469467163086, 10.754047393798828, 63.7733039855957, 25.00591278076172, 123.0364990234375, 150.21490478515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000592.npy"}
|
||||
{"epoch": 0.8949357520786092, "step": 593, "batch_size": 64, "mean": 72.10696411132812, "std": 108.47572326660156, "min": -191.3309326171875, "p10": -71.09162139892574, "median": 64.05571365356445, "p90": 212.38274230957035, "max": 255.7581024169922, "pos_frac": 0.78125, "sample": [202.61940002441406, 33.915035247802734, 202.43655395507812, 6.2508544921875, 179.6074676513672, -30.721885681152344, 103.11087799072266, -191.3309326171875, 216.56703186035156, 51.90507507324219, 198.67193603515625, 36.0909309387207, 255.7581024169922, 173.92185974121094, 55.04307556152344, 8.97940444946289, 123.1673812866211, 118.58639526367188, 185.01824951171875, 144.95193481445312, 20.70699119567871, 81.68561553955078, 232.15579223632812, 219.8726806640625, -19.070098876953125, -6.4246673583984375, 9.729585647583008, 174.8502197265625, 119.58464813232422, -127.99905395507812, 80.28884887695312, -88.39293670654297, 26.36370849609375, 58.59027862548828, 107.21021270751953, 174.44537353515625, 1.8685836791992188, -8.527801513671875, 138.62039184570312, -138.30906677246094, -132.77139282226562, -1.99908447265625, 225.5902557373047, -131.96368408203125, 172.7410888671875, 163.14846801757812, 175.03163146972656, 11.420328140258789, -17.40178680419922, 242.22662353515625, 69.52114868164062, -12.89280891418457, 200.23892211914062, 103.22317504882812, 78.45807647705078, -119.85608673095703, 11.999513626098633, 220.13267517089844, 1.9604911804199219, 13.12371826171875, 18.935211181640625, 32.77061462402344, 18.838165283203125, 140.572265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000593.npy"}
|
||||
{"epoch": 0.8964474678760394, "step": 594, "batch_size": 64, "mean": 63.89067459106445, "std": 96.47463989257812, "min": -256.4757080078125, "p10": -54.20079803466796, "median": 66.16675186157227, "p90": 183.7412872314453, "max": 234.350341796875, "pos_frac": 0.765625, "sample": [69.66755676269531, 161.6148223876953, -99.72756958007812, -8.578666687011719, 78.08680725097656, 148.09725952148438, 24.124183654785156, 53.55894470214844, 83.67530822753906, 5.502571105957031, -77.90631103515625, 28.047298431396484, -8.514259338378906, 150.14231872558594, 28.87479591369629, 6.815258026123047, -63.020477294921875, 2.578754425048828, 88.71524047851562, 10.925827026367188, 200.05682373046875, 90.4259033203125, 1.139333724975586, 189.13502502441406, 204.18641662597656, 113.90632629394531, 183.8042755126953, -47.919654846191406, 179.84124755859375, -2.0849781036376953, 156.1978759765625, 139.87142944335938, 232.32717895507812, 160.72959899902344, 63.47467041015625, -49.165435791015625, -58.65290069580078, 120.74409484863281, 142.35101318359375, -56.35881042480469, 183.5943145751953, 8.767839431762695, 71.56782531738281, 180.55909729003906, 2.9355239868164062, 20.03595542907715, 62.66419982910156, 195.4792938232422, 1.1879138946533203, 183.14927673339844, 160.23629760742188, -256.4757080078125, 112.77825927734375, 234.350341796875, 46.19683837890625, 123.47196960449219, 0.8089447021484375, 68.85883331298828, 96.174560546875, -32.935157775878906, 104.79600524902344, -34.91490173339844, -24.751914978027344, -66.2216796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000594.npy"}
|
||||
{"epoch": 0.8979591836734694, "step": 595, "batch_size": 64, "mean": 83.78953552246094, "std": 122.50353240966797, "min": -203.4674835205078, "p10": -34.05662536621093, "median": 71.27578353881836, "p90": 223.26018676757812, "max": 387.1507873535156, "pos_frac": 0.78125, "sample": [-4.467140197753906, 75.73206329345703, 387.1507873535156, 111.09893798828125, 2.9791488647460938, 10.452043533325195, 83.75740814208984, 40.5305290222168, 14.561538696289062, 197.93035888671875, 243.11990356445312, 16.810951232910156, 83.04368591308594, -143.47540283203125, 200.04481506347656, 4.833976745605469, -1.6436023712158203, 7.633270263671875, 205.2305145263672, 54.567420959472656, 24.87981414794922, 116.17262268066406, 238.8151092529297, 9.393669128417969, 294.52227783203125, 51.817649841308594, 107.83138275146484, 92.32575988769531, 219.9696807861328, 140.0859832763672, -2.2525863647460938, -126.46849060058594, 56.38818359375, -36.214385986328125, 191.08192443847656, 185.46620178222656, 255.53208923339844, -185.91537475585938, -195.65155029296875, 221.76937866210938, 189.73056030273438, 200.50625610351562, 164.03070068359375, 52.191864013671875, -94.14266967773438, 205.3838653564453, 103.48468017578125, 156.0967254638672, -29.0218505859375, 23.520374298095703, -3.6595211029052734, 236.25411987304688, 161.84432983398438, 35.586204528808594, -14.285354614257812, 34.9071044921875, 222.4563446044922, 185.86541748046875, 66.81950378417969, 6.409271240234375, -0.7999591827392578, -203.4674835205078, 189.7741241455078, 223.6046905517578], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000595.npy"}
|
||||
{"epoch": 0.8994708994708994, "step": 596, "batch_size": 64, "mean": 35.489410400390625, "std": 99.41326141357422, "min": -189.0868682861328, "p10": -81.19663238525388, "median": 17.45632266998291, "p90": 187.6690185546875, "max": 256.2705078125, "pos_frac": 0.578125, "sample": [-94.31803894042969, 1.7197647094726562, 210.5271759033203, 17.899330139160156, 37.226280212402344, -90.67449188232422, -16.24325942993164, 0.19103240966796875, -59.081626892089844, 200.8744659423828, -114.79833221435547, 60.53969192504883, -103.52703857421875, -28.617401123046875, 256.2705078125, 186.7432403564453, 168.2152557373047, 97.30363464355469, -133.4662322998047, 55.442161560058594, -45.1368408203125, 188.06578063964844, 53.7974853515625, 121.2723159790039, 198.58790588378906, 179.7992706298828, 114.96796417236328, -15.293554306030273, 83.66826629638672, -125.49871826171875, -58.38767623901367, 88.14045715332031, 24.011329650878906, 17.013315200805664, 93.28174591064453, 4.295017242431641, 56.146095275878906, 237.0421600341797, 12.929988861083984, -16.987815856933594, -23.886985778808594, -11.147445678710938, -20.248004913330078, -189.0868682861328, -28.906234741210938, -8.734966278076172, 21.33709716796875, -17.655147552490234, 207.10580444335938, 83.66888427734375, 148.5490264892578, -3.18450927734375, -24.550559997558594, -52.34014129638672, -22.374183654785156, 172.78578186035156, 22.161148071289062, 133.44346618652344, -39.5235595703125, 42.16314697265625, 23.987895965576172, 70.31678771972656, -23.049957275390625, -53.4488525390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000596.npy"}
|
||||
{"epoch": 0.9009826152683296, "step": 597, "batch_size": 64, "mean": 64.47881317138672, "std": 100.2386245727539, "min": -150.59994506835938, "p10": -55.24127502441405, "median": 36.627309799194336, "p90": 198.74408264160158, "max": 254.08384704589844, "pos_frac": 0.71875, "sample": [-7.196369171142578, -33.309181213378906, -99.39096069335938, -23.267166137695312, 208.11685180664062, -19.891502380371094, 37.771488189697266, -95.53714752197266, -47.44236755371094, 203.03067016601562, 8.80367660522461, 122.71533203125, 1.6250858306884766, 194.17547607421875, -58.58366394042969, 9.694990158081055, 190.76577758789062, 213.22154235839844, 187.52935791015625, -11.801324844360352, 102.92586517333984, 12.587844848632812, -86.28153991699219, 30.229087829589844, 189.84298706054688, 172.61883544921875, 9.054044723510742, -74.61587524414062, 185.16818237304688, 7.622797012329102, 152.8101348876953, 43.8709716796875, 203.34271240234375, 35.483131408691406, 95.37967681884766, 107.61898803710938, -20.361618041992188, 118.07229614257812, 30.238359451293945, 146.18954467773438, 2.4429969787597656, -109.28115844726562, 11.404930114746094, -10.455198287963867, -2.1112003326416016, -7.51191520690918, 5.927278518676758, 61.853851318359375, 166.72686767578125, 70.84798431396484, 200.70205688476562, 4.471714019775391, 136.36041259765625, -12.610748291015625, -150.59994506835938, 132.45132446289062, 1.6073341369628906, 144.9370574951172, 254.08384704589844, 241.8760986328125, 74.32238006591797, 183.87025451660156, 93.11774444580078, 189.38307189941406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000597.npy"}
|
||||
{"epoch": 0.9024943310657596, "step": 598, "batch_size": 64, "mean": 40.67690658569336, "std": 104.20198822021484, "min": -192.3885955810547, "p10": -66.49484863281249, "median": 20.691414833068848, "p90": 189.0698959350586, "max": 312.2001037597656, "pos_frac": 0.65625, "sample": [-3.931917190551758, -130.93984985351562, 5.419914245605469, 38.031394958496094, 72.96868133544922, 2.797161102294922, 6.4864654541015625, 195.64105224609375, 190.35694885253906, -189.11497497558594, 68.41944122314453, 41.814971923828125, 42.81137466430664, -36.94181823730469, 138.8722381591797, -25.83789825439453, 241.77481079101562, -38.29327392578125, 172.9556884765625, 40.43623733520508, -16.487695693969727, -192.3885955810547, -40.64369201660156, 108.44236755371094, 65.80633544921875, 46.281639099121094, 64.67198181152344, 63.012481689453125, 5.268951416015625, -60.4272346496582, -63.57855224609375, -127.84391021728516, 9.007822036743164, 123.53709411621094, 221.29647827148438, -4.816638946533203, -3.5168609619140625, 6.974132537841797, 91.85707092285156, 121.66827392578125, 173.59536743164062, 25.985015869140625, -3.999055862426758, 197.55990600585938, -26.06072998046875, 140.0106658935547, 28.827957153320312, 0.611846923828125, 158.53843688964844, 17.394454956054688, -67.74468994140625, -74.43372344970703, 8.070549011230469, 6.91632080078125, 23.988374710083008, -29.086318969726562, 168.9544677734375, 121.96479797363281, -38.08052062988281, -27.380027770996094, 312.2001037597656, 199.19009399414062, -151.61624145507812, 186.0667724609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000598.npy"}
|
||||
{"epoch": 0.9040060468631897, "step": 599, "batch_size": 64, "mean": 50.07246780395508, "std": 106.55410766601562, "min": -186.82955932617188, "p10": -77.8134712219238, "median": 41.89152908325195, "p90": 188.89234161376953, "max": 289.03558349609375, "pos_frac": 0.640625, "sample": [-51.167724609375, -28.377565383911133, 53.43488311767578, 53.02294158935547, -47.228187561035156, 95.8537826538086, 27.692737579345703, 252.60418701171875, -104.66650390625, -186.82955932617188, -5.950721740722656, -57.45728302001953, 14.421249389648438, 66.8143310546875, 126.58666229248047, -2.9628372192382812, -122.69371032714844, 1.6726722717285156, 1.4425697326660156, -39.19805908203125, 191.5542755126953, 136.54446411132812, 83.4022216796875, -173.24142456054688, -0.98297119140625, -12.250930786132812, 20.240219116210938, 189.19100952148438, -86.53755187988281, 51.574615478515625, 223.4834747314453, 188.19544982910156, -125.60213470458984, -35.679283142089844, 59.92939376831055, 16.516952514648438, 107.24846649169922, -29.962127685546875, 88.69377899169922, 73.97210693359375, 160.0480499267578, -26.61003875732422, 32.20844268798828, -50.2637939453125, 176.62362670898438, 64.76351928710938, 201.28799438476562, -11.560775756835938, -127.54450988769531, 15.421199798583984, 166.80670166015625, 238.69448852539062, 84.97537231445312, 170.50833129882812, 178.9764862060547, 27.571496963500977, 289.03558349609375, 180.54141235351562, 155.2650146484375, -4.28288459777832, -15.923416137695312, 82.15848541259766, 118.43135070800781, 84.20199584960938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000599.npy"}
|
||||
{"epoch": 0.9055177626606198, "step": 600, "batch_size": 64, "mean": 59.009403228759766, "std": 108.83338165283203, "min": -190.80426025390625, "p10": -67.29499588012695, "median": 36.60777473449707, "p90": 218.25529937744142, "max": 341.5312805175781, "pos_frac": 0.734375, "sample": [139.74386596679688, 64.16706085205078, 4.413858413696289, 9.570426940917969, 218.34011840820312, -17.592330932617188, 46.29457473754883, -129.79176330566406, 172.114990234375, 231.4666290283203, -17.134681701660156, -1.9524574279785156, 58.391746520996094, 127.13728332519531, 0.98828125, 21.306550979614258, -82.63652038574219, -87.53193664550781, -95.6558837890625, 187.9868927001953, 23.77591323852539, 88.06524658203125, 137.9397430419922, 118.70854187011719, -55.79694366455078, 293.22259521484375, 113.2875747680664, 189.42568969726562, -23.058609008789062, 208.24835205078125, 225.56820678710938, 3.2491226196289062, -190.80426025390625, -14.071739196777344, 85.79150390625, -45.432552337646484, -15.079010009765625, 146.50741577148438, 1.3293495178222656, 341.5312805175781, -7.642784118652344, 161.10018920898438, 5.256187438964844, 219.7010498046875, 0.8281650543212891, 9.969188690185547, -72.22273254394531, -21.160493850708008, -147.80126953125, 66.27633666992188, 2.6177139282226562, 87.81092834472656, 70.58702850341797, 26.920974731445312, 113.26300048828125, 11.421630859375, 57.72801208496094, 61.5385627746582, 93.61216735839844, 218.05738830566406, 1.3019027709960938, 53.85279083251953, 17.053558349609375, 264.49810791015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000600.npy"}
|
||||
{"epoch": 0.9070294784580499, "step": 601, "batch_size": 64, "mean": 80.06204986572266, "std": 119.23040008544922, "min": -207.7842254638672, "p10": -46.09972763061522, "median": 87.60953521728516, "p90": 209.8272674560547, "max": 333.1385498046875, "pos_frac": 0.71875, "sample": [166.07737731933594, 14.787437438964844, 200.28854370117188, -133.44268798828125, 210.00640869140625, 123.76824951171875, 326.74859619140625, -93.99691772460938, 73.46687316894531, -9.593505859375, 220.17955017089844, -162.56845092773438, 191.7122802734375, 21.522119522094727, -35.73406982421875, 107.51990509033203, 178.9900665283203, 293.5194091796875, -50.542152404785156, 165.94158935546875, 209.40927124023438, 4.997032165527344, 1.4890899658203125, 232.08160400390625, 204.17938232421875, 157.6374969482422, -1.492940902709961, -26.740062713623047, 137.26153564453125, 44.62736511230469, 17.16595458984375, 148.03758239746094, 179.59103393554688, -25.333229064941406, -4.795953750610352, 123.61662292480469, -7.510383605957031, 136.76214599609375, 71.77168273925781, 10.081380844116211, -68.78175354003906, 113.03912353515625, 15.675182342529297, 189.34051513671875, 101.752197265625, 151.70570373535156, 28.91501235961914, -26.190513610839844, -28.329294204711914, 7.5962982177734375, -4.829460144042969, 262.9949951171875, -0.6467685699462891, 4.712757110595703, 182.0897216796875, 196.85830688476562, 333.1385498046875, 123.25709533691406, 176.93133544921875, 2.4881973266601562, -152.07044982910156, 171.2376251220703, 129.3840789794922, -207.7842254638672], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000601.npy"}
|
||||
{"epoch": 0.90854119425548, "step": 602, "batch_size": 64, "mean": 74.938720703125, "std": 99.75630187988281, "min": -190.45663452148438, "p10": -9.619665908813474, "median": 51.21529960632324, "p90": 209.59068145751954, "max": 324.1199035644531, "pos_frac": 0.8125, "sample": [-7.416168212890625, 155.60623168945312, 215.51416015625, 324.1199035644531, 78.75178527832031, 40.440887451171875, 67.77120971679688, 2.7757091522216797, 42.49205017089844, -7.053623199462891, 227.99636840820312, 7.906360626220703, 210.92408752441406, 177.64395141601562, -110.24119567871094, 206.47940063476562, -156.07977294921875, 235.03726196289062, 141.77989196777344, -10.564022064208984, 12.911865234375, -3.2050094604492188, 19.019577026367188, -7.000873565673828, 39.11711120605469, 170.88150024414062, 87.32185363769531, 175.287841796875, 24.602298736572266, 103.71388244628906, 16.46451187133789, 163.78843688964844, 98.60088348388672, 49.36464309692383, 21.86557388305664, 21.84317398071289, 26.169025421142578, 53.065956115722656, 187.0147247314453, 87.29500579833984, 20.55276870727539, 150.1022186279297, 180.32594299316406, 169.8456573486328, 17.69878387451172, -190.45663452148438, 97.72197723388672, 70.38526916503906, 3.6422195434570312, 19.047630310058594, -13.280590057373047, -2.4536590576171875, 179.70947265625, -29.746864318847656, 28.584388732910156, 197.1981964111328, 83.33507537841797, 83.16368103027344, 220.08889770507812, 215.1824188232422, -102.87389373779297, 165.18663024902344, 16.601943969726562, 24.510284423828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000602.npy"}
|
||||
{"epoch": 0.91005291005291, "step": 603, "batch_size": 64, "mean": 71.90690612792969, "std": 108.56535339355469, "min": -207.72061157226562, "p10": -54.31144638061523, "median": 55.121543884277344, "p90": 190.68187255859374, "max": 317.0945129394531, "pos_frac": 0.75, "sample": [187.91673278808594, 135.55477905273438, 184.29354858398438, -31.185585021972656, -111.890869140625, 55.16566467285156, 54.85730743408203, 120.55081176757812, -6.18817138671875, 10.326217651367188, 186.7555389404297, 70.31990051269531, -3.2675094604492188, -92.99327850341797, 28.690086364746094, 282.5052795410156, -111.57768249511719, 23.41655731201172, 20.188003540039062, 317.0945129394531, 80.98543548583984, 26.175872802734375, 190.74978637695312, 83.19062805175781, -9.119831085205078, 55.077423095703125, 183.42845153808594, -61.039398193359375, 168.13694763183594, -188.5652313232422, -0.25925445556640625, 19.89495849609375, 152.1615753173828, 120.70343017578125, -56.77210235595703, 173.12255859375, 51.31074142456055, 34.69724655151367, -8.87306022644043, 193.11331176757812, 98.90028381347656, 171.10989379882812, 122.35722351074219, 63.43456268310547, 126.98201751708984, 205.27450561523438, 42.30647277832031, -48.569915771484375, 79.61921691894531, 20.359193801879883, 27.880447387695312, 54.78773498535156, 176.01136779785156, 188.4794158935547, 91.47283172607422, -18.259841918945312, 7.817409515380859, -207.72061157226562, 190.52340698242188, 189.33932495117188, 199.6142578125, 2.7849597930908203, 302.20843505859375, -13.3216552734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000603.npy"}
|
||||
{"epoch": 0.9115646258503401, "step": 604, "batch_size": 64, "mean": 63.492881774902344, "std": 113.47854614257812, "min": -238.23521423339844, "p10": -78.00426330566405, "median": 49.43279266357422, "p90": 204.1307571411133, "max": 377.89263916015625, "pos_frac": 0.703125, "sample": [119.44631958007812, 166.48175048828125, -12.285564422607422, -62.189300537109375, 9.652446746826172, 164.5650177001953, -10.291240692138672, 0.49659156799316406, 192.82180786132812, -28.892501831054688, 37.20965576171875, -10.748634338378906, 2.827688217163086, 170.25390625, -81.391357421875, -30.1171875, 181.5072479248047, 69.34121704101562, 46.49724578857422, 230.28863525390625, -168.99298095703125, -66.3817138671875, 33.279754638671875, -90.62994384765625, 154.39639282226562, 10.833032608032227, 11.346185684204102, 88.12328338623047, 62.37370300292969, -238.23521423339844, 45.58389663696289, -83.08448028564453, 72.18659210205078, -103.007568359375, 186.84445190429688, -23.671730041503906, 100.14974975585938, -37.69061279296875, 204.2701416015625, 152.27218627929688, 102.26081848144531, 203.80552673339844, 377.89263916015625, 31.43372344970703, 174.01919555664062, 105.51217651367188, 32.20185089111328, 197.57347106933594, 213.44419860839844, -70.10104370117188, 219.74034118652344, -1.8304595947265625, 98.49825286865234, 52.36833953857422, 149.73648071289062, 55.180381774902344, 151.8167724609375, 39.826942443847656, 2.5279598236083984, 219.6555633544922, 164.97637939453125, -32.70713806152344, -98.59208679199219, 208.86549377441406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000604.npy"}
|
||||
{"epoch": 0.9130763416477702, "step": 605, "batch_size": 64, "mean": 65.6489486694336, "std": 110.85015869140625, "min": -210.51025390625, "p10": -54.87702445983886, "median": 60.6675968170166, "p90": 198.8378875732422, "max": 286.75823974609375, "pos_frac": 0.71875, "sample": [85.38654327392578, 107.1827392578125, -150.29298400878906, 286.75823974609375, -18.310890197753906, 205.4713134765625, 70.53929138183594, 209.35928344726562, 116.75286102294922, -29.23711395263672, 190.7506103515625, -24.961593627929688, 46.207420349121094, -26.058937072753906, 2.170642852783203, -92.36320495605469, 82.74339294433594, 171.57479858398438, 153.1314697265625, 57.53371047973633, -42.90083312988281, 23.84033203125, 151.35488891601562, -210.51025390625, 1.7464828491210938, 146.23472595214844, 193.20492553710938, 183.30760192871094, 55.221839904785156, 63.801483154296875, 167.41934204101562, 45.030906677246094, -203.84274291992188, 170.27716064453125, -2.974609375, 217.89263916015625, 187.89646911621094, 137.84454345703125, 24.097305297851562, 114.74594116210938, -77.76557922363281, -15.330265045166016, 188.05294799804688, 40.35350036621094, 201.25201416015625, 26.835426330566406, 207.20620727539062, 180.04063415527344, 185.58279418945312, -29.43408203125, 245.97970581054688, 69.81239318847656, 106.74336242675781, -41.03168487548828, -17.55047607421875, 15.072961807250977, 16.2991886138916, -174.54299926757812, 45.13127136230469, -60.00967788696289, 10.942201614379883, 111.14732360839844, -15.147598266601562, 113.86740112304688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000605.npy"}
|
||||
{"epoch": 0.9145880574452003, "step": 606, "batch_size": 64, "mean": 80.81857299804688, "std": 112.12389373779297, "min": -146.55123901367188, "p10": -56.24478340148925, "median": 74.19674301147461, "p90": 229.21256408691406, "max": 348.3960876464844, "pos_frac": 0.75, "sample": [-50.883766174316406, 227.80075073242188, 63.81950378417969, 250.65863037109375, -94.39225006103516, 27.55670928955078, 179.99673461914062, 54.17408752441406, -3.5839672088623047, 166.2779998779297, 193.14175415039062, -26.317611694335938, 116.656005859375, 229.817626953125, 190.458740234375, -146.55123901367188, 122.41514587402344, 76.89865112304688, 6.65369987487793, 163.10995483398438, 348.3960876464844, 19.729583740234375, 57.011531829833984, 172.03506469726562, -50.84361267089844, -81.21114349365234, 286.2303466796875, -37.50218200683594, 123.5191650390625, 142.43728637695312, 115.08634948730469, 2.1823654174804688, -105.24657440185547, 183.86050415039062, 96.14222717285156, 2.5092029571533203, 72.29359436035156, 0.5197963714599609, 164.64810180664062, -0.8726730346679688, 200.71531677246094, 243.23028564453125, 75.21910858154297, 141.03970336914062, 6.3158111572265625, 281.26727294921875, -20.632156372070312, 73.17437744140625, -95.01248931884766, -58.542362213134766, 189.38516235351562, 41.992645263671875, -116.79863739013672, 25.334136962890625, -20.871803283691406, 142.05819702148438, 88.18692779541016, -18.87850570678711, 121.38552856445312, 263.92376708984375, 145.76832580566406, 160.48028564453125, 44.900543212890625, 0.11541938781738281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000606.npy"}
|
||||
{"epoch": 0.9160997732426304, "step": 607, "batch_size": 64, "mean": 69.12503051757812, "std": 111.21524047851562, "min": -227.81320190429688, "p10": -73.85579071044921, "median": 61.72102737426758, "p90": 207.9324569702149, "max": 312.14080810546875, "pos_frac": 0.734375, "sample": [2.0459823608398438, 32.881195068359375, -147.60841369628906, 97.62844848632812, 144.0758819580078, 5.461494445800781, 143.58245849609375, 87.86551666259766, -227.81320190429688, -54.74761199951172, -3.990528106689453, -66.19657135009766, 124.09112548828125, 191.53977966308594, 45.46800231933594, -81.46856689453125, -13.714973449707031, 8.788671493530273, 55.36890411376953, 216.9801788330078, 30.819793701171875, 226.29598999023438, 140.58901977539062, 132.7378692626953, 123.3223876953125, 32.16443634033203, -22.16077423095703, 172.03001403808594, -9.447547912597656, -20.917892456054688, 216.01910400390625, 107.0255355834961, 133.99124145507812, 160.31024169921875, -90.86099243164062, 213.146728515625, 312.14080810546875, -9.88980484008789, 165.0126495361328, 18.443283081054688, -65.45694732666016, 10.629035949707031, 36.47796630859375, 165.40440368652344, 107.53028869628906, 4.728126525878906, -94.77035522460938, 156.1630859375, 252.5220947265625, 178.7178497314453, 1.0263328552246094, 36.3367919921875, 142.59298706054688, 226.348388671875, 54.01204299926758, 195.7658233642578, -2.410625457763672, 88.1729736328125, 177.3191680908203, 193.60569763183594, 68.07315063476562, -174.12982177734375, -77.13831329345703, 151.47201538085938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000607.npy"}
|
||||
{"epoch": 0.9176114890400605, "step": 608, "batch_size": 64, "mean": 72.25233459472656, "std": 101.73524475097656, "min": -159.03131103515625, "p10": -36.77982025146483, "median": 43.25288391113281, "p90": 200.17517395019533, "max": 314.50787353515625, "pos_frac": 0.765625, "sample": [185.15345764160156, -90.53945922851562, -0.5713539123535156, 191.09759521484375, 0.6613445281982422, -94.52871704101562, 155.1834716796875, 6.91253662109375, 28.445354461669922, -126.16346740722656, 143.4250946044922, 105.3081283569336, 149.12135314941406, 190.24107360839844, 72.08990478515625, 143.5989990234375, 246.05043029785156, -159.03131103515625, -6.426576614379883, 17.40505599975586, 60.85429382324219, -17.237545013427734, -0.7198429107666016, 220.08131408691406, 183.35250854492188, 23.808834075927734, 2.1849746704101562, 101.29737854003906, 10.412025451660156, 202.50389099121094, 223.37774658203125, -43.256874084472656, 46.11376953125, 126.05963134765625, 40.391998291015625, 111.4906005859375, 174.385986328125, -21.66669464111328, -68.22776794433594, -9.503631591796875, -1.7443923950195312, 18.023597717285156, 64.18019104003906, 33.135398864746094, -12.023178100585938, 143.25363159179688, 4.85772705078125, 176.67059326171875, 314.50787353515625, 180.17724609375, 106.80670166015625, 28.257675170898438, 47.46485137939453, 190.93728637695312, 3.203685760498047, 157.896484375, -85.10916137695312, 194.7415008544922, 36.3359375, 7.128509521484375, 27.596092224121094, 208.7424774169922, 28.386886596679688, 227.58673095703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000608.npy"}
|
||||
{"epoch": 0.9191232048374905, "step": 609, "batch_size": 64, "mean": 78.45690155029297, "std": 96.81139373779297, "min": -231.88946533203125, "p10": -48.76368408203125, "median": 84.78770446777344, "p90": 190.7690414428711, "max": 340.92828369140625, "pos_frac": 0.796875, "sample": [216.13511657714844, 185.93418884277344, 81.82518768310547, 10.451133728027344, 110.96771240234375, 25.626373291015625, 183.98744201660156, 120.94483947753906, -28.189483642578125, -2.2298355102539062, -51.03105163574219, 126.92453002929688, -18.529281616210938, 15.321979522705078, 340.92828369140625, 164.97549438476562, 200.5612335205078, 97.83480834960938, 79.95765686035156, 214.54403686523438, -56.97187042236328, 111.6212158203125, 93.10330200195312, 77.96484375, -80.90408325195312, 207.010009765625, 165.54156494140625, 93.59729766845703, -108.28929138183594, 150.68670654296875, 66.8472671508789, -1.0601062774658203, 184.38375854492188, 90.78897857666016, -30.944183349609375, 122.77460479736328, 67.36927795410156, 0.35746192932128906, 166.8055419921875, 126.80226135253906, 78.85346984863281, -52.76095199584961, 75.4944839477539, 201.16925048828125, 188.40896606445312, -231.88946533203125, 14.57052230834961, 87.67141723632812, 88.68548583984375, 7.51478385925293, 45.85829544067383, 108.66163635253906, 124.31231689453125, 64.83052825927734, 2.579763412475586, 15.240219116210938, 191.78050231933594, 120.3208999633789, 176.9766845703125, -52.064491271972656, 166.15353393554688, 81.90399169921875, 36.01806640625, -43.47315979003906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000609.npy"}
|
||||
{"epoch": 0.9206349206349206, "step": 610, "batch_size": 64, "mean": 77.01236724853516, "std": 113.82139587402344, "min": -183.20184326171875, "p10": -38.09347381591797, "median": 59.33132553100586, "p90": 228.40684204101564, "max": 338.33087158203125, "pos_frac": 0.6875, "sample": [58.46063995361328, 338.33087158203125, 232.87161254882812, 150.21949768066406, 83.19060516357422, 199.08555603027344, -0.3771209716796875, 6.014589309692383, -129.79420471191406, 10.710281372070312, 83.2441635131836, -105.1723403930664, 109.48484802246094, 33.03602600097656, -14.752151489257812, 60.20201110839844, 187.61720275878906, -32.92530822753906, -34.652679443359375, -20.93067169189453, 42.701454162597656, -85.28709411621094, -14.156303405761719, 145.1590576171875, -39.56809997558594, -64.28770446777344, 34.52580261230469, 194.60731506347656, 66.15754699707031, 236.17544555664062, 56.375732421875, 325.2720642089844, 226.00653076171875, 199.53897094726562, -32.03523254394531, 177.51467895507812, 192.01898193359375, -1.9173412322998047, 180.53146362304688, 25.07025146484375, 18.037933349609375, 156.90675354003906, 68.62682342529297, -183.20184326171875, 242.42333984375, -6.323825836181641, -31.53691864013672, -24.92306900024414, 186.4553985595703, 229.435546875, 111.07832336425781, 98.74365234375, 55.04835510253906, -5.826845169067383, 1.681020736694336, -22.840110778808594, -102.34028625488281, 192.23089599609375, 214.8908233642578, 100.97244262695312, 84.50111389160156, 58.02388000488281, 256.19927978515625, 152.26205444335938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000610.npy"}
|
||||
{"epoch": 0.9221466364323507, "step": 611, "batch_size": 64, "mean": 61.01106643676758, "std": 93.90119171142578, "min": -174.52073669433594, "p10": -41.398797988891594, "median": 60.13778305053711, "p90": 188.3390609741211, "max": 251.02883911132812, "pos_frac": 0.71875, "sample": [108.21511840820312, 117.44544982910156, 34.36456298828125, 51.67544937133789, 161.80023193359375, -3.5854263305664062, 6.0574951171875, 57.61212158203125, -3.0607032775878906, 78.57936096191406, 2.319629669189453, 148.60252380371094, 78.32007598876953, 119.77012634277344, -6.459083557128906, 10.522163391113281, -99.68582153320312, 191.1596221923828, -36.38895034790039, 251.02883911132812, 179.5926971435547, 72.55467987060547, 150.14633178710938, -75.18426513671875, -60.29477310180664, -7.102272033691406, 193.3045654296875, 73.07401275634766, 107.41168975830078, 235.06387329101562, -1.816650390625, -43.545875549316406, 53.892730712890625, 201.78353881835938, -33.62480163574219, -6.9375152587890625, -174.52073669433594, 240.5182647705078, 62.66344451904297, 36.93745422363281, 136.56195068359375, 71.96356201171875, -161.15249633789062, 63.62852478027344, 35.54082489013672, 92.89258575439453, 90.2144775390625, 45.648834228515625, 155.77053833007812, 176.1259765625, 72.59225463867188, 164.02178955078125, -0.6334552764892578, 74.29895782470703, 4.9061737060546875, 29.643020629882812, 200.9386749267578, -112.93708038330078, 181.75775146484375, 41.947967529296875, -24.75909996032715, 7.225654602050781, -4.631504058837891, 90.93310546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000611.npy"}
|
||||
{"epoch": 0.9236583522297808, "step": 612, "batch_size": 64, "mean": 89.00161743164062, "std": 105.58413696289062, "min": -224.2931671142578, "p10": -19.951713943481444, "median": 86.86270141601562, "p90": 226.86520538330086, "max": 295.57958984375, "pos_frac": 0.78125, "sample": [-3.327850341796875, 9.501373291015625, 123.69589233398438, 39.012306213378906, -37.67103576660156, 5.580009460449219, 171.40325927734375, 135.100341796875, -11.373870849609375, 38.57301330566406, 151.21731567382812, 109.33140563964844, 68.13732147216797, 85.60072326660156, 157.70709228515625, 51.591766357421875, 61.27970886230469, -1.2416915893554688, 87.89442443847656, 160.38394165039062, 164.32266235351562, -43.576332092285156, 147.53463745117188, 36.57763671875, 85.83097839355469, 37.09772872924805, 245.96986389160156, -17.45645523071289, -35.383602142333984, 21.841880798339844, 82.09900665283203, 201.60902404785156, 3.770191192626953, -7.916837692260742, -21.02111053466797, 209.00628662109375, 19.36402130126953, 192.64266967773438, 263.74774169921875, -150.8980255126953, 242.01573181152344, 77.23207092285156, 201.5275115966797, -136.30381774902344, 265.6412048339844, 137.1252899169922, 198.31051635742188, -16.418989181518555, 166.3230743408203, 234.51902770996094, 99.97998809814453, -9.269157409667969, 270.4869079589844, 102.34405517578125, 38.668373107910156, 295.57958984375, 133.26959228515625, 79.1636734008789, 96.54997253417969, -224.2931671142578, 105.9593734741211, 154.00164794921875, 164.43641662597656, 181.6973419189453], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000612.npy"}
|
||||
{"epoch": 0.9251700680272109, "step": 613, "batch_size": 64, "mean": 79.65707397460938, "std": 103.75579833984375, "min": -211.35977172851562, "p10": -26.69921493530273, "median": 66.78240966796875, "p90": 203.6477783203125, "max": 248.75442504882812, "pos_frac": 0.75, "sample": [-13.32440185546875, 52.86741638183594, -12.171905517578125, 52.09696960449219, 110.74674224853516, 155.14291381835938, 150.16036987304688, 89.52981567382812, -28.328773498535156, -6.252908706665039, 55.24549102783203, 59.85905838012695, -186.3128662109375, 162.2955780029297, 161.65744018554688, 64.84501647949219, -34.614540100097656, 2.081266403198242, 246.60165405273438, 183.513427734375, 14.464134216308594, -18.02589988708496, -211.35977172851562, 15.815614700317383, 183.30364990234375, 72.64370727539062, 174.9864501953125, 3.0166378021240234, 17.222557067871094, 20.009056091308594, 239.65139770507812, 56.61677932739258, 112.27117919921875, -2.764636993408203, -11.507225036621094, 68.71980285644531, -74.78482818603516, 181.46070861816406, 121.93603515625, 224.82736206054688, 178.47994995117188, 202.9549560546875, 57.730186462402344, 182.08184814453125, 243.90736389160156, 5.526496887207031, 248.75442504882812, -2.086824417114258, -22.89691162109375, 202.87998962402344, -18.728439331054688, -57.593780517578125, 181.7475128173828, 240.27833557128906, 112.72145080566406, 155.87828063964844, 13.805011749267578, 203.9447021484375, 138.8710174560547, 194.90841674804688, 49.085853576660156, 88.20968627929688, 110.68175506591797, -67.22894287109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000613.npy"}
|
||||
{"epoch": 0.926681783824641, "step": 614, "batch_size": 64, "mean": 74.38656616210938, "std": 101.89347839355469, "min": -186.32269287109375, "p10": -53.11169204711913, "median": 83.10820770263672, "p90": 185.66541442871093, "max": 311.05859375, "pos_frac": 0.796875, "sample": [186.36148071289062, 136.05197143554688, 25.351322174072266, 147.88861083984375, 99.16149139404297, 122.23775482177734, 8.066829681396484, 0.34842681884765625, 95.75726318359375, 70.25863647460938, 221.5135498046875, 139.2728271484375, 235.05458068847656, 207.44837951660156, 141.57083129882812, 166.5415802001953, 136.37220764160156, 76.47576141357422, 129.92922973632812, 137.08399963378906, -146.3898162841797, 6.7957611083984375, -39.1015625, -186.32269287109375, 311.05859375, 9.148414611816406, -5.700841903686523, -28.4595947265625, 174.59169006347656, -83.92733764648438, -77.29439544677734, 13.610462188720703, 7.431995391845703, 179.21359252929688, 54.18887710571289, 7.564912796020508, 147.43048095703125, 111.5335693359375, -163.49769592285156, 152.76031494140625, -4.348884582519531, 176.81005859375, 160.0556640625, 143.07257080078125, 107.33267974853516, 153.56520080566406, 22.66820526123047, 82.26278686523438, 159.09786987304688, 51.35771179199219, 30.47002410888672, 73.35382843017578, -57.94554138183594, -41.83271026611328, 21.382389068603516, 222.92095947265625, 88.80863952636719, -15.944267272949219, 184.041259765625, 24.66326904296875, 83.95362854003906, 48.44124221801758, -85.95840454101562, 205.130859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000614.npy"}
|
||||
{"epoch": 0.9281934996220711, "step": 615, "batch_size": 64, "mean": 76.60618591308594, "std": 123.95965576171875, "min": -255.57757568359375, "p10": -71.85657348632812, "median": 58.46638107299805, "p90": 226.31297454833992, "max": 348.72552490234375, "pos_frac": 0.78125, "sample": [63.56983184814453, 97.06199645996094, 185.83717346191406, 20.959564208984375, 82.76498413085938, 185.75448608398438, 104.06439971923828, -160.2176055908203, 187.45343017578125, 51.621177673339844, -127.65978240966797, 74.40326690673828, -89.09735107421875, 137.3475799560547, 51.97813415527344, -103.00373840332031, 160.4984130859375, 241.7008056640625, 202.225341796875, 5.7053985595703125, 188.66748046875, -35.218040466308594, 257.3091125488281, -2.7675094604492188, 181.48297119140625, 50.712921142578125, -40.25090789794922, 53.36293029785156, 10.01947021484375, 209.4176483154297, 197.565673828125, 64.92496490478516, -13.685935974121094, 50.319522857666016, 5.574378967285156, 191.06622314453125, 239.32388305664062, -61.36798095703125, 23.075485229492188, 187.99021911621094, 233.53009033203125, 247.27392578125, 186.40235900878906, 6.4971466064453125, 168.8309326171875, 17.828174591064453, 43.22846984863281, -56.732513427734375, 46.9951171875, 209.47303771972656, -76.3516845703125, 316.5751953125, 15.732437133789062, 82.36033630371094, -22.400985717773438, 348.72552490234375, 70.39627075195312, 149.5239715576172, 193.0074920654297, 3.44036865234375, 16.853965759277344, 37.04705810546875, -210.35287475585938, -255.57757568359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000615.npy"}
|
||||
{"epoch": 0.9297052154195011, "step": 616, "batch_size": 64, "mean": 65.32906341552734, "std": 103.5947036743164, "min": -166.45828247070312, "p10": -40.549927139282225, "median": 53.64045333862305, "p90": 202.28754272460938, "max": 255.45291137695312, "pos_frac": 0.6875, "sample": [225.32290649414062, 19.603233337402344, -24.5189208984375, -25.9212646484375, 192.22259521484375, -1.9724407196044922, 251.32269287109375, 167.78843688964844, 0.9386024475097656, 123.39823150634766, 52.510581970214844, 11.880744934082031, 67.35066986083984, -166.45828247070312, 32.856014251708984, -41.144004821777344, 54.77032470703125, -39.16374588012695, -18.392494201660156, 219.82501220703125, 55.23883819580078, 199.13839721679688, -13.857576370239258, -17.429170608520508, 203.63717651367188, 1.3270244598388672, -13.056669235229492, 11.370933532714844, -42.23834991455078, 193.63804626464844, 167.8089599609375, 12.991317749023438, 83.80763244628906, 177.8465118408203, 242.54359436035156, 166.07998657226562, -10.77425765991211, 88.04475402832031, -36.990753173828125, 188.90524291992188, 183.41275024414062, 72.09234619140625, 51.921539306640625, 64.30064392089844, 172.51141357421875, 255.45291137695312, -60.038612365722656, 16.77890396118164, -163.951904296875, 14.521720886230469, -26.365863800048828, -117.32658386230469, 96.83089447021484, 74.75634765625, 27.158103942871094, 215.7200927734375, 100.4342269897461, -10.119869232177734, 126.98898315429688, -24.353286743164062, 179.96783447265625, 181.04669189453125, 64.84906005859375, -75.77912139892578], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000616.npy"}
|
||||
{"epoch": 0.9312169312169312, "step": 617, "batch_size": 64, "mean": 56.31550216674805, "std": 105.31151580810547, "min": -168.57748413085938, "p10": -57.65571212768554, "median": 45.069175720214844, "p90": 192.34953765869142, "max": 363.5108337402344, "pos_frac": 0.71875, "sample": [41.79236602783203, -7.518817901611328, -39.45707702636719, 34.092323303222656, 229.4160919189453, 121.50544738769531, 211.6441650390625, 183.71453857421875, 8.326976776123047, -93.43184661865234, 65.11080932617188, 29.71221160888672, -60.60227966308594, 1.0543804168701172, 2.4220848083496094, 191.3365478515625, -50.78038787841797, 226.01995849609375, 21.73492431640625, 4.95184326171875, 363.5108337402344, -17.60538101196289, -9.11459732055664, 187.36166381835938, 92.39253234863281, -0.23516464233398438, -1.7067604064941406, 123.47926330566406, 64.47453308105469, -4.652927398681641, 20.342342376708984, 18.702167510986328, 64.86839294433594, 192.78367614746094, 129.73382568359375, 229.79095458984375, 48.345985412597656, -168.57748413085938, -37.326881408691406, 167.0485382080078, 129.14837646484375, 23.382766723632812, -73.8658447265625, -14.80221939086914, 207.44570922851562, 184.15164184570312, -160.28067016601562, -167.00665283203125, -138.85545349121094, 74.4658203125, 117.95755004882812, 140.4315948486328, 53.95039367675781, 11.550359725952148, 173.60888671875, 105.28892517089844, 23.262550354003906, 12.283699035644531, 52.97236633300781, 91.57962036132812, 56.56975555419922, 81.94661712646484, 80.23793029785156, -45.89149475097656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000617.npy"}
|
||||
{"epoch": 0.9327286470143613, "step": 618, "batch_size": 64, "mean": 69.47793579101562, "std": 104.5862045288086, "min": -136.0919952392578, "p10": -42.37414398193358, "median": 44.65385055541992, "p90": 211.3807373046875, "max": 343.0275573730469, "pos_frac": 0.78125, "sample": [267.94921875, 227.36166381835938, 24.923397064208984, 166.39923095703125, -13.378746032714844, 0.6029720306396484, -58.5508918762207, 21.919700622558594, 306.38104248046875, 2.009502410888672, 4.932502746582031, 286.06597900390625, 160.77862548828125, 54.02653503417969, 196.60470581054688, 7.002662658691406, 57.593650817871094, 2.336578369140625, 343.0275573730469, 117.5882568359375, 53.02155303955078, 254.11827087402344, 2.315328598022461, 38.92890167236328, 14.331947326660156, 202.49127197265625, 46.178314208984375, 65.65494537353516, 116.15911102294922, 69.61874389648438, 26.483322143554688, 26.376014709472656, 210.8817138671875, -11.55826187133789, 48.797149658203125, -72.67770385742188, 58.4561767578125, -120.74918365478516, 128.637451171875, 40.04376983642578, -105.31201171875, -1.6092491149902344, 12.709949493408203, -136.0919952392578, 125.73723602294922, -3.9534759521484375, 43.12938690185547, -96.94847106933594, 120.86707305908203, 211.5946044921875, 120.40296173095703, 143.44667053222656, 7.884483337402344, -2.4373321533203125, 89.96073913574219, 39.67265319824219, -19.919052124023438, 91.97808074951172, 154.55413818359375, 177.03790283203125, -26.730026245117188, 145.373046875, -49.078765869140625, 31.235931396484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000618.npy"}
|
||||
{"epoch": 0.9342403628117913, "step": 619, "batch_size": 64, "mean": 75.67935180664062, "std": 109.82978057861328, "min": -284.7474060058594, "p10": -34.13830604553222, "median": 62.59538459777832, "p90": 217.25714416503908, "max": 301.69744873046875, "pos_frac": 0.765625, "sample": [130.9565887451172, 31.757064819335938, 2.685606002807617, 30.099456787109375, 187.85232543945312, 239.20143127441406, 215.78201293945312, -59.616233825683594, -117.99311828613281, 266.0556640625, 198.92709350585938, 70.31048583984375, 189.1729278564453, 13.945837020874023, 87.73922729492188, -10.832115173339844, 70.91179656982422, 68.73648071289062, -5.627466201782227, -8.070831298828125, -116.94451141357422, 250.34603881835938, 29.808731079101562, -60.896820068359375, -8.371118545532227, 179.28094482421875, 165.11788940429688, 201.30604553222656, 4.015897750854492, -28.460357666015625, 6.566398620605469, -284.7474060058594, -12.814682006835938, 240.14077758789062, 142.2236328125, 144.1297149658203, 22.37057876586914, 196.271484375, 217.88934326171875, -104.09654235839844, 126.74797058105469, 240.38577270507812, 111.8780517578125, 0.6596622467041016, 150.0478057861328, 122.11918640136719, 145.8773651123047, 160.359130859375, 163.76776123046875, 32.3018684387207, 52.872703552246094, 56.454288482666016, 13.803451538085938, 54.25694274902344, 301.69744873046875, 17.468177795410156, 70.40251159667969, 116.4516830444336, -6.622976303100586, 17.62285041809082, -9.3448486328125, 119.16382598876953, 36.54911804199219, -36.571712493896484], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000619.npy"}
|
||||
{"epoch": 0.9357520786092215, "step": 620, "batch_size": 64, "mean": 59.37013244628906, "std": 103.3785629272461, "min": -189.03057861328125, "p10": -56.8649444580078, "median": 34.31442928314209, "p90": 202.7143661499024, "max": 249.5632781982422, "pos_frac": 0.703125, "sample": [17.45838165283203, -177.01779174804688, -0.48184967041015625, 147.62832641601562, -78.11481475830078, -0.22278594970703125, 60.54136657714844, 208.9077911376953, -12.988533020019531, 97.0765609741211, 183.26158142089844, 152.00689697265625, 132.0907440185547, 116.56716918945312, -6.471893310546875, 248.76626586914062, 121.7509765625, 53.476654052734375, -23.947677612304688, 6.000480651855469, -189.03057861328125, 29.634654998779297, -62.673370361328125, 242.79959106445312, -71.94927978515625, 162.4273223876953, -38.385780334472656, -20.913543701171875, 207.032958984375, -43.289390563964844, 171.40943908691406, 217.56695556640625, -12.474163055419922, -13.775068283081055, -13.27499008178711, 21.161949157714844, 27.051525115966797, 37.002525329589844, 180.70736694335938, 3.5008316040039062, 168.83822631835938, 44.32215881347656, 116.04822540283203, 75.89934539794922, 19.033023834228516, -88.1837158203125, 249.5632781982422, 209.96078491210938, 1.9899158477783203, 152.90347290039062, 2.023855209350586, 192.6376495361328, -43.31195068359375, 10.196596145629883, -141.2589874267578, 31.626333236694336, 175.1455078125, 28.41241455078125, 94.24070739746094, 0.9278564453125, 129.84832763671875, 113.82958221435547, 119.0583724975586, 55.120872497558594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000620.npy"}
|
||||
{"epoch": 0.9372637944066515, "step": 621, "batch_size": 64, "mean": 62.80183029174805, "std": 115.09664916992188, "min": -212.006591796875, "p10": -62.523480606079104, "median": 67.2626953125, "p90": 210.1694915771485, "max": 387.27410888671875, "pos_frac": 0.71875, "sample": [-0.209747314453125, 25.6124267578125, 22.365684509277344, -62.71468734741211, -59.7923583984375, -150.088134765625, 71.05690002441406, 99.52955627441406, -62.07733154296875, 17.30594825744629, 90.37295532226562, 120.47010803222656, 180.645751953125, -6.085657119750977, 188.3693389892578, 69.37921142578125, 80.70105743408203, 132.2563934326172, 65.14617919921875, 194.2845458984375, 96.06986999511719, 77.3790283203125, 314.85833740234375, -160.10861206054688, 223.39743041992188, 165.361572265625, 74.37435913085938, -212.006591796875, 119.05215454101562, -104.22857666015625, 281.9595642089844, 102.45757293701172, 106.85989379882812, 216.97732543945312, 178.2832489013672, -25.13726806640625, 62.41597366333008, 16.126710891723633, 72.86619567871094, -32.753082275390625, -82.53408813476562, 15.49970817565918, 223.88572692871094, 12.70184326171875, 8.083112716674805, -20.734882354736328, 40.61946105957031, 0.1885528564453125, -11.646224975585938, -6.377132415771484, 4.388059616088867, 177.4873046875, 73.52749633789062, 10.944162368774414, 127.75360870361328, -7.668109893798828, -24.80535125732422, 387.27410888671875, -161.1104736328125, 72.75532531738281, 224.71469116210938, 25.920669555664062, 190.8751983642578, 146.84112548828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000621.npy"}
|
||||
{"epoch": 0.9387755102040817, "step": 622, "batch_size": 64, "mean": 66.50110626220703, "std": 107.25064086914062, "min": -203.63455200195312, "p10": -54.35641403198242, "median": 41.057857513427734, "p90": 197.10107879638673, "max": 259.0691223144531, "pos_frac": 0.71875, "sample": [168.51443481445312, 24.611164093017578, 15.656730651855469, -203.63455200195312, 17.711753845214844, 88.62273406982422, -37.66064453125, 14.605567932128906, -84.55613708496094, -0.9357147216796875, 225.58908081054688, 136.16976928710938, -0.08799171447753906, -46.56636428833008, 199.97119140625, 72.82099151611328, 192.5110626220703, 184.78842163085938, 180.89637756347656, 195.79013061523438, 180.36961364746094, 188.3057861328125, -10.921514511108398, -25.527647018432617, 259.0691223144531, 57.731300354003906, 41.92913818359375, 153.51052856445312, 7.922794342041016, 68.90733337402344, 196.1649932861328, -148.03524780273438, 69.26787567138672, 31.34726333618164, 32.19664764404297, -5.220022201538086, 194.38287353515625, 163.13778686523438, 86.23152160644531, 33.50428009033203, 169.23538208007812, 40.18657684326172, 141.35171508789062, -49.25238800048828, 142.32127380371094, 162.6537322998047, 251.71356201171875, -156.61898803710938, 4.639606475830078, -18.272621154785156, 197.50225830078125, 24.607444763183594, 1.6424026489257812, -56.543853759765625, -7.913507461547852, -87.98452758789062, 247.083984375, -1.3214950561523438, -68.64028930664062, 20.843406677246094, 123.10968017578125, 8.364988327026367, 202.75588989257812, 45.514251708984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000622.npy"}
|
||||
{"epoch": 0.9402872260015117, "step": 623, "batch_size": 64, "mean": 70.07640075683594, "std": 111.34229278564453, "min": -224.9193115234375, "p10": -56.80214462280273, "median": 66.73223495483398, "p90": 204.20729370117186, "max": 352.3788757324219, "pos_frac": 0.71875, "sample": [163.8922119140625, 36.018211364746094, -23.638965606689453, -163.82882690429688, 67.10164642333984, 263.9508361816406, 158.1326904296875, 172.38937377929688, 213.39151000976562, 92.33477020263672, -85.76500701904297, 62.405540466308594, 242.54730224609375, 75.91141510009766, -32.21867370605469, 235.7259521484375, -8.456241607666016, 183.96441650390625, 60.61817169189453, 122.12230682373047, 41.63800048828125, 35.668983459472656, 192.7613067626953, -5.401435852050781, -24.02837371826172, 100.6246109008789, 145.05052185058594, 203.81256103515625, -224.9193115234375, -67.09524536132812, 119.68949890136719, 152.91717529296875, 93.80659484863281, 149.76974487304688, -29.168304443359375, -121.83279418945312, 182.60565185546875, -8.17626953125, 152.41036987304688, 200.19961547851562, 223.38214111328125, 204.37646484375, 78.1314697265625, 4.257741928100586, -20.56012725830078, -13.92799186706543, -53.15831756591797, 29.74763298034668, 6.455623626708984, 127.4588851928711, 103.29693603515625, 352.3788757324219, 100.64881896972656, 98.84138488769531, 6.401756286621094, 14.169303894042969, -58.36378479003906, 7.997230529785156, 51.41168975830078, 4.706090927124023, 172.1327667236328, -4.175422668457031, 66.36282348632812, -144.01397705078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000623.npy"}
|
||||
{"epoch": 0.9417989417989417, "step": 624, "batch_size": 64, "mean": 77.34397888183594, "std": 114.55887603759766, "min": -199.32504272460938, "p10": -36.50309448242187, "median": 41.75185012817383, "p90": 206.32324981689453, "max": 324.33795166015625, "pos_frac": 0.703125, "sample": [-60.283843994140625, -14.463546752929688, 180.70834350585938, 132.2911834716797, 194.9486083984375, 182.05641174316406, -7.67047119140625, -10.409194946289062, -26.08349609375, -50.081321716308594, 77.45482635498047, 19.902603149414062, 128.05487060546875, 150.70509338378906, 181.3827667236328, 176.12130737304688, 15.025575637817383, 278.1120910644531, 268.5440979003906, 27.042518615722656, -30.114429473876953, -74.02262878417969, -19.972360610961914, 198.48068237304688, 9.698665618896484, 178.71189880371094, 46.85765838623047, -35.279258728027344, 29.442352294921875, 38.718963623046875, 324.33795166015625, 205.75827026367188, 122.77957153320312, 317.3269348144531, 11.731281280517578, -34.697757720947266, 206.5653839111328, 171.02125549316406, 287.47296142578125, 177.28134155273438, 65.53666687011719, 200.26211547851562, 116.43590545654297, 12.5748291015625, -78.51347351074219, -21.267379760742188, 250.57920837402344, 2.9292144775390625, -13.648828506469727, 151.78399658203125, 137.54989624023438, 0.027391433715820312, 3.917205810546875, 13.515962600708008, -199.32504272460938, 141.0655517578125, 31.449081420898438, -37.02759552001953, 139.27676391601562, -13.432415008544922, -138.17567443847656, -0.7704849243164062, 44.78473663330078, 165.03009033203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000624.npy"}
|
||||
{"epoch": 0.9433106575963719, "step": 625, "batch_size": 64, "mean": 60.318634033203125, "std": 117.67914581298828, "min": -186.8308563232422, "p10": -71.45821380615234, "median": 29.558765411376953, "p90": 228.90993041992192, "max": 328.47357177734375, "pos_frac": 0.671875, "sample": [59.981964111328125, 175.33636474609375, 43.91920471191406, 19.44146728515625, -0.05266761779785156, 159.07261657714844, 91.19275665283203, 138.21987915039062, 5.296194076538086, 280.8536376953125, 120.27401733398438, -71.91316223144531, 190.23289489746094, -23.296283721923828, 280.6495361328125, 38.611907958984375, 249.0442352294922, 36.66481018066406, -70.39666748046875, -23.77906036376953, 167.26512145996094, 160.90481567382812, 31.12224578857422, -114.62100219726562, -5.4154205322265625, 18.463302612304688, 27.995285034179688, 32.38056182861328, 16.957271575927734, 35.928070068359375, -2.329059600830078, 245.38804626464844, 328.47357177734375, -21.864883422851562, 151.6514892578125, -4.7201995849609375, -4.347352981567383, 308.4108581542969, -5.065696716308594, 18.059463500976562, 90.03436279296875, 172.55960083007812, 233.43710327148438, 118.44554901123047, 19.63617706298828, -19.963088989257812, -183.83914184570312, -82.58566284179688, -121.41827392578125, 179.2007598876953, 9.362457275390625, -164.16677856445312, -186.8308563232422, 217.72349548339844, 154.1112518310547, -0.8548812866210938, 218.34652709960938, 18.718971252441406, 62.714744567871094, -21.685325622558594, 71.29908752441406, 12.722606658935547, 10.389028549194336, -30.955101013183594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000625.npy"}
|
||||
{"epoch": 0.9448223733938019, "step": 626, "batch_size": 64, "mean": 80.90475463867188, "std": 123.89820098876953, "min": -145.41561889648438, "p10": -80.41583175659179, "median": 68.6609878540039, "p90": 204.65193634033204, "max": 487.1852111816406, "pos_frac": 0.734375, "sample": [209.7913818359375, 24.116165161132812, 194.5447235107422, -37.446380615234375, 365.2479248046875, 195.01040649414062, 203.42254638671875, -52.11897277832031, 88.52743530273438, 193.25152587890625, -86.76773071289062, 22.553878784179688, 59.79798126220703, 4.492794036865234, -84.78595733642578, 184.2264862060547, 152.6768798828125, -110.577392578125, 251.76101684570312, 51.716697692871094, -126.36173248291016, 0.73626708984375, 112.0338363647461, 181.4400634765625, 14.485855102539062, -7.523092269897461, 116.39057922363281, 84.511962890625, -34.26094055175781, 487.1852111816406, 117.49083709716797, -70.2188720703125, 225.9285888671875, 187.72213745117188, 71.00526428222656, -106.74588012695312, -4.386474609375, 66.31671142578125, -1.6558818817138672, 143.47708129882812, 10.299354553222656, 11.082468032836914, 198.9974365234375, 37.595272064208984, -20.477264404296875, 3.395265579223633, 104.58332824707031, 182.8844451904297, 61.06938552856445, 179.7435302734375, 151.72720336914062, 17.648448944091797, -145.41561889648438, 192.0841064453125, -115.90225219726562, 193.015380859375, 24.373043060302734, 205.17881774902344, 166.62399291992188, 117.60751342773438, 252.37156677246094, -60.82342529296875, 165.6383056640625, -42.409324645996094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000626.npy"}
|
||||
{"epoch": 0.9463340891912321, "step": 627, "batch_size": 64, "mean": 85.86093139648438, "std": 111.5857925415039, "min": -191.30389404296875, "p10": -19.04880218505859, "median": 90.10002899169922, "p90": 220.04247131347657, "max": 282.73974609375, "pos_frac": 0.765625, "sample": [138.9961395263672, 0.9061164855957031, 84.78892517089844, 141.41928100585938, 147.1639404296875, 119.71929931640625, 167.01107788085938, 267.18267822265625, 282.73974609375, 121.87271118164062, -8.38616943359375, 221.85696411132812, 189.17041015625, 176.57728576660156, 64.35346984863281, 77.1439208984375, 4.652252197265625, 38.773414611816406, 215.80865478515625, 151.52316284179688, 151.0525665283203, 153.5535888671875, 37.926841735839844, 1.7333984375, -9.837265014648438, -137.24794006347656, 199.3653106689453, -58.648406982421875, 156.10850524902344, -10.983585357666016, 4.254350662231445, 206.60511779785156, 241.69534301757812, -19.82917022705078, 124.7623291015625, -15.498870849609375, -17.227943420410156, 145.22705078125, -71.87025451660156, 222.73370361328125, 275.65985107421875, 66.88726043701172, -164.44248962402344, -3.97332763671875, 51.45226287841797, -9.476577758789062, -6.3023681640625, 122.11585235595703, 31.36602783203125, 254.61585998535156, 52.20069122314453, -191.213623046875, 2.428050994873047, 95.4111328125, 204.1869354248047, 178.68081665039062, 48.377967834472656, -191.30389404296875, 68.49673461914062, 67.63106536865234, 121.58069610595703, 176.33494567871094, 131.12078857421875, 206.11712646484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000627.npy"}
|
||||
{"epoch": 0.9478458049886621, "step": 628, "batch_size": 64, "mean": 81.7187728881836, "std": 121.12657928466797, "min": -254.81092834472656, "p10": -73.57679519653318, "median": 87.63360977172852, "p90": 212.20794525146485, "max": 288.1971435546875, "pos_frac": 0.8125, "sample": [213.27662658691406, 93.39300537109375, 93.330810546875, 148.96302795410156, -54.274559020996094, -153.15689086914062, 230.30947875976562, 32.129791259765625, 180.33538818359375, 192.8922119140625, 214.2232666015625, 241.43658447265625, 113.40617370605469, 46.922237396240234, 40.17786407470703, 172.72406005859375, -254.81092834472656, -162.2494354248047, 187.9123992919922, 40.80770492553711, 209.71435546875, 75.66056823730469, 18.66611671447754, -88.75115966796875, 200.4412078857422, 153.8738250732422, 69.37026977539062, 152.287353515625, 93.50860595703125, 199.4654541015625, 184.58621215820312, 248.59945678710938, 22.953079223632812, 206.30221557617188, 5.092830657958984, 287.8303527832031, -30.06646728515625, 5.398284912109375, 200.29861450195312, -179.12315368652344, 123.65870666503906, 81.93640899658203, 29.851255416870117, 170.8131866455078, 47.317665100097656, 32.15101623535156, 185.92332458496094, 78.25926208496094, 171.29208374023438, 195.5569610595703, 31.064184188842773, -53.34709167480469, 15.592514038085938, -194.63063049316406, 106.47920227050781, 288.1971435546875, 17.93402099609375, 171.8339385986328, 2.7639923095703125, 75.87443542480469, -41.560142517089844, 121.87854766845703, -0.8462104797363281, -81.84918212890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000628.npy"}
|
||||
{"epoch": 0.9493575207860923, "step": 629, "batch_size": 64, "mean": 60.98457336425781, "std": 99.31669616699219, "min": -160.42123413085938, "p10": -36.6782585144043, "median": 40.39935493469238, "p90": 211.430290222168, "max": 297.2603759765625, "pos_frac": 0.65625, "sample": [-50.25550079345703, 116.33490753173828, -160.42123413085938, 104.19010162353516, 90.71617889404297, 4.103813171386719, -20.546310424804688, 82.35478210449219, -137.5048065185547, 89.39295959472656, -20.740158081054688, 30.06873321533203, 8.188882827758789, 215.64356994628906, -21.540517807006836, -45.481632232666016, 0.4605846405029297, -13.209892272949219, -66.30752563476562, 49.206756591796875, 40.84442138671875, -3.6631088256835938, -6.394403457641602, 39.954288482666016, -51.377994537353516, 43.980247497558594, 297.2603759765625, 240.67483520507812, -11.730825424194336, 187.193115234375, 116.9910888671875, -36.81060028076172, 97.52461242675781, 93.50800323486328, -14.275875091552734, 97.11053466796875, 117.01969909667969, 17.49456787109375, 3.9276771545410156, -0.7972660064697266, -0.6220245361328125, 62.85203170776367, 248.70657348632812, -2.504169464111328, -13.989311218261719, -32.44287109375, 50.56401062011719, 150.9193572998047, 149.40231323242188, 172.05322265625, -18.842391967773438, 73.82919311523438, 239.49777221679688, 15.558366775512695, 165.24041748046875, 270.61163330078125, 5.945274353027344, 246.29071044921875, 166.7580108642578, 201.59930419921875, 35.581146240234375, 160.9761962890625, -36.36946105957031, 68.3102035522461], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000629.npy"}
|
||||
{"epoch": 0.9508692365835223, "step": 630, "batch_size": 64, "mean": 84.10877990722656, "std": 107.357421875, "min": -159.88394165039062, "p10": -42.76033401489256, "median": 66.90908432006836, "p90": 216.84426574707038, "max": 325.6362609863281, "pos_frac": 0.765625, "sample": [-73.38133239746094, 184.181640625, -16.037158966064453, 16.741920471191406, 168.26869201660156, 260.4622802734375, 125.46356964111328, 2.036113739013672, 242.06149291992188, 161.78958129882812, -2.6860580444335938, 183.31329345703125, -9.836158752441406, 190.42523193359375, 17.86705780029297, 185.48098754882812, -111.4903564453125, 75.60662078857422, 154.18063354492188, -153.3946533203125, 60.591888427734375, 4.303016662597656, 66.20836639404297, 189.99853515625, 258.4136657714844, 9.311210632324219, -52.238319396972656, 47.951717376708984, -159.88394165039062, 159.29837036132812, -7.574760437011719, 56.53614807128906, 6.192783355712891, 248.1371307373047, 154.56747436523438, 67.60980224609375, 225.05015563964844, 34.90095520019531, 73.86932373046875, 113.15523529052734, 325.6362609863281, 35.67926025390625, 197.6971893310547, -68.87905883789062, 192.5150146484375, 134.66131591796875, 168.2246856689453, 47.28607177734375, 112.60847473144531, -61.739227294921875, 59.73664093017578, -4.325355529785156, -15.758159637451172, 251.57205200195312, 180.5982666015625, 18.930036544799805, 103.5801773071289, 19.771671295166016, 188.11343383789062, 191.67813110351562, -20.645034790039062, 59.514556884765625, 112.2245864868164, -3.171001434326172], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000630.npy"}
|
||||
{"epoch": 0.9523809523809523, "step": 631, "batch_size": 64, "mean": 94.20515441894531, "std": 120.4224853515625, "min": -167.75469970703125, "p10": -40.56397476196289, "median": 77.70166015625, "p90": 225.1446975708008, "max": 359.918212890625, "pos_frac": 0.71875, "sample": [185.68959045410156, 85.93312072753906, -49.5037841796875, 225.073486328125, 154.84242248535156, 183.18569946289062, 224.42379760742188, 166.9456787109375, 65.8096923828125, -24.873504638671875, 44.724822998046875, -6.713954925537109, 149.4494171142578, 25.394973754882812, -3.5402793884277344, 40.69976043701172, -13.876327514648438, 43.4141960144043, 39.970008850097656, 288.5546875, 290.39556884765625, -70.79782104492188, 216.778564453125, 225.1752166748047, 159.59622192382812, 56.625, 24.803558349609375, 316.2693176269531, -4.755849838256836, 146.04930114746094, 43.174068450927734, 45.73925018310547, -160.37911987304688, 117.88417053222656, 98.44252014160156, 316.29522705078125, 184.2382354736328, -15.077972412109375, 359.918212890625, 216.9818572998047, 113.3826675415039, -38.29508972167969, -112.74424743652344, 170.0706329345703, -16.718017578125, -167.75469970703125, -12.205488204956055, 3.298563003540039, -67.66362762451172, 160.72836303710938, -41.536354064941406, 140.058349609375, -13.558425903320312, 218.87741088867188, -4.360095977783203, 176.06298828125, 9.258365631103516, 224.4766845703125, 69.47019958496094, 277.10821533203125, 135.4965362548828, 187.40890502929688, 16.261943817138672, 209.04684448242188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000631.npy"}
|
||||
{"epoch": 0.9538926681783825, "step": 632, "batch_size": 64, "mean": 88.55610656738281, "std": 105.92681884765625, "min": -204.1995849609375, "p10": -29.836428070068358, "median": 82.93908309936523, "p90": 212.71864166259766, "max": 298.5232849121094, "pos_frac": 0.78125, "sample": [120.42156219482422, 238.4801025390625, 103.67303466796875, -1.6657371520996094, 60.21419906616211, 193.29563903808594, 5.593288421630859, 126.02474212646484, -23.774486541748047, 203.52508544921875, 108.02288818359375, -56.621856689453125, 98.67355346679688, -8.392650604248047, -71.52745056152344, 208.82809448242188, 223.12655639648438, 5.452659606933594, 181.57701110839844, -84.65412139892578, 39.39691162109375, 26.851398468017578, -146.48577880859375, 25.919235229492188, 64.52796173095703, 176.0760498046875, 66.63958740234375, 151.87759399414062, 73.89199829101562, -11.555850982666016, 173.1643524169922, 298.5232849121094, 102.81216430664062, -30.698394775390625, 250.36770629882812, 12.558610916137695, 214.95501708984375, 35.22125244140625, 91.98616790771484, 124.42450714111328, 206.48117065429688, 195.10289001464844, 16.906402587890625, -24.33831024169922, 9.636463165283203, 209.66799926757812, 165.03762817382812, 128.4261016845703, 209.132568359375, 191.4170684814453, 197.38742065429688, -204.1995849609375, 59.356842041015625, 155.99176025390625, -2.2290802001953125, -27.825172424316406, 211.30117797851562, 236.77618408203125, 213.3261260986328, 47.43408203125, 65.5304946899414, 29.113666534423828, -48.47611618041992, 55.90678024291992], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000632.npy"}
|
||||
{"epoch": 0.9554043839758125, "step": 633, "batch_size": 64, "mean": 68.55831909179688, "std": 104.3615951538086, "min": -138.92843627929688, "p10": -56.78437995910644, "median": 49.9773006439209, "p90": 204.0675277709961, "max": 289.40350341796875, "pos_frac": 0.71875, "sample": [-3.9091625213623047, 33.92985534667969, 12.088905334472656, 115.45260620117188, 179.36473083496094, -23.4283447265625, 136.31472778320312, 147.63919067382812, 53.09236145019531, 88.43207550048828, -6.3181610107421875, 22.10099983215332, 275.94976806640625, 50.1041145324707, -48.1981086730957, 150.03076171875, 19.920455932617188, 107.58372497558594, 84.35306549072266, -28.333518981933594, 7.82025146484375, -51.029441833496094, 108.06298065185547, 33.20343780517578, 183.0061798095703, 64.26500701904297, -25.918231964111328, -27.075103759765625, 132.7041015625, 110.07889556884766, 186.0347137451172, 9.442543029785156, 201.06019592285156, 43.26283264160156, -59.25078201293945, 11.447986602783203, -92.38529968261719, -43.0062255859375, -65.49640655517578, -32.62206268310547, -84.23735046386719, 51.52882766723633, 1.9926948547363281, 255.7642822265625, 49.850486755371094, 74.65670013427734, 38.824195861816406, -74.99193572998047, 66.53297424316406, 194.3944549560547, 197.76171875, 186.03065490722656, 205.35638427734375, 196.035400390625, 3.2353057861328125, -81.93903350830078, -7.200649261474609, 23.91213607788086, 170.01068115234375, 225.8916778564453, 289.40350341796875, -138.92843627929688, 259.44671630859375, 224.62570190429688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000633.npy"}
|
||||
{"epoch": 0.9569160997732427, "step": 634, "batch_size": 64, "mean": 28.05582046508789, "std": 108.77313995361328, "min": -198.57762145996094, "p10": -118.43762893676755, "median": 2.7845849990844727, "p90": 182.66661071777347, "max": 326.83172607421875, "pos_frac": 0.53125, "sample": [-5.973392486572266, -130.2252197265625, -4.501823425292969, 193.21047973632812, 21.4041748046875, -15.694234848022461, -15.69150161743164, -16.114408493041992, 56.37487030029297, 23.652114868164062, 12.14158821105957, -14.130104064941406, 0.8921279907226562, -37.428871154785156, -16.333145141601562, 72.46851348876953, 139.52305603027344, 57.08115768432617, -67.62601470947266, -20.1324462890625, -22.25354766845703, -1.5485801696777344, 145.78854370117188, -153.73263549804688, 202.04127502441406, 3.9270782470703125, 210.1384735107422, 161.09434509277344, 326.83172607421875, -150.0979461669922, -14.31768798828125, 16.02292823791504, -6.139690399169922, -34.37074661254883, 184.80551147460938, 4.314443588256836, -90.9332504272461, -161.7966766357422, 229.18458557128906, 176.87954711914062, 143.89732360839844, -84.07766723632812, -137.14389038085938, 21.692028045654297, -171.13674926757812, -198.57762145996094, -13.122425079345703, 1.7101554870605469, 187.4702606201172, 63.44760513305664, -25.74542236328125, -55.02690124511719, 144.83895874023438, 57.077178955078125, -1.708669662475586, -6.928905487060547, 177.67584228515625, 138.2144317626953, 65.61389923095703, 104.45645904541016, 22.082618713378906, 3.8590145111083984, -48.909217834472656, 147.17959594726562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000634.npy"}
|
||||
{"epoch": 0.9584278155706727, "step": 635, "batch_size": 64, "mean": 55.71092224121094, "std": 116.7091064453125, "min": -198.45169067382812, "p10": -100.09546813964843, "median": 42.29596710205078, "p90": 215.44763641357423, "max": 254.30711364746094, "pos_frac": 0.65625, "sample": [-100.79835510253906, 183.7298583984375, -110.97896575927734, 12.403797149658203, 60.525909423828125, 22.255638122558594, -98.45539855957031, -151.61094665527344, -124.23782348632812, 189.45054626464844, 0.63739013671875, 180.306396484375, 20.911048889160156, -6.663887023925781, 220.423828125, 220.31539916992188, 162.5248260498047, 11.118316650390625, 224.22738647460938, 194.03347778320312, -84.37678527832031, 23.25860595703125, -43.91712188720703, 233.3616943359375, 59.133941650390625, -73.20927429199219, 174.0718994140625, -3.0405941009521484, -13.03006362915039, 100.95684814453125, 137.3994140625, -56.22917938232422, 200.5399169921875, 4.032234191894531, 63.47565460205078, 9.926822662353516, 189.57493591308594, -13.51174545288086, 216.24107360839844, -172.37429809570312, 143.57720947265625, 249.55258178710938, -48.49832534790039, 213.59628295898438, -32.87138366699219, -38.20494079589844, 90.34991455078125, 86.2225570678711, 116.50816345214844, -48.85127258300781, 110.9004898071289, 254.30711364746094, -8.92236328125, 83.3772201538086, -198.45169067382812, 140.92391967773438, 148.87449645996094, 25.457992553710938, -6.960540771484375, 152.5795440673828, -124.82585144042969, 110.57073974609375, 80.68045806884766, 3.204315185546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000635.npy"}
|
||||
{"epoch": 0.9599395313681028, "step": 636, "batch_size": 64, "mean": 76.18397521972656, "std": 101.56307983398438, "min": -188.26412963867188, "p10": -20.68227157592773, "median": 83.07183074951172, "p90": 203.93356933593753, "max": 302.115966796875, "pos_frac": 0.8125, "sample": [194.03724670410156, 11.437675476074219, 22.637319564819336, 13.718528747558594, 92.0837631225586, 18.991920471191406, 83.03071594238281, -5.572395324707031, -31.13367462158203, 191.01441955566406, 124.33352661132812, -9.35746955871582, 190.95498657226562, 158.03182983398438, 48.732696533203125, 44.93638610839844, -49.79655456542969, 208.1748504638672, 79.8855209350586, 135.39254760742188, 162.28329467773438, 5.805534362792969, 2.584440231323242, 163.8994903564453, 0.8144378662109375, 91.84020233154297, 271.6072082519531, 5.061761856079102, -1.1984844207763672, 91.26183319091797, 109.30905151367188, 105.67290496826172, 255.0187530517578, 86.63522338867188, -16.878280639648438, 165.73550415039062, 115.47593688964844, 50.498985290527344, -22.31255340576172, 179.99142456054688, 113.309814453125, 209.19886779785156, -13.4097900390625, 302.115966796875, 9.40328598022461, -188.26412963867188, 224.5229949951172, 52.41123962402344, 2.7358055114746094, 184.04483032226562, 153.88966369628906, 93.73583221435547, 136.61766052246094, 223.18234252929688, 105.34031677246094, -145.2904052734375, -183.816650390625, 13.154092788696289, 83.11294555664062, 106.83060455322266, 31.894287109375, -92.90045928955078, 67.68772888183594, 41.627105712890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000636.npy"}
|
||||
{"epoch": 0.9614512471655329, "step": 637, "batch_size": 64, "mean": 55.891334533691406, "std": 119.18309020996094, "min": -304.0263977050781, "p10": -85.25613861083983, "median": 32.716800689697266, "p90": 215.99361877441407, "max": 378.6059265136719, "pos_frac": 0.671875, "sample": [-7.9690399169921875, 247.13595581054688, 14.894905090332031, 378.6059265136719, -28.19091033935547, -24.164703369140625, -10.459541320800781, 130.61392211914062, 229.76303100585938, 29.682445526123047, -41.11699676513672, 32.841712951660156, 132.19808959960938, -110.86701202392578, 152.5389404296875, 60.10408401489258, -20.095436096191406, 129.9751739501953, 204.822265625, -22.239030838012695, 109.69783020019531, 46.60789489746094, 13.7003173828125, -1.3216018676757812, 9.432409286499023, 40.06414794921875, -90.01323699951172, 161.30262756347656, 256.98138427734375, 200.14617919921875, 197.40545654296875, 12.416023254394531, -151.6658477783203, 150.521728515625, -38.05975341796875, 144.6626739501953, -112.21246337890625, -6.417938232421875, 19.31513214111328, 47.64185333251953, 91.14387512207031, 239.2247314453125, 3.0240345001220703, 17.437320709228516, 216.61587524414062, 24.329856872558594, 6.7585601806640625, 90.12060546875, -117.27288818359375, -11.583412170410156, 32.591888427734375, 243.2251739501953, 98.59368896484375, 83.9332046508789, 35.03703308105469, -145.40435791015625, 214.54168701171875, 54.415626525878906, -16.54253387451172, -304.0263977050781, 107.94100189208984, 201.41156005859375, -74.15624237060547, -2.592926025390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000637.npy"}
|
||||
{"epoch": 0.9629629629629629, "step": 638, "batch_size": 64, "mean": 67.22999572753906, "std": 103.99407958984375, "min": -170.72808837890625, "p10": -37.70585060119629, "median": 20.763891220092773, "p90": 208.73890380859376, "max": 280.7823486328125, "pos_frac": 0.734375, "sample": [-170.72808837890625, 161.70359802246094, 121.90049743652344, 2.0439186096191406, -145.73487854003906, -1.0114593505859375, 211.9416046142578, 149.45950317382812, 41.63613510131836, -22.108356475830078, 8.017471313476562, -55.55777359008789, -22.831741333007812, -31.239700317382812, -50.65179443359375, 251.74801635742188, 0.97491455078125, -40.47076416015625, 208.12554931640625, 17.292022705078125, 121.02799987792969, 13.934577941894531, 159.56378173828125, 213.15725708007812, -96.30890655517578, 232.11595153808594, 125.60253143310547, 185.48670959472656, 1.4660415649414062, 59.10797119140625, -4.648096084594727, 215.06863403320312, 159.64491271972656, 172.7464599609375, 22.46185302734375, -4.400888442993164, 15.986259460449219, 148.62582397460938, 13.063766479492188, 202.38510131835938, 81.53978729248047, 209.00177001953125, 205.65040588378906, -5.557285308837891, 192.7888946533203, -36.9871826171875, -3.646045684814453, 185.26641845703125, 1.1451263427734375, 23.46045684814453, 0.4646167755126953, 19.065929412841797, 280.7823486328125, 65.82913208007812, -38.013851165771484, 18.012351989746094, 3.8142547607421875, 202.09490966796875, 192.6088104248047, 12.234683990478516, -15.523574829101562, 4.0556182861328125, 68.6978530883789, 45.338157653808594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000638.npy"}
|
||||
{"epoch": 0.9644746787603931, "step": 639, "batch_size": 64, "mean": 80.41844940185547, "std": 111.77835083007812, "min": -123.94668579101562, "p10": -52.39817276000976, "median": 58.37627410888672, "p90": 230.01715850830078, "max": 318.05938720703125, "pos_frac": 0.734375, "sample": [197.48033142089844, 138.72549438476562, 133.22854614257812, -75.67491912841797, 56.49322509765625, 189.30813598632812, 12.797439575195312, -35.49014663696289, -61.931541442871094, 143.58213806152344, 25.965469360351562, 70.6790771484375, 284.99261474609375, 52.060333251953125, 178.79576110839844, 4.091583251953125, 60.25932312011719, 5.0168609619140625, -119.65096282958984, 110.7295150756836, 156.42608642578125, 196.8025360107422, -1.8067035675048828, 265.17706298828125, 10.240592956542969, 133.64529418945312, 69.77081298828125, 200.45631408691406, -55.85325622558594, 230.13441467285156, 237.9989776611328, 1.045339584350586, 0.06464958190917969, 148.01181030273438, -4.210765838623047, 25.218360900878906, 149.88858032226562, -11.334793090820312, 318.05938720703125, 177.29318237304688, 182.51913452148438, 60.52423095703125, 290.3000183105469, 122.57503509521484, -97.61138916015625, -13.094356536865234, -30.90337371826172, -63.24895095825195, 44.75947570800781, 22.735729217529297, 240.31320190429688, 180.93099975585938, 7.332054138183594, 32.355377197265625, -26.87603759765625, -123.94668579101562, 29.435386657714844, -40.69960021972656, -44.33631134033203, 229.74356079101562, -40.173484802246094, 162.7213134765625, 187.0903778076172, 215.848876953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000639.npy"}
|
||||
{"epoch": 0.9659863945578231, "step": 640, "batch_size": 64, "mean": 73.06314086914062, "std": 107.07379150390625, "min": -201.3687744140625, "p10": -45.41654891967773, "median": 54.46025848388672, "p90": 200.2176239013672, "max": 314.8126220703125, "pos_frac": 0.796875, "sample": [188.8213348388672, 37.7748908996582, 314.8126220703125, -29.200529098510742, 20.918899536132812, 130.96539306640625, 131.77145385742188, 5.43353271484375, 173.42965698242188, 201.6978302001953, 61.1014404296875, 21.92711639404297, 137.143798828125, 168.68734741210938, -142.0724639892578, 143.87339782714844, 65.75789642333984, 34.107303619384766, -60.07297897338867, 214.277587890625, -193.4628143310547, 153.73153686523438, 248.6582794189453, 57.40455627441406, -50.06529235839844, -12.30118179321289, 148.98025512695312, 29.688819885253906, -20.88407325744629, 9.77212142944336, 173.80169677734375, 4.5974884033203125, 61.940155029296875, 205.2235565185547, 112.97280883789062, -1.98406982421875, -16.830467224121094, 41.231048583984375, -71.10505676269531, 70.56851959228516, 46.17250061035156, 195.55311584472656, 7.287895202636719, 51.515960693359375, 196.76380920410156, 130.67141723632812, 89.89189910888672, -201.3687744140625, 20.62993049621582, 188.74404907226562, 144.32183837890625, 256.69708251953125, 33.29997253417969, 307.72149658203125, 14.617118835449219, -47.69121551513672, 179.80892944335938, -40.10899353027344, 131.6358642578125, 26.887910842895508, 12.947711944580078, 29.59765625, 31.192678451538086, 96.15801239013672], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000640.npy"}
|
||||
{"epoch": 0.9674981103552532, "step": 641, "batch_size": 64, "mean": 65.28868865966797, "std": 88.90745544433594, "min": -213.7725067138672, "p10": -16.704131698608393, "median": 52.606319427490234, "p90": 183.45852508544925, "max": 309.9058837890625, "pos_frac": 0.796875, "sample": [-19.48971176147461, -116.91593170166016, 92.43760681152344, 97.42463684082031, 26.72821044921875, -6.623527526855469, 158.08139038085938, 0.9741592407226562, 1.0795669555664062, -10.204444885253906, 40.48432922363281, 12.42759895324707, 148.098388671875, 164.27044677734375, 77.58016967773438, 7.547468185424805, -4.6708984375, 208.56475830078125, 194.71871948242188, 153.91555786132812, 133.18374633789062, -7.678932189941406, 42.572959899902344, 55.11769104003906, 102.94583892822266, 212.4321746826172, 9.696701049804688, 77.27153015136719, 26.737411499023438, 110.705810546875, 8.20269775390625, 78.7064208984375, -61.485816955566406, 96.1600112915039, 13.5296630859375, -63.27471160888672, 148.35940551757812, 64.27525329589844, -22.17060089111328, 57.470428466796875, -213.7725067138672, 131.2166748046875, 62.210418701171875, 21.157333374023438, 100.28338623046875, 226.04598999023438, 16.91460418701172, -3.0906124114990234, 175.3643798828125, 35.52851867675781, 309.9058837890625, 117.36903381347656, 221.34603881835938, 186.9274444580078, -22.038299560546875, 97.64189147949219, 125.7491455078125, 29.175399780273438, 33.91720962524414, 10.013885498046875, -7.0820465087890625, 13.05731201171875, 50.094947814941406, 151.35423278808594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000641.npy"}
|
||||
{"epoch": 0.9690098261526833, "step": 642, "batch_size": 64, "mean": 47.74443054199219, "std": 110.34149169921875, "min": -186.82594299316406, "p10": -83.58524932861327, "median": 38.57415008544922, "p90": 188.10764770507814, "max": 343.6157531738281, "pos_frac": 0.6875, "sample": [68.80286407470703, 11.708457946777344, 2.7110061645507812, -112.98893737792969, 49.35851287841797, -11.116491317749023, 122.54908752441406, 199.80258178710938, -93.45022583007812, -163.3487091064453, 343.6157531738281, -5.624275207519531, 109.840576171875, -54.678955078125, 89.64785766601562, 63.839263916015625, 144.188232421875, 237.86294555664062, -61.11753845214844, -17.65372085571289, 134.60873413085938, 133.46600341796875, 186.52316284179688, -36.567073822021484, 106.73991394042969, 174.90354919433594, 194.94554138183594, 31.06396484375, 182.5761260986328, 40.72443389892578, 106.56793975830078, 95.78240966796875, 80.27227783203125, 17.57889175415039, -14.878211975097656, 161.491455078125, -76.95509338378906, 67.28137969970703, 11.277824401855469, -84.46748352050781, 28.055912017822266, -25.10723876953125, 7.623788833618164, -41.477516174316406, 152.6180419921875, 12.054178237915039, -81.52670288085938, 154.2130126953125, 158.43911743164062, 212.748291015625, 46.855926513671875, 36.423866271972656, 188.78671264648438, 119.45079803466797, 198.722900390625, 36.35715866088867, -76.90569305419922, 67.27725982666016, -186.82594299316406, -176.28952026367188, 5.2201690673828125, -71.30339050292969, -158.76527404785156, 12.113616943359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000642.npy"}
|
||||
{"epoch": 0.9705215419501134, "step": 643, "batch_size": 64, "mean": 75.09587097167969, "std": 114.68622589111328, "min": -195.6256103515625, "p10": -65.59455947875976, "median": 77.28838729858398, "p90": 215.66553344726563, "max": 343.5206298828125, "pos_frac": 0.75, "sample": [66.48269653320312, 86.00151062011719, 95.9390869140625, 33.31787109375, 1.2125730514526367, 243.01739501953125, 0.8257541656494141, 181.2615509033203, 88.92400360107422, 30.933677673339844, 162.027587890625, 35.093605041503906, 154.95716857910156, 1.0970001220703125, 145.337890625, 135.2523193359375, -31.584136962890625, 8.144357681274414, 206.875244140625, 217.5118408203125, 173.19509887695312, 20.06165313720703, 187.25131225585938, 80.65496826171875, -52.85004425048828, 211.35748291015625, 61.23863220214844, -42.524566650390625, 202.88525390625, 294.9901428222656, 101.311767578125, 165.6817169189453, -40.534698486328125, 167.45545959472656, 72.76081848144531, -76.79884338378906, 52.79791259765625, -6.758880615234375, 243.462890625, 174.734619140625, 152.16293334960938, -74.64592742919922, 146.7018280029297, -13.959892272949219, 164.8182373046875, -153.3553009033203, -129.03143310546875, -195.6256103515625, 2.3642578125, 21.938426971435547, -61.827674865722656, 101.0303726196289, 232.73098754882812, 95.02123260498047, 78.9649658203125, -67.20893859863281, -17.081069946289062, 238.09872436523438, 57.07775115966797, 75.61180877685547, -36.409976959228516, 343.5206298828125, 143.5694580078125, -151.33169555664062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000643.npy"}
|
||||
{"epoch": 0.9720332577475435, "step": 644, "batch_size": 64, "mean": 52.899391174316406, "std": 111.41433715820312, "min": -250.0125732421875, "p10": -79.72886810302732, "median": 26.535808563232422, "p90": 194.90158233642578, "max": 235.28378295898438, "pos_frac": 0.703125, "sample": [219.0743408203125, -118.31959533691406, -250.0125732421875, 167.91940307617188, -40.45817184448242, -90.29251098632812, 8.981925964355469, -0.42414283752441406, 188.59120178222656, 235.28378295898438, 66.92666625976562, 47.51918411254883, 193.55484008789062, 155.87295532226562, 97.45060729980469, 64.29974365234375, 47.339778900146484, 36.187782287597656, 0.09484100341796875, 188.78359985351562, 10.746044158935547, 194.80755615234375, -158.50888061523438, -31.477737426757812, -8.656417846679688, 118.38436126708984, 2.8631820678710938, 119.53456115722656, 5.9407958984375, -55.08036804199219, 194.94187927246094, -26.61395263671875, -2.5660018920898438, 161.38653564453125, 170.55859375, -0.7636947631835938, -42.984588623046875, 225.7371063232422, 200.4187774658203, 17.611045837402344, 108.82720184326172, 202.80206298828125, 191.4084930419922, 33.25342559814453, 13.311721801757812, -208.4916534423828, -15.484130859375, 121.91664123535156, 18.997230529785156, 162.9215545654297, -49.16239929199219, 20.650924682617188, 5.82276725769043, -110.82628631591797, 20.463287353515625, -0.3550872802734375, -130.67803955078125, 110.17079162597656, 130.5168914794922, 4.4062347412109375, 20.027633666992188, 171.44363403320312, 32.420692443847656, 216.544921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000644.npy"}
|
||||
{"epoch": 0.9735449735449735, "step": 645, "batch_size": 64, "mean": 66.37191009521484, "std": 93.85557556152344, "min": -193.30831909179688, "p10": -28.69466209411621, "median": 48.98090934753418, "p90": 184.83616943359377, "max": 278.94775390625, "pos_frac": 0.78125, "sample": [32.901859283447266, 217.370361328125, 129.44610595703125, -99.35135650634766, 103.14466857910156, 256.4531555175781, 142.62646484375, -0.4523181915283203, -19.50127410888672, 26.490089416503906, 16.97304916381836, -125.55967712402344, 50.59632110595703, 1.7378044128417969, 177.58343505859375, 45.25151062011719, 116.96949768066406, 0.4928855895996094, -8.29681396484375, 25.451459884643555, 196.39288330078125, -29.42426300048828, 4.415840148925781, 105.7310562133789, 61.461647033691406, -12.281791687011719, 71.82183074951172, 106.77951049804688, 193.7959747314453, 118.73360443115234, 125.69207000732422, -2.5381603240966797, 72.22112274169922, 18.773422241210938, 8.576400756835938, 156.2804412841797, -113.72529602050781, 28.079002380371094, -55.7535400390625, 212.13453674316406, 112.99235534667969, 161.28277587890625, 278.94775390625, -11.786582946777344, 47.36549758911133, 120.3083724975586, 150.04428100585938, 95.39789581298828, 93.98281860351562, 133.28115844726562, 28.303237915039062, 142.19772338867188, 26.345813751220703, 180.03076171875, 43.525299072265625, 1.275360107421875, 152.4901123046875, -26.992259979248047, -57.938262939453125, -193.30831909179688, 24.28447723388672, 186.8956298828125, 162.53115844726562, 38.85155487060547], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000645.npy"}
|
||||
{"epoch": 0.9750566893424036, "step": 646, "batch_size": 64, "mean": 58.49915313720703, "std": 110.96373748779297, "min": -198.5680694580078, "p10": -78.11041030883787, "median": 40.59420967102051, "p90": 196.76537322998047, "max": 336.8748474121094, "pos_frac": 0.671875, "sample": [-60.196136474609375, 1.1286907196044922, -5.197364807128906, 133.4445343017578, 192.86793518066406, 194.3166046142578, -133.7762451171875, 233.68844604492188, 98.20439147949219, 205.4539031982422, -10.325912475585938, 17.88074493408203, 189.76504516601562, 196.2082977294922, 144.60543823242188, -4.477203369140625, 95.92504119873047, 104.73049926757812, 197.00411987304688, 122.34610748291016, 222.91473388671875, 100.9339599609375, -198.5680694580078, -85.78795623779297, -93.4295654296875, 125.84676361083984, 3.686107635498047, -14.552642822265625, 15.836074829101562, 44.9862060546875, 72.74606323242188, -1.8141860961914062, -8.067329406738281, 213.3170928955078, 84.00759887695312, 0.07605743408203125, 54.616058349609375, 13.082382202148438, 64.230712890625, 147.33428955078125, -1.242197036743164, -54.237953186035156, 124.93035888671875, -99.76069641113281, 144.31729125976562, 336.8748474121094, 183.05075073242188, 151.6146240234375, 33.905426025390625, 271.5853271484375, 19.590024948120117, -0.070281982421875, 4.9237060546875, -26.409805297851562, -191.9825439453125, 51.18345642089844, 98.34024047851562, -146.0914764404297, -39.50077819824219, 161.71743774414062, 28.112438201904297, -12.146194458007812, -5.952037811279297, 36.202213287353516], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000646.npy"}
|
||||
{"epoch": 0.9765684051398337, "step": 647, "batch_size": 64, "mean": 94.1419448852539, "std": 104.21862030029297, "min": -162.6533660888672, "p10": -32.470890045166016, "median": 102.77191162109375, "p90": 217.86748809814455, "max": 324.7520751953125, "pos_frac": 0.8125, "sample": [-8.547161102294922, 123.2215347290039, 192.6710205078125, 17.968154907226562, 73.03439331054688, 128.99842834472656, 30.407684326171875, 229.251953125, 138.01971435546875, 291.92950439453125, 104.34794616699219, 157.343994140625, 162.333740234375, 1.1391487121582031, -32.99082946777344, -0.7845458984375, 324.7520751953125, -23.070650100708008, -74.92404174804688, 195.2430419921875, 189.12721252441406, 82.29568481445312, 121.21199798583984, 108.98898315429688, 220.51388549804688, 171.57476806640625, 190.04879760742188, 1.8338947296142578, 203.7007598876953, 44.53204345703125, 183.64523315429688, 8.23779296875, 170.25491333007812, 8.004531860351562, 156.41952514648438, -23.873199462890625, 22.150501251220703, 204.05148315429688, 39.380699157714844, -83.17041778564453, 127.96133422851562, 38.721290588378906, 37.44977951049805, 160.61846923828125, -162.6533660888672, 48.62195587158203, 89.52922821044922, 27.302371978759766, 54.96354675292969, 101.19587707519531, -42.6788330078125, 239.27841186523438, 184.5960235595703, -54.95159912109375, -106.03936767578125, 181.59375, 55.70664978027344, 278.0673522949219, -31.25769805908203, 219.86151123046875, 213.2147674560547, 36.13573455810547, 150.08111572265625, 128.49227905273438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000647.npy"}
|
||||
{"epoch": 0.9780801209372638, "step": 648, "batch_size": 64, "mean": 66.47093200683594, "std": 122.46299743652344, "min": -204.30172729492188, "p10": -86.38832550048828, "median": 68.62725067138672, "p90": 206.00916442871096, "max": 333.22509765625, "pos_frac": 0.671875, "sample": [195.10824584960938, 38.442893981933594, 24.56104278564453, -191.2177276611328, 192.34237670898438, -75.77067565917969, -204.30172729492188, 2.697113037109375, 199.1973876953125, 52.876930236816406, -69.33467102050781, 118.2989273071289, 60.08281707763672, -31.39586639404297, -102.57716369628906, -145.9298553466797, 43.51511001586914, -17.11956024169922, 333.22509765625, 162.58233642578125, 199.48097229003906, 133.63412475585938, 126.91802978515625, 147.20712280273438, 180.11294555664062, -17.39453125, 117.2320556640625, 212.54150390625, 61.459938049316406, 75.43215942382812, -54.131195068359375, -3.64630126953125, 303.5787353515625, 299.2208251953125, 2.0952377319335938, -126.67786407470703, 46.67231750488281, 108.91486358642578, 162.75830078125, -2.8982009887695312, 131.72216796875, 198.3648681640625, 218.37452697753906, -78.92637634277344, -17.05493927001953, 9.749992370605469, 61.82234191894531, 130.29539489746094, 208.8069610595703, 216.44009399414062, 167.96414184570312, 148.53408813476562, 159.0664520263672, -177.41795349121094, -89.5863037109375, 84.57115173339844, 122.49038696289062, -55.72260284423828, 88.01144409179688, -40.676265716552734, -2.7869873046875, -42.173255920410156, 134.74960327148438, 119.72676849365234], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000648.npy"}
|
||||
{"epoch": 0.9795918367346939, "step": 649, "batch_size": 64, "mean": 68.25140380859375, "std": 113.27247619628906, "min": -210.96238708496094, "p10": -101.72400512695312, "median": 71.46549606323242, "p90": 192.2116226196289, "max": 299.93487548828125, "pos_frac": 0.6875, "sample": [156.74339294433594, 152.95249938964844, -41.7675895690918, 172.24911499023438, 174.48831176757812, 183.08499145507812, -17.229110717773438, -8.410760879516602, -16.919357299804688, 179.45675659179688, 6.622442245483398, 299.93487548828125, 140.2277374267578, -1.1947059631347656, 190.0712127685547, 183.35496520996094, 126.64906311035156, 61.194664001464844, 156.32948303222656, 205.20034790039062, 192.53982543945312, -6.273591995239258, -125.3191146850586, -21.534080505371094, -210.96238708496094, 110.57211303710938, 8.293632507324219, 54.80933380126953, -118.73345947265625, -96.68002319335938, 75.14558410644531, 60.18553924560547, 159.98997497558594, -1.0365734100341797, 106.53829956054688, 166.34561157226562, 67.78540802001953, 171.2206268310547, 125.56818389892578, 188.21510314941406, 16.28009796142578, 211.17645263671875, 86.79631805419922, -21.375125885009766, 21.9453125, 191.44581604003906, 2.96685791015625, 19.524940490722656, 7.0188140869140625, 83.50187683105469, 32.244407653808594, 258.923583984375, -103.88571166992188, -170.18222045898438, 235.71531677246094, -29.074649810791016, -114.62586975097656, -121.70146179199219, 107.6583480834961, 196.95042419433594, 180.88458251953125, 92.5831527709961, -2.8722496032714844, -23.5174560546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000649.npy"}
|
||||
{"epoch": 0.981103552532124, "step": 650, "batch_size": 64, "mean": 69.26091003417969, "std": 115.01304626464844, "min": -200.43247985839844, "p10": -57.95213394165039, "median": 42.56512641906738, "p90": 201.81290130615236, "max": 266.7286376953125, "pos_frac": 0.71875, "sample": [-36.31715774536133, 250.6328887939453, 216.82754516601562, 139.9829559326172, 19.327423095703125, 177.53042602539062, 185.15863037109375, 88.75630187988281, 8.726402282714844, 36.08218765258789, 168.10342407226562, 187.44691467285156, -150.89918518066406, -4.9317779541015625, 2.6448001861572266, 159.56777954101562, 162.6332550048828, 26.777788162231445, 233.8002471923828, 0.2867755889892578, 182.0446014404297, 119.64984893798828, -46.15380859375, 12.793468475341797, 185.52622985839844, 203.91586303710938, 196.90599060058594, 194.90386962890625, 122.2237777709961, -113.80272674560547, 2.514598846435547, -150.21803283691406, -8.759136199951172, 131.083251953125, -196.97796630859375, 30.700393676757812, 194.59756469726562, 86.15858459472656, 19.56342315673828, -22.428688049316406, -59.757080078125, 75.66215515136719, -200.43247985839844, 181.45367431640625, -63.05023193359375, -0.24105072021484375, 266.7286376953125, 214.32984924316406, 128.9166259765625, -53.74059295654297, 158.60198974609375, 9.183639526367188, 189.67092895507812, -3.144611358642578, 10.927713394165039, 49.048065185546875, -28.940994262695312, -4.4226226806640625, 185.12210083007812, 256.0729675292969, 9.90008544921875, -20.63741683959961, 97.75975036621094, 17.30829620361328], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000650.npy"}
|
||||
{"epoch": 0.982615268329554, "step": 651, "batch_size": 64, "mean": 60.20629119873047, "std": 102.22740936279297, "min": -162.951416015625, "p10": -41.401876068115236, "median": 26.86675262451172, "p90": 200.92337493896486, "max": 307.36199951171875, "pos_frac": 0.6875, "sample": [-2.203460693359375, 23.956375122070312, 107.58955383300781, -56.4127197265625, 39.09038543701172, -68.2508773803711, -38.282325744628906, 96.67249298095703, 25.00927734375, 189.98190307617188, 1.854095458984375, -34.79142761230469, -8.004020690917969, 30.992080688476562, 110.05857849121094, -162.951416015625, 12.296730041503906, 298.1427917480469, 59.89623260498047, 196.77955627441406, 200.6260223388672, -6.7457122802734375, -41.88182067871094, 132.76022338867188, 15.054779052734375, 29.538928985595703, 65.05166625976562, 92.6573486328125, -3.5716705322265625, 254.77719116210938, 7.843227386474609, 223.60025024414062, 10.732906341552734, -21.421653747558594, 40.04920959472656, -0.0005283355712890625, 192.21107482910156, 12.5843505859375, -29.978309631347656, 48.70261764526367, 208.55755615234375, 11.270034790039062, 140.33229064941406, -3.656238555908203, -19.036212921142578, 201.05081176757812, 230.94593811035156, 175.35092163085938, -61.74950408935547, 185.6962890625, 20.394800186157227, 8.28594970703125, 2.0392379760742188, 122.65605926513672, 141.0059814453125, 111.72313690185547, 186.80197143554688, -1.9047584533691406, 47.043785095214844, -121.73593139648438, 28.724227905273438, -71.68762969970703, 307.36199951171875, -40.282005310058594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000651.npy"}
|
||||
{"epoch": 0.9841269841269841, "step": 652, "batch_size": 64, "mean": 58.392215728759766, "std": 122.90836334228516, "min": -203.395263671875, "p10": -76.65432357788085, "median": 25.01074981689453, "p90": 204.97830200195312, "max": 442.76251220703125, "pos_frac": 0.6875, "sample": [14.38278579711914, 180.30917358398438, 179.92501831054688, 8.725761413574219, -44.09459686279297, 194.40919494628906, -5.215566635131836, 114.32102966308594, 174.6159210205078, 9.246658325195312, 205.25103759765625, -162.82138061523438, 6.509708404541016, 6.809022903442383, 97.2259521484375, 202.54229736328125, 35.271453857421875, 114.2700424194336, 109.6347885131836, 17.332199096679688, -67.2685546875, 219.28515625, 170.8053436279297, 2.6500473022460938, 16.256153106689453, -46.85607147216797, -203.395263671875, -63.994476318359375, 237.09999084472656, 122.6292953491211, 32.689300537109375, -38.508941650390625, 102.95912170410156, 2.6565475463867188, 143.84927368164062, -80.67679595947266, 228.07217407226562, 114.67178344726562, -15.408485412597656, -151.13014221191406, -31.263107299804688, 204.3419189453125, -9.384376525878906, 153.7747802734375, -50.37261962890625, -29.26974868774414, 218.85910034179688, 166.916259765625, -0.31347084045410156, 211.57888793945312, 442.76251220703125, 127.8460464477539, 108.59883117675781, -13.677864074707031, 0.5833778381347656, 15.049972534179688, -159.0480499267578, 4.181787490844727, -97.44113159179688, 194.96890258789062, 36.05589294433594, 41.235748291015625, -179.20199584960938, 195.28433227539062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000652.npy"}
|
||||
{"epoch": 0.9856386999244142, "step": 653, "batch_size": 64, "mean": 68.54454040527344, "std": 110.7712631225586, "min": -210.5267333984375, "p10": -75.72079467773436, "median": 53.271583557128906, "p90": 201.14409790039062, "max": 303.17431640625, "pos_frac": 0.765625, "sample": [7.293169021606445, 162.09133911132812, 19.22022247314453, -14.2196044921875, 88.35160827636719, 43.60308837890625, 35.699913024902344, -84.15067291259766, 201.43023681640625, 10.236801147460938, -15.00667953491211, 1.5376739501953125, 134.88320922851562, -95.56224060058594, 303.17431640625, 160.1722412109375, 102.01333618164062, 222.68545532226562, 35.48710250854492, 212.43331909179688, 144.31069946289062, 225.91700744628906, 134.40829467773438, -10.869377136230469, 76.71430969238281, 50.115081787109375, 200.4764404296875, 58.07329559326172, -1.1721687316894531, -38.33476257324219, 230.9730224609375, -95.99446105957031, -56.05107879638672, 35.86579513549805, 4.546548843383789, -94.43975830078125, 124.06695556640625, 50.45336151123047, 161.05764770507812, 86.34391784667969, 29.741416931152344, -162.2252960205078, 283.3010559082031, 46.76615905761719, 156.116943359375, -184.12118530273438, 189.34483337402344, -35.978668212890625, 149.66558837890625, 93.12564086914062, -20.20018768310547, 196.03060913085938, -210.5267333984375, 4.1991729736328125, 5.074239730834961, 56.089805603027344, 184.7500457763672, 197.85427856445312, 13.698894500732422, 174.38119506835938, 63.42736053466797, 127.67520141601562, 37.612266540527344, 173.21340942382812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000653.npy"}
|
||||
{"epoch": 0.9871504157218443, "step": 654, "batch_size": 64, "mean": 61.59899139404297, "std": 105.51365661621094, "min": -140.330322265625, "p10": -74.92559204101562, "median": 49.649696350097656, "p90": 203.55823516845703, "max": 345.05499267578125, "pos_frac": 0.703125, "sample": [15.484291076660156, 239.7345733642578, -140.330322265625, 136.63194274902344, 52.308837890625, -102.54650115966797, 208.523681640625, 80.95916748046875, 0.5014686584472656, -82.05876159667969, -9.596893310546875, -66.874267578125, 154.12704467773438, 5.138954162597656, -25.524574279785156, 26.340505599975586, 185.7749481201172, -113.91012573242188, 196.92002868652344, 31.1932373046875, 48.36918640136719, 17.511585235595703, 201.9901885986328, 229.67520141601562, -2.7147274017333984, -18.250587463378906, 11.197154998779297, -17.816200256347656, -51.338294982910156, -1.5398521423339844, -5.120719909667969, 143.69894409179688, 345.05499267578125, 107.99893188476562, 22.606468200683594, 213.92807006835938, 224.17221069335938, -29.65142822265625, 62.402557373046875, 92.31539916992188, 136.7066192626953, 106.09950256347656, 140.0185089111328, -117.072021484375, 18.279022216796875, 17.06121063232422, 204.23025512695312, 190.5237579345703, 56.85527038574219, 109.24211120605469, 93.44386291503906, 198.77029418945312, 0.19619369506835938, 74.7284927368164, 176.5035400390625, -78.37615966796875, -5.750679016113281, 121.30169677734375, 109.16651916503906, 106.33535766601562, 37.72372817993164, 50.930206298828125, -57.73329162597656, -134.13473510742188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000654.npy"}
|
||||
{"epoch": 0.9886621315192744, "step": 655, "batch_size": 64, "mean": 95.34344482421875, "std": 96.97473907470703, "min": -106.00186920166016, "p10": -18.3711311340332, "median": 90.13352966308594, "p90": 204.33975219726562, "max": 318.2996826171875, "pos_frac": 0.796875, "sample": [153.9898223876953, 159.7626953125, -14.559074401855469, 6.545597076416016, 46.5745849609375, 143.08255004882812, 318.2996826171875, 143.24005126953125, 109.77063751220703, 4.787384033203125, -19.572296142578125, 174.51284790039062, -1.4313812255859375, -15.568412780761719, -2.4411544799804688, -24.288482666015625, 42.277740478515625, 218.5985107421875, 205.0546875, 9.025318145751953, 202.67156982421875, -12.087509155273438, 183.86312866210938, 296.79046630859375, 33.4017333984375, -21.672508239746094, 106.9601821899414, -70.37535095214844, 274.20074462890625, -64.20661926269531, 181.83839416503906, 113.71391296386719, 188.89144897460938, 50.74559783935547, 75.08070373535156, 24.565448760986328, 90.38164520263672, 174.08956909179688, 171.54312133789062, 245.7716064453125, 13.610977172851562, 43.82464599609375, 0.12355804443359375, 227.12075805664062, 170.21151733398438, -56.87348937988281, 37.77454376220703, 150.73365783691406, 181.54360961914062, 197.55201721191406, -9.509105682373047, 63.25242614746094, 198.98001098632812, 63.117401123046875, 56.08441162109375, 116.34880065917969, 113.1766357421875, 89.88541412353516, 174.59512329101562, 66.18283081054688, -106.00186920166016, 74.97946166992188, 142.10940551757812, 189.32936096191406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000655.npy"}
|
||||
{"epoch": 0.9901738473167044, "step": 656, "batch_size": 64, "mean": 71.14578247070312, "std": 107.86221313476562, "min": -221.0339813232422, "p10": -36.90384750366211, "median": 50.06709671020508, "p90": 220.09898223876957, "max": 267.881103515625, "pos_frac": 0.765625, "sample": [172.62879943847656, 92.81560516357422, 170.84463500976562, 187.84689331054688, 12.32797622680664, 0.7126312255859375, -2.744701385498047, -9.126045227050781, 22.445556640625, -29.01531219482422, -6.487987518310547, 251.42413330078125, -17.70777130126953, 2.526092529296875, 2.448944091796875, 174.56785583496094, -221.0339813232422, -21.968460083007812, 140.7552490234375, 169.25836181640625, 236.25070190429688, 17.322303771972656, -171.11065673828125, 38.00014114379883, 222.24107360839844, -39.665828704833984, 96.1109390258789, 222.48976135253906, -0.1000518798828125, 161.25381469726562, 159.0648193359375, 0.9728775024414062, 52.770103454589844, 45.716522216796875, 59.698936462402344, 143.8526153564453, 40.330352783203125, 35.63508605957031, 213.51507568359375, 83.85784912109375, 76.36592102050781, -127.92926025390625, 30.307029724121094, 228.66554260253906, 28.217987060546875, 215.10076904296875, 91.14291381835938, 60.78266143798828, 31.655452728271484, 176.81883239746094, 193.15350341796875, -30.459224700927734, -49.053466796875, 223.7365264892578, 203.62506103515625, 16.659774780273438, 16.36138916015625, -131.47335815429688, -46.647605895996094, 47.36408996582031, 54.424957275390625, 67.80757141113281, 198.0968475341797, 267.881103515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000656.npy"}
|
||||
{"epoch": 0.9916855631141346, "step": 657, "batch_size": 64, "mean": 83.9048843383789, "std": 118.59732818603516, "min": -211.68319702148438, "p10": -22.29171295166015, "median": 82.16648483276367, "p90": 225.6050506591797, "max": 323.037109375, "pos_frac": 0.765625, "sample": [240.12608337402344, 15.202239990234375, 13.52902603149414, 323.037109375, 133.79257202148438, 15.141279220581055, 317.9813537597656, 199.54421997070312, 43.043296813964844, -180.1001739501953, 23.449573516845703, -7.92424201965332, 126.87464904785156, 27.504898071289062, 317.20123291015625, 128.2256622314453, 80.79264068603516, 184.6538543701172, -154.74185180664062, 235.91104125976562, 18.098052978515625, 138.98834228515625, -166.8165283203125, 19.86204719543457, 68.5287094116211, 183.8480224609375, 121.0098876953125, -14.670379638671875, -4.747495651245117, 72.48694610595703, 148.08160400390625, 180.18870544433594, -25.557998657226562, 182.5569610595703, 110.5947036743164, 27.2943115234375, 167.358642578125, 178.41473388671875, 226.55325317382812, 7.544183731079102, 298.4794616699219, 14.537887573242188, -11.266082763671875, 223.392578125, 13.245819091796875, 118.81088256835938, 221.31951904296875, -5.284868240356445, -71.35245513916016, 83.54032897949219, -6.441337585449219, -211.68319702148438, 151.59747314453125, -2.0582714080810547, 29.20118522644043, 142.12295532226562, -62.90106964111328, 126.71741485595703, 95.17237854003906, 200.97975158691406, 10.025691986083984, 119.95832061767578, -13.8271484375, 182.76412963867188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000657.npy"}
|
||||
{"epoch": 0.9931972789115646, "step": 658, "batch_size": 64, "mean": 67.60679626464844, "std": 127.34534454345703, "min": -249.54226684570312, "p10": -73.78143615722655, "median": 61.71391296386719, "p90": 222.03186645507813, "max": 376.982177734375, "pos_frac": 0.75, "sample": [83.97100830078125, 11.957889556884766, 217.3698272705078, -249.54226684570312, 186.44265747070312, -5.429559707641602, -183.55609130859375, 99.42843627929688, 192.46705627441406, 235.17608642578125, -25.238296508789062, 188.52981567382812, -59.95472717285156, 75.58952331542969, 211.19468688964844, 81.57722473144531, -49.28739929199219, 144.6741180419922, 376.982177734375, 9.752105712890625, -17.529870986938477, 40.409332275390625, 56.978965759277344, 150.82577514648438, 168.77203369140625, 208.02606201171875, -192.37301635742188, 150.58218383789062, 106.21490478515625, 27.491256713867188, 11.055776596069336, -1.7694664001464844, 63.73748016357422, 122.79510498046875, 2.10443115234375, 59.690345764160156, 253.27792358398438, -184.38597106933594, 95.16768646240234, 46.901031494140625, 192.50439453125, 4.7912139892578125, 159.99037170410156, 229.78561401367188, -2.3234081268310547, 86.75276947021484, -79.70716857910156, 222.139404296875, 12.584999084472656, 79.51602172851562, 31.017532348632812, 1.737051010131836, -48.595497131347656, -133.29425048828125, 43.56110382080078, 143.53567504882812, 7.139148712158203, -189.16326904296875, 277.7004089355469, -1.74609375, 285.6888732910156, 66.54573059082031, 221.78094482421875, 4.817083358764648], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000658.npy"}
|
||||
{"epoch": 0.9947089947089947, "step": 659, "batch_size": 64, "mean": 52.525630950927734, "std": 113.99523162841797, "min": -204.8717041015625, "p10": -98.63360214233398, "median": 29.474257469177246, "p90": 198.97256622314453, "max": 274.8240051269531, "pos_frac": 0.671875, "sample": [30.098358154296875, 112.55906677246094, 207.8085479736328, 158.4240264892578, -3.131988525390625, 274.8240051269531, 158.2283172607422, 115.72972869873047, 9.577791213989258, 206.47341918945312, 231.10275268554688, -7.1714324951171875, 25.990310668945312, 24.943721771240234, -0.3778190612792969, -7.671607971191406, 144.45343017578125, -168.66647338867188, 14.755681991577148, -36.38563537597656, 188.45425415039062, -92.43653106689453, 79.10443115234375, 66.50469970703125, 2.9465465545654297, -32.1407470703125, 11.838357925415039, 28.850156784057617, -2.975038528442383, 74.61671447753906, 44.48243713378906, 55.419944763183594, 197.04042053222656, 137.8080596923828, 104.93140411376953, -12.012626647949219, 199.80062866210938, 114.72589111328125, 194.44427490234375, 7.032405853271484, 48.280059814453125, -204.8717041015625, -101.28948974609375, 191.15367126464844, 1.4417705535888672, -2.727630615234375, -7.998260498046875, 196.7437744140625, 151.94656372070312, -65.53446197509766, -7.6383209228515625, 182.75599670410156, -191.87307739257812, -136.09725952148438, 218.14830017089844, 21.426925659179688, 11.263191223144531, -190.1413116455078, 230.31771850585938, 75.54391479492188, -28.97876739501953, 151.5596466064453, 86.12161254882812, -127.91244506835938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000659.npy"}
|
||||
{"epoch": 0.9962207105064248, "step": 660, "batch_size": 64, "mean": 84.39476776123047, "std": 109.29448699951172, "min": -184.89527893066406, "p10": -43.428265380859365, "median": 74.85021209716797, "p90": 208.4345184326172, "max": 492.6029052734375, "pos_frac": 0.796875, "sample": [200.60183715820312, 68.63846588134766, 93.83912658691406, -72.90318298339844, 69.12869262695312, -89.03972625732422, -48.592864990234375, 196.78790283203125, -184.89527893066406, -12.396438598632812, 54.29952621459961, 216.82550048828125, 37.33186721801758, 126.932861328125, 12.72418212890625, 297.5624084472656, 195.0724639892578, 136.2290802001953, 25.00465202331543, -63.094940185546875, 126.2391357421875, 92.20587158203125, 33.74640655517578, 49.451271057128906, 120.26225280761719, 189.41122436523438, -9.045578002929688, 230.05108642578125, 96.977294921875, -60.5390625, 65.855712890625, 171.66864013671875, 492.6029052734375, 104.88371276855469, -6.185543060302734, -94.03251647949219, 17.806854248046875, -2.4645004272460938, 156.1562957763672, 82.12675476074219, 47.669837951660156, 6.522085189819336, 220.21347045898438, 151.12503051757812, 183.86114501953125, 197.95648193359375, -31.377532958984375, 32.62841033935547, 172.6868438720703, 131.7220458984375, -1.5844306945800781, 251.47174072265625, 18.98249053955078, 4.364969253540039, 173.94387817382812, 74.48410034179688, 75.21632385253906, 140.8018798828125, 3.97149658203125, 0.5038604736328125, 37.187835693359375, 102.25382995605469, 77.63334655761719, 211.7913818359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000660.npy"}
|
||||
{"epoch": 0.9977324263038548, "step": 661, "batch_size": 64, "mean": 54.875244140625, "std": 120.939697265625, "min": -272.85784912109375, "p10": -98.08309860229491, "median": 43.28889465332031, "p90": 195.09459075927737, "max": 333.79779052734375, "pos_frac": 0.703125, "sample": [-1.036407470703125, -1.8387832641601562, -103.6514892578125, -160.5869140625, 130.00640869140625, 162.9582061767578, 174.9839324951172, 7.0166015625, -272.85784912109375, 179.32176208496094, -159.7207489013672, 13.292861938476562, -55.59428405761719, 190.43084716796875, 212.5697021484375, 121.18895721435547, 213.38241577148438, 99.04510498046875, 333.79779052734375, 61.60655212402344, -187.01011657714844, 90.20018005371094, 149.56130981445312, -117.91207885742188, -5.948505401611328, 35.46099853515625, 25.91834259033203, -68.9864501953125, 2.0930099487304688, 6.469398498535156, 56.43082046508789, 85.94515228271484, -171.2190704345703, 32.96479034423828, 182.1295166015625, -0.3325996398925781, 72.64768981933594, 197.0933380126953, 21.691761016845703, 174.7541046142578, 33.4617919921875, 51.116790771484375, -68.33769226074219, 168.89817810058594, 183.123046875, 94.35137939453125, 236.25936889648438, 20.056665420532227, 5.716039657592773, 116.55452728271484, -85.0901870727539, -5.682838439941406, 20.249557495117188, -57.667320251464844, 62.5683479309082, 141.640625, 102.58399200439453, 171.8052978515625, 93.20504760742188, 199.0026092529297, 18.173095703125, -13.85089111328125, -29.119041442871094, 326.73089599609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p5-20260429-085449/margin_logs/step_0000661.npy"}
|
||||
3
margin_logs/step_0000001.npy
Normal file
3
margin_logs/step_0000001.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:22dddd9b4bf59a58ac9754704862dc0b60abca7f1f9941029f73d8f470387557
|
||||
size 384
|
||||
3
margin_logs/step_0000002.npy
Normal file
3
margin_logs/step_0000002.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:9c4a0d8c26a315903fc2506660d8ac2eb82c1e4d9a761e6a7de89830e1a119f6
|
||||
size 384
|
||||
3
margin_logs/step_0000003.npy
Normal file
3
margin_logs/step_0000003.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:79274d8f5913dcc1afa1368ca837e317baca0be00c4201ee2cee3b8c9cc4fcc5
|
||||
size 384
|
||||
3
margin_logs/step_0000004.npy
Normal file
3
margin_logs/step_0000004.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:7e2d9e08d2a4e539b317b1cc334be50e8ea9735cd12e8e1bcd416f68dc069252
|
||||
size 384
|
||||
3
margin_logs/step_0000005.npy
Normal file
3
margin_logs/step_0000005.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:744e270c5b761cd84156cc1f853d8aade71d0e95ce8681724fdd75a0eab60464
|
||||
size 384
|
||||
3
margin_logs/step_0000006.npy
Normal file
3
margin_logs/step_0000006.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:35d5197fd91c0a13bbcd60bd32bb785f023f46c5a13746c44556446546db7866
|
||||
size 384
|
||||
3
margin_logs/step_0000007.npy
Normal file
3
margin_logs/step_0000007.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:ae0142f331806f4bccab0d3c6e4b36ffd7f8735f0ec858e37b695719dc233947
|
||||
size 384
|
||||
3
margin_logs/step_0000008.npy
Normal file
3
margin_logs/step_0000008.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:c974b6ea458f0c29d8cdb440f8018614c3bab75cdeccd6484e08d5996aeaea0a
|
||||
size 384
|
||||
3
margin_logs/step_0000009.npy
Normal file
3
margin_logs/step_0000009.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:90fc65c013382c3ae21a20451d8d511b4eebd87d397bbfb92527f4fea58b3848
|
||||
size 384
|
||||
3
margin_logs/step_0000010.npy
Normal file
3
margin_logs/step_0000010.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:19adebf04a74439ecd21be344b00cafe1c41b477448f5b91058ff2de8c94859c
|
||||
size 384
|
||||
3
margin_logs/step_0000011.npy
Normal file
3
margin_logs/step_0000011.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:9ff49891c2f0fb71f083ca282fd38e504ab8a8ad28e8d9060d2e307b77eb1836
|
||||
size 384
|
||||
3
margin_logs/step_0000012.npy
Normal file
3
margin_logs/step_0000012.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:aaec98aa0a2d0912244eaa25bb8cefeae4dd2348cfbdc940cd44b0975182431f
|
||||
size 384
|
||||
3
margin_logs/step_0000013.npy
Normal file
3
margin_logs/step_0000013.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:9537b1b8e9dbc15128ca0d156015da8823909f082c41f67d64fa7af4579348fa
|
||||
size 384
|
||||
3
margin_logs/step_0000014.npy
Normal file
3
margin_logs/step_0000014.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:cc1fc675ac1fe15ee8be76aa9c8a3ea203c979915d5c04fb61fa38ca0f82df7f
|
||||
size 384
|
||||
3
margin_logs/step_0000015.npy
Normal file
3
margin_logs/step_0000015.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:03e229d0f54dfcdeb76b8b2e7d8240358c8981e383162acc25b4ea6b443f6dae
|
||||
size 384
|
||||
3
margin_logs/step_0000016.npy
Normal file
3
margin_logs/step_0000016.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:501e8ddec6bdb2a011a66e003b0704b00796d575bd5a5ec94a5195356e5bb55d
|
||||
size 384
|
||||
3
margin_logs/step_0000017.npy
Normal file
3
margin_logs/step_0000017.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:c366afc37cc25c38013730d666ceeafbd462cef9d93c56e3f22a6b95c0eed791
|
||||
size 384
|
||||
3
margin_logs/step_0000018.npy
Normal file
3
margin_logs/step_0000018.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:57f202b3725998aa964543500681c522f074b317226f225c614c3c0f43d6bf88
|
||||
size 384
|
||||
3
margin_logs/step_0000019.npy
Normal file
3
margin_logs/step_0000019.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:69e50e1367ae6425128bc2f938d2be33b2565857b99fda28b2ef6ee1c4c3dec2
|
||||
size 384
|
||||
3
margin_logs/step_0000020.npy
Normal file
3
margin_logs/step_0000020.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:4e7341c588a397d15520a0d8942054611225626596e09cbc41781cb5c868bcfb
|
||||
size 384
|
||||
3
margin_logs/step_0000021.npy
Normal file
3
margin_logs/step_0000021.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:9c5a535ef199f3258766dee4bf0714bbe84f3eef8e730a5731e598ed4c9590c4
|
||||
size 384
|
||||
3
margin_logs/step_0000022.npy
Normal file
3
margin_logs/step_0000022.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d70f3ae72b7c2ea9a2308b47ffc3a30efa4eb558dc99127c48ab3714434fa6ec
|
||||
size 384
|
||||
3
margin_logs/step_0000023.npy
Normal file
3
margin_logs/step_0000023.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:c0381d4df314de28a1fab76078fc9fb4655f73a54e844c56ce368d217252bba8
|
||||
size 384
|
||||
3
margin_logs/step_0000024.npy
Normal file
3
margin_logs/step_0000024.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:73e339fdaaeaafcf3c2c0500cb390340a6d6fba5922b546a2c236c433bc0177d
|
||||
size 384
|
||||
3
margin_logs/step_0000025.npy
Normal file
3
margin_logs/step_0000025.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:40d55d9317c1b590ab2f9c8c594dd9597aafa083de114920d9ca9f1ab7f710d2
|
||||
size 384
|
||||
3
margin_logs/step_0000026.npy
Normal file
3
margin_logs/step_0000026.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:0950bd9c7b0d718f8bc02c9b4cfe91f20c16f44ff18463f8daced6d71d85f401
|
||||
size 384
|
||||
3
margin_logs/step_0000027.npy
Normal file
3
margin_logs/step_0000027.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:11a3080adbf20dd3502c7ae474900d0b8c71e32a5e3028d35b3ff71e97454951
|
||||
size 384
|
||||
3
margin_logs/step_0000028.npy
Normal file
3
margin_logs/step_0000028.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:55cfa41d67500859ceb230fbe8dc6797213891e9950bbdd90a935b0d7f4a2f35
|
||||
size 384
|
||||
3
margin_logs/step_0000029.npy
Normal file
3
margin_logs/step_0000029.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:dc81c95a73408cbcac6a0c13b015834d125bcb59f065852693d271f3cb19b70e
|
||||
size 384
|
||||
3
margin_logs/step_0000030.npy
Normal file
3
margin_logs/step_0000030.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:20ab6433c246c324580818748ea2d8b45358aa157c2ffe138e1dd54d11b884e3
|
||||
size 384
|
||||
3
margin_logs/step_0000031.npy
Normal file
3
margin_logs/step_0000031.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:c082487f804a448b8d9c419cae6dcc2aa5203cd9ff2c6a6d80140dc6f1b770ee
|
||||
size 384
|
||||
3
margin_logs/step_0000032.npy
Normal file
3
margin_logs/step_0000032.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:6ac7887a7a39cb647c53de66a674452154b0282fb56799790856bdaa071cc380
|
||||
size 384
|
||||
3
margin_logs/step_0000033.npy
Normal file
3
margin_logs/step_0000033.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:984c1249bfdbd9352a55a1c37759d1baf4e648fce0b32e0e94727041c9754f38
|
||||
size 384
|
||||
3
margin_logs/step_0000034.npy
Normal file
3
margin_logs/step_0000034.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:b4611467e4d791be557a56a8d6048d164fd4d7a68eefaf3900175376f64d59c8
|
||||
size 384
|
||||
3
margin_logs/step_0000035.npy
Normal file
3
margin_logs/step_0000035.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:238f04a0f7c944a1d4f044df9e5c5f9e12bf0aca855105b7e3781932bdc5cc4d
|
||||
size 384
|
||||
3
margin_logs/step_0000036.npy
Normal file
3
margin_logs/step_0000036.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:e2834cec72b5c9c664eefa7321053a467625944eeb075c1f34270faab114d23f
|
||||
size 384
|
||||
3
margin_logs/step_0000037.npy
Normal file
3
margin_logs/step_0000037.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:2bbc974a6fc8cfd461c7509021605e1b595038d931c3c4aed9b15a6ef32816e8
|
||||
size 384
|
||||
3
margin_logs/step_0000038.npy
Normal file
3
margin_logs/step_0000038.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:1c844c2dcc5baadb520390ee4da99ccb74689b93f528598958d2843faa3cd29f
|
||||
size 384
|
||||
3
margin_logs/step_0000039.npy
Normal file
3
margin_logs/step_0000039.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:a856c3472190f35a3518123c347ec80b44c7ba79862cce1380c7f737ea4ec2a4
|
||||
size 384
|
||||
3
margin_logs/step_0000040.npy
Normal file
3
margin_logs/step_0000040.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:a6db5c07939968ca49a75530bf2131dff5970351c8b340c0600009a2286fe189
|
||||
size 384
|
||||
3
margin_logs/step_0000041.npy
Normal file
3
margin_logs/step_0000041.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:33fbf543b0ed844978603e37d778a88fc9591d09f2da56372afc9b7773ac8410
|
||||
size 384
|
||||
3
margin_logs/step_0000042.npy
Normal file
3
margin_logs/step_0000042.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:ac22f453e000345c04257014a9979e495041d4f7134bbbe312aa8831d066d4df
|
||||
size 384
|
||||
3
margin_logs/step_0000043.npy
Normal file
3
margin_logs/step_0000043.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:b0b5685e35f78a66427cbe90cfb9029cc56ef1ed4f24a1ef43aebcef8ae02eb6
|
||||
size 384
|
||||
3
margin_logs/step_0000044.npy
Normal file
3
margin_logs/step_0000044.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:36a99527775418d6434b5d64c6ff3584c45b38e971e97c9908f1c39e74d23f39
|
||||
size 384
|
||||
3
margin_logs/step_0000045.npy
Normal file
3
margin_logs/step_0000045.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:b55744e587db4d5153ae4ede0adc925fa40e3bbec6101348b2c2c81c7578744b
|
||||
size 384
|
||||
3
margin_logs/step_0000046.npy
Normal file
3
margin_logs/step_0000046.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:0edfbb4dabe4e9fd9bfb1da7a88cb7ea564bec1551b789c3e9f7e0ae9a4c6570
|
||||
size 384
|
||||
3
margin_logs/step_0000047.npy
Normal file
3
margin_logs/step_0000047.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:7afb3279562b16a7f8e151546e25e21e4280c259ee0c69993597a4a1d8a27b0a
|
||||
size 384
|
||||
3
margin_logs/step_0000048.npy
Normal file
3
margin_logs/step_0000048.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:8e00fff6cf155c19db7b42c7f31ac90a50f4c0c90ab66736ec4259931cff49d1
|
||||
size 384
|
||||
3
margin_logs/step_0000049.npy
Normal file
3
margin_logs/step_0000049.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:61ab7d5946cdca7b718ca3cd3ea9f6213b233a41cc4b9c1a920d10368bcaef5d
|
||||
size 384
|
||||
3
margin_logs/step_0000050.npy
Normal file
3
margin_logs/step_0000050.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:c5b0987bca3e59f718006595a4135109bf98d3def30337e3f3b4440b6ad10dab
|
||||
size 384
|
||||
3
margin_logs/step_0000051.npy
Normal file
3
margin_logs/step_0000051.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:fe789675e97af810b63186b7eae294ad8f5553ffc8b050dccd4f777784f019fa
|
||||
size 384
|
||||
3
margin_logs/step_0000052.npy
Normal file
3
margin_logs/step_0000052.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:8535771ca6e7e8c5ab8e18043b3fc53a4bb4d98f918cd06518c8467284cc6f97
|
||||
size 384
|
||||
3
margin_logs/step_0000053.npy
Normal file
3
margin_logs/step_0000053.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:a75f9939f3e070f279de860676d6fdb2d8fd2447649d296e15258c34026b3b07
|
||||
size 384
|
||||
3
margin_logs/step_0000054.npy
Normal file
3
margin_logs/step_0000054.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:b36270c7adc9dee31568dc9b00f75b6d28d3a9cde560c31ef696e8a699ee1033
|
||||
size 384
|
||||
3
margin_logs/step_0000055.npy
Normal file
3
margin_logs/step_0000055.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:3938a252ccdf92ab5f46c156018b3a465d01a06d81598e85b64ae7ce4121d197
|
||||
size 384
|
||||
3
margin_logs/step_0000056.npy
Normal file
3
margin_logs/step_0000056.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:8fe4b8facccf759666efb642ac4d8d79a5889db43066a3bd15b2e1754bb51b53
|
||||
size 384
|
||||
3
margin_logs/step_0000057.npy
Normal file
3
margin_logs/step_0000057.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:2cd85029c9d923dcf58e8a43b5a7a8731beb9ae2044444cb07992c05e665661d
|
||||
size 384
|
||||
3
margin_logs/step_0000058.npy
Normal file
3
margin_logs/step_0000058.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:9842f153a1118dcea7b8d0def6bc8eee2d662c0ddee967ec386571f22049a8c7
|
||||
size 384
|
||||
3
margin_logs/step_0000059.npy
Normal file
3
margin_logs/step_0000059.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:2fc1801af630f82405d5f479200ee0b98fedef568da3fde798113e1ff8265b05
|
||||
size 384
|
||||
3
margin_logs/step_0000060.npy
Normal file
3
margin_logs/step_0000060.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:fa1bce661411f15e18baf36776d60a7262b0784b70e0a69c0596d8ce8ef3e7c6
|
||||
size 384
|
||||
3
margin_logs/step_0000061.npy
Normal file
3
margin_logs/step_0000061.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:feb4095efc8d4bab7e69b5e6722af6ea5c4637c75720e2907cce5534729ecfd5
|
||||
size 384
|
||||
3
margin_logs/step_0000062.npy
Normal file
3
margin_logs/step_0000062.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:294757e42dff76912cc694c58f4a2b41f9d91e17bb1e960d02099b3b06359d82
|
||||
size 384
|
||||
3
margin_logs/step_0000063.npy
Normal file
3
margin_logs/step_0000063.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d4a11478bc3372d790003d08e12bfb02089a149d5e4d6536c07d6a4d661819b8
|
||||
size 384
|
||||
3
margin_logs/step_0000064.npy
Normal file
3
margin_logs/step_0000064.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:e0f80ee02a8446edc7b5453ddeb2e9d924a974634606e6e0365345da1664d2ec
|
||||
size 384
|
||||
3
margin_logs/step_0000065.npy
Normal file
3
margin_logs/step_0000065.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:276394332452ca1b23eb1e1b839f7b48ca129d0ad26b683ae9cf21d32957531c
|
||||
size 384
|
||||
3
margin_logs/step_0000066.npy
Normal file
3
margin_logs/step_0000066.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:2167b2974f45b7e3f47ad308c033bbe8fd3e05ea06b6b57eee792b64e53e0fd9
|
||||
size 384
|
||||
3
margin_logs/step_0000067.npy
Normal file
3
margin_logs/step_0000067.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:3c6f233eb8c49ab57d59e5a983e9445e96a2de412aea59c8b17c5fd4183ff1a5
|
||||
size 384
|
||||
3
margin_logs/step_0000068.npy
Normal file
3
margin_logs/step_0000068.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:9fab4ae959645da66c99d85c483ac39feda96e22749c148c5aa605c7ce9c510f
|
||||
size 384
|
||||
3
margin_logs/step_0000069.npy
Normal file
3
margin_logs/step_0000069.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:f3ad66befb903f5ec8176b217daea29abf5d1caf7a10427e6774731dfcb6b8cc
|
||||
size 384
|
||||
3
margin_logs/step_0000070.npy
Normal file
3
margin_logs/step_0000070.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:55e0ec6bdb2e3699476322f6dade312eecaef2afcc1eb3af8ab07a03ec0ca16e
|
||||
size 384
|
||||
3
margin_logs/step_0000071.npy
Normal file
3
margin_logs/step_0000071.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:e23e571788529947754cd5aae792d5b161273d3f98b66adf73159844ee2e3f00
|
||||
size 384
|
||||
3
margin_logs/step_0000072.npy
Normal file
3
margin_logs/step_0000072.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:9f1ef76939ead7eb3e0bf51f84cf131c1c6f121376225d443d5217e0556cbc01
|
||||
size 384
|
||||
3
margin_logs/step_0000073.npy
Normal file
3
margin_logs/step_0000073.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:0d960b616aae50202c7bc6f51697f9a485701541ccc64472c86cf0ac5a5eae0a
|
||||
size 384
|
||||
3
margin_logs/step_0000074.npy
Normal file
3
margin_logs/step_0000074.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:12004c5c5247244d5005cc983fbe67568cb61bad10d595eb189367571c4e1348
|
||||
size 384
|
||||
3
margin_logs/step_0000075.npy
Normal file
3
margin_logs/step_0000075.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:9419d9a279b6cd4f57460b0d0715d122a83033685654d6aabc55511d7808dc84
|
||||
size 384
|
||||
3
margin_logs/step_0000076.npy
Normal file
3
margin_logs/step_0000076.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:ee59754590bef84d4c6a3ae4582d9439e3550a753c5ef601a3335826530324f4
|
||||
size 384
|
||||
3
margin_logs/step_0000077.npy
Normal file
3
margin_logs/step_0000077.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:e16584e8b9db08444184e3b7f9b0a34b81172c0c7e352b6ec401cbd539eb931e
|
||||
size 384
|
||||
3
margin_logs/step_0000078.npy
Normal file
3
margin_logs/step_0000078.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:24424744c5b4faeeddfe1946fc3f6247180ea8a5f053bc7183cb61198376bfb3
|
||||
size 384
|
||||
3
margin_logs/step_0000079.npy
Normal file
3
margin_logs/step_0000079.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:ecc9dee44ae338d7938df851f32c1a298af144aeedc9cdb4341a1f599c7ba132
|
||||
size 384
|
||||
3
margin_logs/step_0000080.npy
Normal file
3
margin_logs/step_0000080.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:5ad0e8f795de44ed1575f7a41c4660b5b0f48fcb87277c1bf35a0bfa08dfea1f
|
||||
size 384
|
||||
3
margin_logs/step_0000081.npy
Normal file
3
margin_logs/step_0000081.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:2c133ae10c9141f603eb87b9986b052a9a1e9b8a139c3220fb0c9564855d19b4
|
||||
size 384
|
||||
3
margin_logs/step_0000082.npy
Normal file
3
margin_logs/step_0000082.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:0a0303f3a3bf6d3a156bba180f4ba82de4903f6c4d2d92f39f8602be1ab23e8f
|
||||
size 384
|
||||
3
margin_logs/step_0000083.npy
Normal file
3
margin_logs/step_0000083.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:a2cdbee2fd4a3f1fd58b8a4a51708373f965019373a8020190c2748213fdae60
|
||||
size 384
|
||||
3
margin_logs/step_0000084.npy
Normal file
3
margin_logs/step_0000084.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:44656910dbc101a3cbb30074c886c53f6699c4f51f47a5d336b4f076161ecef7
|
||||
size 384
|
||||
3
margin_logs/step_0000085.npy
Normal file
3
margin_logs/step_0000085.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:cee5edc070caf5663beeb021e8693ec46cff94ae4fa197ca294bfd4553dac402
|
||||
size 384
|
||||
3
margin_logs/step_0000086.npy
Normal file
3
margin_logs/step_0000086.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:1ce5a87e0a897dbb347e3b9b099b443c0a398f2975a1f377206f6d1dd5e1a2ae
|
||||
size 384
|
||||
3
margin_logs/step_0000087.npy
Normal file
3
margin_logs/step_0000087.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:4591398dd290fb174c4b95dd41217eeb0a863e85231749c75f4d086b3699ddbe
|
||||
size 384
|
||||
3
margin_logs/step_0000088.npy
Normal file
3
margin_logs/step_0000088.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:78d3b2c0f7aa47af3aeccc38e7cdd85e98adf4ac961bb6cc18eddaa31d7b0553
|
||||
size 384
|
||||
3
margin_logs/step_0000089.npy
Normal file
3
margin_logs/step_0000089.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:498fe8dae932aea9a3aefc18d2adc64356d7f875a3034a00f552d50b495172af
|
||||
size 384
|
||||
3
margin_logs/step_0000090.npy
Normal file
3
margin_logs/step_0000090.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:0a012744542736cbb9a25c28e292a9bf97b8a9dfccff7509bb387ce948f2bd6b
|
||||
size 384
|
||||
3
margin_logs/step_0000091.npy
Normal file
3
margin_logs/step_0000091.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:db51d1104eb950b72ecdcec4a80648f30011135f4904fa908ba3de6db9c6a0c6
|
||||
size 384
|
||||
3
margin_logs/step_0000092.npy
Normal file
3
margin_logs/step_0000092.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:b8beb35202f5ccfe502bc67ddc77a12ff4468b5d06e433b5be5c578ef70729fc
|
||||
size 384
|
||||
3
margin_logs/step_0000093.npy
Normal file
3
margin_logs/step_0000093.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:59a75f03f50292e042b9d2640aaac2c59986d050d7d6c6e858b798d6cf7e2a50
|
||||
size 384
|
||||
3
margin_logs/step_0000094.npy
Normal file
3
margin_logs/step_0000094.npy
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:4744f417ba0303f1d4ad533783c06ebc75d938f0a350c129e1611a7e87df6be0
|
||||
size 384
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user