39 lines
1.1 KiB
JSON
39 lines
1.1 KiB
JSON
{
  "saved_at": "2026-06-01T10:20:24",
  "model_name": "/disk/u/yu.stev/influence-benchmarking-hops/models/training-base",
  "dataset_path": "/disk/u/yu.stev/influence-benchmarking-hops/dataset-generator/datasets/0/100/1.jsonl",
  "output_dir": "/disk/u/yu.stev/influence-benchmarking-hops/models/0/1doc/final-model",
  "seed_path": "/disk/u/yu.stev/influence-benchmarking-hops/dataset-generator/seed/0/100.jsonl",
  "epochs": 500,
  "batch_size": 10,
  "gradient_accumulation_steps": 1,
  "effective_batch_size": 10,
  "max_steps": null,
  "max_length": 2048,
  "seed": 42,
  "learning_rate": 0.0002,
  "lr_scheduler": "cosine",
  "lr_min": 2e-05,
  "warmup_steps": 100,
  "constant_steps": 4000,
  "shuffle_training": true,
  "shuffle_validation": true,
  "family_batching": false,
  "family_spreading": false,
  "checkpoint_fraction": 0.25,
  "save_steps_override": 1000,
  "hop_depth": null,
  "bf16": true,
  "fp16": false,
  "prompt_format": "output",
  "use_hops_eval": false,
  "use_depth0_eval": true,
  "eval_hop_depths": [
    0,
    1,
    2
  ],
  "normal_tokens_test": false,
  "num_functions": 10,
  "track_depth_loss": true
}