{
  "best_metric": null,
  "config": {
    "auto_model_config": {
      "attn_implementation": "flash_attention_2",
      "use_cache": false
    },
    "auto_resume_if_possible": true,
    "auto_tokenizer_config": {
      "use_fast": true
    },
    "base_model": "Qwen/Qwen3-4B-Instruct-2507",
    "cache_config": {
      "cache_dir": null,
      "force_regenerate": false,
      "use_cache": true
    },
    "datamodule_config": {
      "add_block_markers": false,
      "add_line_numbers": false,
      "base_filter_rule": "",
      "cache_lock_timeout_seconds": 1800.0,
      "dataloader_config_overrides": {
        "train": {
          "shuffle": true
        }
      },
      "datamodule_class_path": "pyine.organisms.datamodules.shortcuts.ShortcutBiasDataModule",
      "datamodule_name": null,
      "dataparser_config_overrides": {
        "test": {},
        "train": {
          "filtering_config": {
            "max_args_length": 500,
            "max_code_length": 2500,
            "max_code_line_count": 250,
            "max_code_line_length": 250,
            "max_traces_per_solution": 1
          },
          "selection_config": {
            "code_type_prob_map": {
              "hinted": 0.05,
              "obfuscated": 0.05,
              "obfuscated_hinted": 0.05,
              "original": 0.75,
              "stubbed": 0.1
            },
            "draw_attempts": 5,
            "fallback_to_orig": true,
            "samples_per_family": 1
          },
          "transform_config": {
            "transform_strategy": "never"
          }
        },
        "valid": {
          "filtering_config": {
            "max_args_length": 500,
            "max_code_length": 2500,
            "max_code_line_count": 250,
            "max_code_line_length": 250,
            "max_traces_per_solution": 1
          },
          "selection_config": {
            "code_type_prob_map": {
              "hinted": 0.0,
              "obfuscated": 0.0,
              "obfuscated_hinted": 0.0,
              "original": 1.0,
              "stubbed": 0.0
            },
            "draw_attempts": 5,
            "fallback_to_orig": true,
            "samples_per_family": 1
          },
          "transform_config": {
            "transform_strategy": "never"
          }
        },
        "valid_hinted": {
          "filtering_config": {
            "max_args_length": null,
            "max_code_length": null,
            "max_code_line_count": null,
            "max_code_line_length": null,
            "max_trace_families": null,
            "max_trace_steps": null,
            "max_traces_per_family": null,
            "max_traces_per_problem": null,
            "max_traces_per_solution": null,
            "seed": null,
            "tokenizer_model_id": null,
            "tokenizer_path": null,
            "use_token_lengths": false
          },
          "selection_config": {
            "allow_db_lookups": true,
            "fallback_to_orig": false,
            "require_hint_type": "helpful"
          }
        },
        "valid_hintless": {
          "filtering_config": {
            "max_args_length": null,
            "max_code_length": null,
            "max_code_line_count": null,
            "max_code_line_length": null,
            "max_trace_families": null,
            "max_trace_steps": null,
            "max_traces_per_family": null,
            "max_traces_per_problem": null,
            "max_traces_per_solution": null,
            "seed": null,
            "tokenizer_model_id": null,
            "tokenizer_path": null,
            "use_token_lengths": false
          },
          "selection_config": {
            "fallback_to_orig": false,
            "skip_code_type_selection": true
          }
        },
        "valid_misleading": {
          "filtering_config": {
            "max_args_length": null,
            "max_code_length": null,
            "max_code_line_count": null,
            "max_code_line_length": null,
            "max_trace_families": null,
            "max_trace_steps": null,
            "max_traces_per_family": null,
            "max_traces_per_problem": null,
            "max_traces_per_solution": null,
            "seed": null,
            "tokenizer_model_id": null,
            "tokenizer_path": null,
            "use_token_lengths": false
          },
          "selection_config": {
            "allow_db_lookups": true,
            "fallback_to_orig": false,
            "require_hint_type": "misleading",
            "require_validated_misleading": true
          }
        }
      },
      "default_dataloader_config": {
        "base_class_path": "torch.utils.data.dataloader.DataLoader",
        "class_path": "torch.utils.data.dataloader.DataLoader",
        "params": {
          "batch_sampler": null,
          "batch_size": 1,
          "collate_fn": null,
          "drop_last": false,
          "generator": null,
          "in_order": true,
          "multiprocessing_context": null,
          "num_workers": 0,
          "persistent_workers": false,
          "pin_memory": false,
          "pin_memory_device": "",
          "prefetch_factor": null,
          "sampler": null,
          "shuffle": null,
          "timeout": 0,
          "worker_init_fn": null
        },
        "params_key": null
      },
      "default_dataparser_config": {
        "base_class_path": "torch.utils.data.dataset.Dataset",
        "class_path": "pyine.organisms.datamodules.samples.builder.SampleBuilder",
        "params": {
          "filtering_config": {
            "seed": 0
          },
          "selection_config": {
            "allow_db_lookups": true,
            "code_type_prob_map": {
              "hinted": 0.0,
              "obfuscated": 0.0,
              "obfuscated_hinted": 0.0,
              "original": 1.0,
              "stubbed": 0.0
            },
            "draw_attempts": 5,
            "fallback_to_orig": false,
            "samples_per_family": 1,
            "seed": 0
          },
          "transform_config": {
            "seed": 0,
            "transform_strategy": "never"
          }
        },
        "params_key": null
      },
      "eval_hint_types": [
        "helpful",
        "misleading"
      ],
      "eval_subset_names": [
        "valid"
      ],
      "evaluation_strategy": "counterfactual",
      "hf_messages_key": "prompt",
      "instantiate_parsers_at_setup": false,
      "keep_generated_datasets_in_memory": false,
      "lmdb_paths": [
        "/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000001of000026.2025-12-10.lmdb",
        "/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000002of000026.2025-12-10.lmdb",
        "/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000003of000026.2025-12-10.lmdb",
        "/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000004of000026.2025-12-11.lmdb",
        "/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000005of000026.2025-12-11.lmdb",
        "/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000006of000026.2025-12-12.lmdb",
        "/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000007of000026.2025-12-12.lmdb",
        "/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000008of000026.2025-12-10.lmdb",
        "/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000009of000026.2025-12-12.lmdb",
        "/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000010of000026.2025-12-11.lmdb",
        "/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000011of000026.2025-12-12.lmdb",
        "/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000012of000026.2025-12-12.lmdb",
        "/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000013of000026.2025-12-12.lmdb",
        "/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000014of000026.2025-12-13.lmdb",
        "/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000015of000026.2025-12-10.lmdb",
        "/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000016of000026.2025-12-10.lmdb",
        "/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000017of000026.2025-12-10.lmdb",
        "/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000018of000026.2025-12-11.lmdb",
        "/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000019of000026.2025-12-11.lmdb",
        "/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000020of000026.2025-12-12.lmdb",
        "/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000021of000026.2025-12-10.lmdb",
        "/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000022of000026.2025-12-10.lmdb",
        "/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000023of000026.2025-12-11.lmdb",
        "/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000024of000026.2025-12-11.lmdb",
        "/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000025of000026.2025-12-11.lmdb",
        "/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000026of000026.2025-12-12.lmdb"
      ],
      "max_solution_count": null,
      "message_generator_num_workers": 8,
      "min_samples_hinted": 0,
      "min_samples_hintless": 0,
      "min_samples_misleading": 0,
      "pregenerated_outputs_lmdb_paths": null,
      "pregenerated_outputs_only_matched": false,
      "pregenerated_outputs_phase_prefix": "",
      "pregenerated_outputs_selection": "latest",
      "prompt_config": {
        "context_variables": null,
        "examples_block_variables": null,
        "include_examples": false,
        "partial_vars": {},
        "prompt_name": "code_execution",
        "role_variables": null,
        "target_examples": null,
        "use_chat_template": true,
        "version": "rl_tagged_answer"
      },
      "require_validated_misleading": true,
      "split_file_path": "/scratch/a.palmas/code-interp-benchmark/data/splits/TACO-split.bin",
      "split_seed": 0,
      "subset_names": [
        "train",
        "valid",
        "test",
        "valid_hinted",
        "valid_misleading",
        "valid_hintless"
      ],
      "train_subset_names": [
        "train"
      ],
      "use_local_dataset_cache": true,
      "use_tokenized_dataset_cache": true,
      "valid_subset_names": [
        "valid_hinted",
        "valid_hintless",
        "valid_misleading"
      ]
    },
    "evals_config": {
      "category_extraction_config": {
        "enabled_fields": [
          "has_keyword",
          "code_type",
          "predict_type"
        ],
        "tag_prefixes": null
      },
      "eval_batch_size": 24,
      "eval_generation_config": null,
      "eval_generation_max_new_tokens_override": 1024,
      "eval_padding_side": "left",
      "eval_runnable_config": {
        "async_metrics_compute_rate": 100,
        "max_in_flight_jobs": 32,
        "max_workers": null,
        "parallel": true
      },
      "eval_type": "code_exec",
      "evaluator_kwargs": {
        "add_idempotency_header": true,
        "llm_provider_config": {
          "model_kwargs": {
            "max_retries": 0,
            "max_tokens": 1024,
            "model": "gpt-5-nano",
            "reasoning": {
              "effort": "minimal"
            },
            "timeout": 15
          },
          "provider": "openai",
          "rate_limiter_config": {
            "max_bucket_size": 50,
            "requests_per_second": 50
          },
          "with_retry_config": {
            "retry_if_exception_type": [
              "openai.APITimeoutError",
              "openai.APIConnectionError",
              "openai.RateLimitError",
              "openai.InternalServerError"
            ],
            "stop_after_attempt": 10,
            "wait_exponential_jitter": true
          }
        }
      },
      "vllm_provider_config": null
    },
    "generation_export_config": null,
    "gpu_stats_logging": {
      "collect_all_visible_devices": false,
      "eval_prefix": "eval/gpu/",
      "gather_eval_metrics": true,
      "gather_train_metrics": "at_phase_end",
      "only_main_process": true,
      "require_nvml": false,
      "sample_every_n_steps": 1,
      "train_prefix": "train/gpu/"
    },
    "grpo_config": {
      "_n_gpu": 1,
      "accelerator_config": {
        "dispatch_batches": null,
        "even_batches": true,
        "gradient_accumulation_kwargs": null,
        "non_blocking": false,
        "split_batches": false,
        "use_configured_state": false,
        "use_seedable_sampler": true
      },
      "adafactor": false,
      "adam_beta1": 0.9,
      "adam_beta2": 0.999,
      "adam_epsilon": 1e-08,
      "auto_find_batch_size": false,
      "average_tokens_across_devices": true,
      "batch_eval_metrics": false,
      "beta": 0.0,
      "bf16": true,
      "bf16_full_eval": false,
      "cache_implementation": null,
      "cast_lm_head_to_fp32": false,
      "chat_template_kwargs": null,
      "data_seed": null,
      "dataloader_drop_last": false,
      "dataloader_num_workers": 4,
      "dataloader_persistent_workers": false,
      "dataloader_pin_memory": false,
      "dataloader_prefetch_factor": null,
      "ddp_backend": null,
      "ddp_broadcast_buffers": null,
      "ddp_bucket_cap_mb": null,
      "ddp_find_unused_parameters": null,
      "ddp_timeout": 1800,
      "debug": [],
      "deepspeed": null,
      "delta": null,
      "disable_dropout": false,
      "disable_tqdm": false,
      "do_eval": true,
      "do_predict": false,
      "do_train": true,
      "ds3_gather_for_generation": true,
      "epsilon": 0.2,
      "epsilon_high": null,
      "eval_accumulation_steps": null,
      "eval_delay": 0.0,
      "eval_do_concat_batches": true,
      "eval_on_start": true,
      "eval_steps": 10,
      "eval_strategy": "steps",
      "eval_use_gather_object": false,
      "fp16": false,
      "fp16_backend": "auto",
      "fp16_full_eval": false,
      "fp16_opt_level": "O1",
      "fsdp": [],
      "fsdp_config": {
        "min_num_params": 0,
        "xla": false,
        "xla_fsdp_grad_ckpt": false,
        "xla_fsdp_v2": false
      },
      "fsdp_min_num_params": 0,
      "fsdp_transformer_layer_cls_to_wrap": null,
      "full_determinism": false,
      "generation_batch_size": 960,
      "generation_kwargs": null,
      "gradient_accumulation_steps": 6,
      "gradient_checkpointing": true,
      "gradient_checkpointing_kwargs": {
        "use_reentrant": false
      },
      "greater_is_better": null,
      "group_by_length": false,
      "half_precision_backend": "auto",
      "hub_always_push": false,
      "hub_model_id": null,
      "hub_private_repo": null,
      "hub_revision": null,
      "hub_strategy": "every_save",
      "hub_token": null,
      "ignore_data_skip": false,
      "importance_sampling_level": "token",
      "include_for_metrics": [],
      "include_inputs_for_metrics": false,
      "include_num_input_tokens_seen": "no",
      "include_tokens_per_second": false,
      "jit_mode_eval": false,
      "label_names": null,
      "label_smoothing_factor": 0.0,
      "learning_rate": 5e-06,
      "length_column_name": "length",
      "liger_kernel_config": null,
      "load_best_model_at_end": false,
      "local_rank": 0,
      "log_completions": false,
      "log_level": "passive",
      "log_level_replica": "warning",
      "log_on_each_node": true,
      "log_unique_prompts": false,
      "logging_dir": "/scratch/a.palmas/code-interp-benchmark/logs/runs/rl_trainer/original/RL_HT_49/original/RL_HT_49/logs",
      "logging_first_step": false,
      "logging_nan_inf_filter": true,
      "logging_steps": 1.0,
      "logging_strategy": "steps",
      "loss_type": "dapo",
      "lr_scheduler_kwargs": null,
      "lr_scheduler_type": "constant_with_warmup",
      "mask_truncated_completions": false,
      "max_completion_length": 10000,
      "max_grad_norm": 1.0,
      "max_prompt_length": 3000,
      "max_steps": -1,
      "max_tool_calling_iterations": null,
      "metric_for_best_model": null,
      "min_p": null,
      "model_init_kwargs": null,
      "mp_parameters": "",
      "multi_objective_aggregation": "sum_then_normalize",
      "neftune_noise_alpha": null,
      "no_cuda": false,
      "num_completions_to_print": null,
      "num_generations": 32,
      "num_generations_eval": 1,
      "num_iterations": 1,
      "num_train_epochs": 1.0,
      "off_policy_mask_threshold": null,
      "optim": "adamw_torch_fused",
      "optim_args": null,
      "optim_target_modules": null,
      "output_dir": "/scratch/a.palmas/code-interp-benchmark/logs/runs/rl_trainer/original/RL_HT_49/original/RL_HT_49",
      "overwrite_output_dir": false,
      "parallelism_config": null,
      "past_index": -1,
      "per_device_eval_batch_size": 5,
      "per_device_train_batch_size": 4,
      "per_gpu_eval_batch_size": null,
      "per_gpu_train_batch_size": null,
      "prediction_loss_only": false,
      "project": "huggingface",
      "push_to_hub": false,
      "push_to_hub_model_id": null,
      "push_to_hub_organization": null,
      "push_to_hub_token": null,
      "ray_scope": "last",
      "ref_model_mixup_alpha": 0.6,
      "ref_model_sync_steps": 512,
      "remove_unused_columns": false,
      "repetition_penalty": 1.0,
      "report_to": [
        "tensorboard",
        "wandb"
      ],
      "restore_callback_states_from_checkpoint": false,
      "resume_from_checkpoint": null,
      "reward_weights": null,
      "run_name": "original/RL_HT_49-original/RL_HT_49",
      "sapo_temperature_neg": 1.05,
      "sapo_temperature_pos": 1.0,
      "save_on_each_node": false,
      "save_only_model": false,
      "save_safetensors": true,
      "save_steps": 50,
      "save_strategy": "steps",
      "save_total_limit": 500,
      "scale_rewards": "none",
      "seed": 0,
      "shuffle_dataset": true,
      "skip_memory_metrics": true,
      "steps_per_generation": 6,
      "sync_ref_model": false,
      "temperature": 0.7,
      "tf32": true,
      "top_entropy_quantile": 1.0,
      "top_k": 0,
      "top_p": 0.9,
      "torch_compile": false,
      "torch_compile_backend": null,
      "torch_compile_mode": null,
      "torch_empty_cache_steps": null,
      "torchdynamo": null,
      "tpu_metrics_debug": false,
      "tpu_num_cores": null,
      "trackio_space_id": "trackio",
      "use_bias_correction_kl": false,
      "use_cpu": false,
      "use_legacy_prediction_loop": false,
      "use_liger_kernel": false,
      "use_liger_loss": null,
      "use_mps_device": false,
      "use_transformers_paged": false,
      "use_vllm": true,
      "vllm_enable_sleep_mode": false,
      "vllm_gpu_memory_utilization": 0.15,
      "vllm_group_port": 51216,
      "vllm_guided_decoding_regex": null,
      "vllm_importance_sampling_cap": 3.0,
      "vllm_importance_sampling_correction": true,
      "vllm_importance_sampling_mode": "sequence_mask",
      "vllm_max_model_length": 13000,
      "vllm_mode": "colocate",
      "vllm_model_impl": "vllm",
      "vllm_server_base_url": null,
      "vllm_server_host": "0.0.0.0",
      "vllm_server_port": 8050,
      "vllm_server_timeout": 240.0,
      "vllm_structured_outputs_regex": null,
      "vllm_tensor_parallel_size": 1,
      "warmup_ratio": 0.0,
      "warmup_steps": 0,
      "weight_decay": 0.0
    },
    "lora_config": null,
    "quantization_mode": "none",
    "resume_checkpoint_name": null,
    "resume_from_run_dir": null,
    "resume_incompatibility_policy": "warn",
    "resume_wandb_behavior": "allow",
    "reward_manager_config": {
      "aggregation": {
        "clip_term_max": null,
        "clip_term_min": null,
        "clip_total_max": null,
        "clip_total_min": 0.0,
        "return_raw_breakdown": false,
        "strategy": "weighted_sum"
      },
      "difficulty": {
        "bin_edges": null,
        "bin_max_score": null,
        "code_override_mode": "skip",
        "enabled": true,
        "normalization_mode": "log",
        "num_difficulty_bins": 10,
        "primary_source": "trace_step_count",
        "score_clip_max": null,
        "secondary_sources": [
          "halstead_effort"
        ],
        "source_ranges": null,
        "track_bin_quantiles": "disabled",
        "track_per_term_rewards": "disabled",
        "track_percentiles": "disabled"
      },
      "logging": {
        "barrier_before_finalize": true,
        "category_extraction_config": {
          "enabled_fields": [
            "tags",
            "code_type"
          ],
          "tag_prefixes": [
            "difficulty",
            "parser",
            "total_steps"
          ]
        },
        "enabled": true,
        "expect_all_rank_logging": false,
        "gather_distributed_summaries": true,
        "histogram_max_samples": 100000,
        "histogram_num_bins": 50,
        "log_batch_stats": true,
        "log_every_n_generations": 16,
        "log_metrics": true,
        "log_tables": true,
        "log_terms": true,
        "log_total": true,
        "main_process_only": true,
        "step_metric_key": "train/global_step",
        "table_max_rows": 100
      },
      "parsing": {
        "capture_diagnostics": true,
        "enabled_fields": "both",
        "fallback_policy": "none",
        "final_tag": "final",
        "mode": "tags",
        "multi_tag_policy": "last",
        "openai_tokenizer_model": null,
        "reasoning_from_entire_output_when_no_final_answer": true,
        "reasoning_from_outside_final": true,
        "reasoning_tag": "reasoning",
        "strict": false,
        "track_token_lengths": true
      },
      "terms": [
        {
          "enabled": true,
          "name": "soft_match",
          "params": {
            "reward_if_match": 1.0,
            "reward_if_no_match": 0.0
          },
          "require_parsed": true,
          "type": "soft_match",
          "weight": 1.0
        }
      ],
      "verbosity_scaling": {
        "decay_rate": 0.001,
        "decay_type": "linear",
        "emit_metrics": true,
        "enabled": true,
        "end_tokens": 10000,
        "length_source": "parsed_reasoning",
        "max_factor": 1.0,
        "min_factor": 0.1,
        "mode": "absolute",
        "skip_negative_rewards": true,
        "temperature": 1.0,
        "threshold_tokens": 0
      }
    },
    "throughput_logging": {
      "eval_prefix": "eval/throughput/",
      "log_eval_throughput": true,
      "log_train_throughput": true,
      "only_main_process": true,
      "train_prefix": "train/throughput/"
    },
    "tokenizer_override_padding_to_right_side": true,
    "tokenizer_override_truncation_to_left_side": true,
    "tokenizer_set_padding_to_eos_if_needed": true,
    "use_wandb_logging": true,
    "wandb_init_on_all_ranks": false
  },
  "config_hash": "f43e390d722a6d1f5df4061495bce911e1ee04d9",
  "epoch": 0.40053404539385845,
  "generated_at": 1772305776.9167938,
  "git_revision": "git-revision-unknown",
  "global_step": 600,
  "runtime": {
    "app_name": "rl_trainer",
    "dry_run": false,
    "exp_name": "original/RL_HT_49",
    "hydra_runtime_config": {
      "callbacks": {
        "non_primary_rank_cleanup": {
          "_target_": "pyine.configs.callbacks.NonPrimaryRankCleanupCallback"
        }
      },
      "env": {},
      "help": {
        "app_name": "rl_trainer",
        "footer": "Powered by Hydra (https://hydra.cc)\nUse --hydra-help to view Hydra specific help\n",
        "header": "rl_trainer is powered by Hydra.\n",
        "template": "rl_trainer is powered by Hydra.\n\n== Configuration groups ==\nCompose your configuration from those groups (group=option)\n\n$APP_CONFIG_GROUPS\n\n== Config ==\nOverride anything in the config (foo.bar=value)\n\n$CONFIG\n\nPowered by Hydra (https://hydra.cc)\nUse --hydra-help to view Hydra specific help\n\n"
      },
      "hydra_help": {
        "hydra_help": "???",
        "template": "Hydra (1.3.2)\nSee https://hydra.cc for more info.\n\n== Flags ==\n$FLAGS_HELP\n\n== Configuration groups ==\nCompose your configuration from those groups (For example, append hydra/job_logging=disabled to command line)\n\n$HYDRA_CONFIG_GROUPS\n\nUse '--cfg hydra' to Show the Hydra config.\n"
      },
      "hydra_logging": {
        "disable_existing_loggers": false,
        "formatters": {
          "colorlog": {
            "()": "colorlog.ColoredFormatter",
            "format": "[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s"
          }
        },
        "handlers": {
          "console": {
            "class": "logging.StreamHandler",
            "formatter": "colorlog",
            "stream": "ext://sys.stdout"
          }
        },
        "root": {
          "handlers": [
            "console"
          ],
          "level": "INFO"
        },
        "version": 1
      },
      "job": {
        "chdir": null,
        "config": {
          "override_dirname": {
            "exclude_keys": [],
            "item_sep": ",",
            "kv_sep": "="
          }
        },
        "config_name": "entrypoint",
        "env_copy": [],
        "env_set": {},
        "id": "???",
        "name": "rl_trainer",
        "num": "???",
        "override_dirname": "+experiment=original/v0_rl.yaml"
      },
      "job_logging": {
        "disable_existing_loggers": false,
        "formatters": {
          "colorlog": {
            "()": "colorlog.ColoredFormatter",
            "format": "[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - %(message)s",
            "log_colors": {
              "CRITICAL": "red",
              "DEBUG": "purple",
              "ERROR": "red",
              "INFO": "green",
              "WARNING": "yellow"
            }
          },
          "simple": {
            "format": "[%(asctime)s][%(name)s][%(levelname)s] - %(message)s"
          }
        },
        "handlers": {
          "console": {
            "class": "logging.StreamHandler",
            "formatter": "colorlog",
            "stream": "ext://sys.stdout"
          },
          "file": {
            "class": "logging.FileHandler",
            "filename": "/scratch/a.palmas/code-interp-benchmark/logs/runs/rl_trainer/original/RL_HT_49/original/RL_HT_49/output.log",
            "formatter": "simple"
          }
        },
        "loggers": {
          "pyine": {
            "level": "INFO"
          }
        },
        "root": {
          "handlers": [
            "console",
            "file"
          ],
          "level": "INFO"
        },
        "version": 1
      },
      "launcher": {
        "_target_": "hydra._internal.core_plugins.basic_launcher.BasicLauncher"
      },
      "mode": 1,
      "output_subdir": ".hydra",
      "overrides": {
        "hydra": [
          "hydra.mode=RUN"
        ],
        "task": [
          "+experiment=original/v0_rl.yaml"
        ]
      },
      "run": {
        "dir": "/scratch/a.palmas/code-interp-benchmark/logs/runs/rl_trainer/original/RL_HT_49/original/RL_HT_49"
      },
      "runtime": {
        "choices": {
          "config": "base",
          "config/datamodule_config": "shortcuts_TACO_10s10t_v1_full",
          "config/evals_config": "base",
          "config/evals_config/evaluator_kwargs/llm_provider_config": "openai_gpt5nano_scoring",
          "config/grpo_config": "train_default",
          "experiment": "original/v0_rl.yaml",
          "hydra/callbacks": null,
          "hydra/env": "default",
          "hydra/help": "default",
          "hydra/hydra_help": "default",
          "hydra/hydra_logging": "colorlog",
          "hydra/job_logging": "colorlog",
          "hydra/launcher": "basic",
          "hydra/output": "default",
          "hydra/sweeper": "basic",
          "runtime": "default"
        },
        "config_sources": [
          {
            "path": "hydra.conf",
            "provider": "hydra",
            "schema": "pkg"
          },
          {
            "path": "hydra_zen.wrapper",
            "provider": "main",
            "schema": "pkg"
          },
          {
            "path": "hydra_plugins.hydra_colorlog.conf",
            "provider": "hydra-colorlog",
            "schema": "pkg"
          },
          {
            "path": "/scratch/a.palmas/code-interp-benchmark/pyine/configs",
            "provider": "pyine_cwd",
            "schema": "file"
          },
          {
            "path": "/scratch/a.palmas/code-interp-benchmark/pyine/configs",
            "provider": "pyine_repo",
            "schema": "file"
          },
          {
            "path": "",
            "provider": "schema",
            "schema": "structured"
          }
        ],
        "cwd": "/scratch/a.palmas/code-interp-benchmark",
        "output_dir": "/scratch/a.palmas/code-interp-benchmark/logs/runs/rl_trainer/original/RL_HT_49/original/RL_HT_49",
        "version": "1.3.2",
        "version_base": "1.3"
      },
      "searchpath": [],
      "sweep": {
        "dir": "/scratch/a.palmas/code-interp-benchmark/logs/sweeps/rl_trainer/original/RL_HT_49/original/RL_HT_49",
        "subdir": "default"
      },
      "sweeper": {
        "_target_": "hydra._internal.core_plugins.basic_sweeper.BasicSweeper",
        "max_batch_size": null,
        "params": null
      },
      "verbose": false
    },
    "metadata": {
      "created_by": "a.palmas",
      "data_root": "/scratch/a.palmas/code-interp-benchmark/data",
      "dotenv_path": "/scratch/a.palmas/code-interp-benchmark/.env",
      "framework_version": "0.1.4",
      "git_repo_clean": "True",
      "git_revision_hash": "f757ce0a0138eb0839c6aee27a1828f6aa4cb294",
      "local_timestamp": "20260225-105907",
      "logs_root": "/scratch/a.palmas/code-interp-benchmark/logs",
      "platform": "uname_result(system='Linux', node='gpu04', release='6.8.0-100-generic', version='#100-Ubuntu SMP PREEMPT_DYNAMIC Tue Jan 13 16:40:06 UTC 2026', machine='x86_64')",
      "project_root": "/scratch/a.palmas/code-interp-benchmark",
      "python_version": "3.12.3",
      "runtime_hash": "199a5a0a564dd9b6b8e116e1335ece9762a156ac",
      "sys_argv": "['pyine/apps/trainers/hf_trainer.py', '+experiment=original/v0_rl.yaml']",
      "sys_executable": "/scratch/a.palmas/code-interp-benchmark/.venv/bin/python3",
      "time_since_epoch": "1772035147.9544945",
      "tmp_dir": "/tmp/pyine/pyine-a.palmas",
      "work_dir": "/scratch/a.palmas/code-interp-benchmark"
    },
    "notes": null,
    "output_dir": "/scratch/a.palmas/code-interp-benchmark/logs/runs/rl_trainer/original/RL_HT_49/original/RL_HT_49",
    "run_group": "original/RL_HT_49",
    "run_name": "original/RL_HT_49",
    "seed": 0,
    "seed_workers": false,
    "tags": [
      "model:Qwen/Qwen3-4B-Instruct-2507",
      "rl-training"
    ],
    "wandb_run_dir": "/scratch/a.palmas/code-interp-benchmark/logs/runs/rl_trainer/original/RL_HT_49/original/RL_HT_49/wandb/run-20260225_105919-4567hspa/files",
    "wandb_run_entity": "lawzero-default",
    "wandb_run_id": "4567hspa",
    "wandb_run_project": "pyine",
    "wandb_run_url": "https://wandb.ai/lawzero-default/pyine/runs/4567hspa"
  },
  "shutdown_requested": false,
  "training_loss": null
}