{ "best_metric": null, "config": { "auto_model_config": { "attn_implementation": "flash_attention_2", "use_cache": false }, "auto_resume_if_possible": true, "auto_tokenizer_config": { "use_fast": true }, "base_model": "Qwen/Qwen3-4B-Instruct-2507", "cache_config": { "cache_dir": null, "force_regenerate": false, "use_cache": true }, "datamodule_config": { "add_block_markers": false, "add_line_numbers": false, "base_filter_rule": "", "cache_lock_timeout_seconds": 1800.0, "dataloader_config_overrides": { "train": { "shuffle": true } }, "datamodule_class_path": "pyine.organisms.datamodules.shortcuts.ShortcutBiasDataModule", "datamodule_name": null, "dataparser_config_overrides": { "test": {}, "train": { "filtering_config": { "max_args_length": 500, "max_code_length": 2500, "max_code_line_count": 250, "max_code_line_length": 250, "max_traces_per_solution": 1 }, "selection_config": { "code_type_prob_map": { "hinted": 0.05, "obfuscated": 0.05, "obfuscated_hinted": 0.05, "original": 0.75, "stubbed": 0.1 }, "draw_attempts": 5, "fallback_to_orig": true, "samples_per_family": 1 }, "transform_config": { "transform_strategy": "never" } }, "valid": { "filtering_config": { "max_args_length": 500, "max_code_length": 2500, "max_code_line_count": 250, "max_code_line_length": 250, "max_traces_per_solution": 1 }, "selection_config": { "code_type_prob_map": { "hinted": 0.0, "obfuscated": 0.0, "obfuscated_hinted": 0.0, "original": 1.0, "stubbed": 0.0 }, "draw_attempts": 5, "fallback_to_orig": true, "samples_per_family": 1 }, "transform_config": { "transform_strategy": "never" } }, "valid_hinted": { "filtering_config": { "max_args_length": null, "max_code_length": null, "max_code_line_count": null, "max_code_line_length": null, "max_trace_families": null, "max_trace_steps": null, "max_traces_per_family": null, "max_traces_per_problem": null, "max_traces_per_solution": null, "seed": null, "tokenizer_model_id": null, "tokenizer_path": null, "use_token_lengths": false }, "selection_config": { "allow_db_lookups": true, "fallback_to_orig": false, "require_hint_type": "helpful" } }, "valid_hintless": { "filtering_config": { "max_args_length": null, "max_code_length": null, "max_code_line_count": null, "max_code_line_length": null, "max_trace_families": null, "max_trace_steps": null, "max_traces_per_family": null, "max_traces_per_problem": null, "max_traces_per_solution": null, "seed": null, "tokenizer_model_id": null, "tokenizer_path": null, "use_token_lengths": false }, "selection_config": { "fallback_to_orig": false, "skip_code_type_selection": true } }, "valid_misleading": { "filtering_config": { "max_args_length": null, "max_code_length": null, "max_code_line_count": null, "max_code_line_length": null, "max_trace_families": null, "max_trace_steps": null, "max_traces_per_family": null, "max_traces_per_problem": null, "max_traces_per_solution": null, "seed": null, "tokenizer_model_id": null, "tokenizer_path": null, "use_token_lengths": false }, "selection_config": { "allow_db_lookups": true, "fallback_to_orig": false, "require_hint_type": "misleading", "require_validated_misleading": true } } }, "default_dataloader_config": { "base_class_path": "torch.utils.data.dataloader.DataLoader", "class_path": "torch.utils.data.dataloader.DataLoader", "params": { "batch_sampler": null, "batch_size": 1, "collate_fn": null, "drop_last": false, "generator": null, "in_order": true, "multiprocessing_context": null, "num_workers": 0, "persistent_workers": false, "pin_memory": false, "pin_memory_device": "", "prefetch_factor": null, "sampler": null, "shuffle": null, "timeout": 0, "worker_init_fn": null }, "params_key": null }, "default_dataparser_config": { "base_class_path": "torch.utils.data.dataset.Dataset", "class_path": "pyine.organisms.datamodules.samples.builder.SampleBuilder", "params": { "filtering_config": { "seed": 0 }, "selection_config": { "allow_db_lookups": true, "code_type_prob_map": { "hinted": 0.0, "obfuscated": 0.0, "obfuscated_hinted": 0.0, "original": 1.0, "stubbed": 0.0 }, "draw_attempts": 5, "fallback_to_orig": false, "samples_per_family": 1, "seed": 0 }, "transform_config": { "seed": 0, "transform_strategy": "never" } }, "params_key": null }, "eval_hint_types": [ "helpful", "misleading" ], "eval_subset_names": [ "valid" ], "evaluation_strategy": "counterfactual", "hf_messages_key": "prompt", "instantiate_parsers_at_setup": false, "keep_generated_datasets_in_memory": false, "lmdb_paths": [ "/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000001of000026.2025-12-10.lmdb", "/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000002of000026.2025-12-10.lmdb", "/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000003of000026.2025-12-10.lmdb", "/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000004of000026.2025-12-11.lmdb", "/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000005of000026.2025-12-11.lmdb", "/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000006of000026.2025-12-12.lmdb", "/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000007of000026.2025-12-12.lmdb", "/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000008of000026.2025-12-10.lmdb", "/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000009of000026.2025-12-12.lmdb", "/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000010of000026.2025-12-11.lmdb", "/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000011of000026.2025-12-12.lmdb", "/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000012of000026.2025-12-12.lmdb", "/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000013of000026.2025-12-12.lmdb", "/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000014of000026.2025-12-13.lmdb", "/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000015of000026.2025-12-10.lmdb", "/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000016of000026.2025-12-10.lmdb", "/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000017of000026.2025-12-10.lmdb", "/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000018of000026.2025-12-11.lmdb", "/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000019of000026.2025-12-11.lmdb", "/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000020of000026.2025-12-12.lmdb", "/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000021of000026.2025-12-10.lmdb", "/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000022of000026.2025-12-10.lmdb", "/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000023of000026.2025-12-11.lmdb", "/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000024of000026.2025-12-11.lmdb", "/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000025of000026.2025-12-11.lmdb", "/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000026of000026.2025-12-12.lmdb" ], "max_solution_count": null, "message_generator_num_workers": 8, "min_samples_hinted": 0, "min_samples_hintless": 0, "min_samples_misleading": 0, "pregenerated_outputs_lmdb_paths": null, "pregenerated_outputs_only_matched": false, "pregenerated_outputs_phase_prefix": "", "pregenerated_outputs_selection": "latest", "prompt_config": { "context_variables": null, "examples_block_variables": null, "include_examples": false, "partial_vars": {}, "prompt_name": "code_execution", "role_variables": null, "target_examples": null, "use_chat_template": true, "version": "rl_tagged_answer" }, "require_validated_misleading": true, "split_file_path": "/scratch/a.palmas/code-interp-benchmark/data/splits/TACO-split.bin", "split_seed": 0, "subset_names": [ "train", "valid", "test", "valid_hinted", "valid_misleading", "valid_hintless" ], "train_subset_names": [ "train" ], "use_local_dataset_cache": true, "use_tokenized_dataset_cache": true, "valid_subset_names": [ "valid_hinted", "valid_hintless", "valid_misleading" ] }, "evals_config": { "category_extraction_config": { "enabled_fields": [ "has_keyword", "code_type", "predict_type" ], "tag_prefixes": null }, "eval_batch_size": 24, "eval_generation_config": null, "eval_generation_max_new_tokens_override": 1024, "eval_padding_side": "left", "eval_runnable_config": { "async_metrics_compute_rate": 100, "max_in_flight_jobs": 32, "max_workers": null, "parallel": true }, "eval_type": "code_exec", "evaluator_kwargs": { "add_idempotency_header": true, "llm_provider_config": { "model_kwargs": { "max_retries": 0, "max_tokens": 1024, "model": "gpt-5-nano", "reasoning": { "effort": "minimal" }, "timeout": 15 }, "provider": "openai", "rate_limiter_config": { "max_bucket_size": 50, "requests_per_second": 50 }, "with_retry_config": { "retry_if_exception_type": [ "openai.APITimeoutError", "openai.APIConnectionError", "openai.RateLimitError", "openai.InternalServerError" ], "stop_after_attempt": 10, "wait_exponential_jitter": true } } }, "vllm_provider_config": null }, "generation_export_config": null, "gpu_stats_logging": { "collect_all_visible_devices": false, "eval_prefix": "eval/gpu/", "gather_eval_metrics": true, "gather_train_metrics": "at_phase_end", "only_main_process": true, "require_nvml": false, "sample_every_n_steps": 1, "train_prefix": "train/gpu/" }, "grpo_config": { "_n_gpu": 1, "accelerator_config": { "dispatch_batches": null, "even_batches": true, "gradient_accumulation_kwargs": null, "non_blocking": false, "split_batches": false, "use_configured_state": false, "use_seedable_sampler": true }, "adafactor": false, "adam_beta1": 0.9, "adam_beta2": 0.999, "adam_epsilon": 1e-08, "auto_find_batch_size": false, "average_tokens_across_devices": true, "batch_eval_metrics": false, "beta": 0.0, "bf16": true, "bf16_full_eval": false, "cache_implementation": null, "cast_lm_head_to_fp32": false, "chat_template_kwargs": null, "data_seed": null, "dataloader_drop_last": false, "dataloader_num_workers": 4, "dataloader_persistent_workers": false, "dataloader_pin_memory": false, "dataloader_prefetch_factor": null, "ddp_backend": null, "ddp_broadcast_buffers": null, "ddp_bucket_cap_mb": null, "ddp_find_unused_parameters": null, "ddp_timeout": 1800, "debug": [], "deepspeed": null, "delta": null, "disable_dropout": false, "disable_tqdm": false, "do_eval": true, "do_predict": false, "do_train": true, "ds3_gather_for_generation": true, "epsilon": 0.2, "epsilon_high": null, "eval_accumulation_steps": null, "eval_delay": 0.0, "eval_do_concat_batches": true, "eval_on_start": true, "eval_steps": 10, "eval_strategy": "steps", "eval_use_gather_object": false, "fp16": false, "fp16_backend": "auto", "fp16_full_eval": false, "fp16_opt_level": "O1", "fsdp": [], "fsdp_config": { "min_num_params": 0, "xla": false, "xla_fsdp_grad_ckpt": false, "xla_fsdp_v2": false }, "fsdp_min_num_params": 0, "fsdp_transformer_layer_cls_to_wrap": null, "full_determinism": false, "generation_batch_size": 960, "generation_kwargs": null, "gradient_accumulation_steps": 6, "gradient_checkpointing": true, "gradient_checkpointing_kwargs": { "use_reentrant": false }, "greater_is_better": null, "group_by_length": false, "half_precision_backend": "auto", "hub_always_push": false, "hub_model_id": null, "hub_private_repo": null, "hub_revision": null, "hub_strategy": "every_save", "hub_token": null, "ignore_data_skip": false, "importance_sampling_level": "token", "include_for_metrics": [], "include_inputs_for_metrics": false, "include_num_input_tokens_seen": "no", "include_tokens_per_second": false, "jit_mode_eval": false, "label_names": null, "label_smoothing_factor": 0.0, "learning_rate": 5e-06, "length_column_name": "length", "liger_kernel_config": null, "load_best_model_at_end": false, "local_rank": 0, "log_completions": false, "log_level": "passive", "log_level_replica": "warning", "log_on_each_node": true, "log_unique_prompts": false, "logging_dir": "/scratch/a.palmas/code-interp-benchmark/logs/runs/rl_trainer/original/RL_HT_49/original/RL_HT_49/logs", "logging_first_step": false, "logging_nan_inf_filter": true, "logging_steps": 1.0, "logging_strategy": "steps", "loss_type": "dapo", "lr_scheduler_kwargs": null, "lr_scheduler_type": "constant_with_warmup", "mask_truncated_completions": false, "max_completion_length": 10000, "max_grad_norm": 1.0, "max_prompt_length": 3000, "max_steps": -1, "max_tool_calling_iterations": null, "metric_for_best_model": null, "min_p": null, "model_init_kwargs": null, "mp_parameters": "", "multi_objective_aggregation": "sum_then_normalize", "neftune_noise_alpha": null, "no_cuda": false, "num_completions_to_print": null, "num_generations": 32, "num_generations_eval": 1, "num_iterations": 1, "num_train_epochs": 1.0, "off_policy_mask_threshold": null, "optim": "adamw_torch_fused", "optim_args": null, "optim_target_modules": null, "output_dir": "/scratch/a.palmas/code-interp-benchmark/logs/runs/rl_trainer/original/RL_HT_49/original/RL_HT_49", "overwrite_output_dir": false, "parallelism_config": null, "past_index": -1, "per_device_eval_batch_size": 5, "per_device_train_batch_size": 4, "per_gpu_eval_batch_size": null, "per_gpu_train_batch_size": null, "prediction_loss_only": false, "project": "huggingface", "push_to_hub": false, "push_to_hub_model_id": null, "push_to_hub_organization": null, "push_to_hub_token": null, "ray_scope": "last", "ref_model_mixup_alpha": 0.6, "ref_model_sync_steps": 512, "remove_unused_columns": false, "repetition_penalty": 1.0, "report_to": [ "tensorboard", "wandb" ], "restore_callback_states_from_checkpoint": false, "resume_from_checkpoint": null, "reward_weights": null, "run_name": "original/RL_HT_49-original/RL_HT_49", "sapo_temperature_neg": 1.05, "sapo_temperature_pos": 1.0, "save_on_each_node": false, "save_only_model": false, "save_safetensors": true, "save_steps": 50, "save_strategy": "steps", "save_total_limit": 500, "scale_rewards": "none", "seed": 0, "shuffle_dataset": true, "skip_memory_metrics": true, "steps_per_generation": 6, "sync_ref_model": false, "temperature": 0.7, "tf32": true, "top_entropy_quantile": 1.0, "top_k": 0, "top_p": 0.9, "torch_compile": false, "torch_compile_backend": null, "torch_compile_mode": null, "torch_empty_cache_steps": null, "torchdynamo": null, "tpu_metrics_debug": false, "tpu_num_cores": null, "trackio_space_id": "trackio", "use_bias_correction_kl": false, "use_cpu": false, "use_legacy_prediction_loop": false, "use_liger_kernel": false, "use_liger_loss": null, "use_mps_device": false, "use_transformers_paged": false, "use_vllm": true, "vllm_enable_sleep_mode": false, "vllm_gpu_memory_utilization": 0.15, "vllm_group_port": 51216, "vllm_guided_decoding_regex": null, "vllm_importance_sampling_cap": 3.0, "vllm_importance_sampling_correction": true, "vllm_importance_sampling_mode": "sequence_mask", "vllm_max_model_length": 13000, "vllm_mode": "colocate", "vllm_model_impl": "vllm", "vllm_server_base_url": null, "vllm_server_host": "0.0.0.0", "vllm_server_port": 8050, "vllm_server_timeout": 240.0, "vllm_structured_outputs_regex": null, "vllm_tensor_parallel_size": 1, "warmup_ratio": 0.0, "warmup_steps": 0, "weight_decay": 0.0 }, "lora_config": null, "quantization_mode": "none", "resume_checkpoint_name": null, "resume_from_run_dir": null, "resume_incompatibility_policy": "warn", "resume_wandb_behavior": "allow", "reward_manager_config": { "aggregation": { "clip_term_max": null, "clip_term_min": null, "clip_total_max": null, "clip_total_min": 0.0, "return_raw_breakdown": false, "strategy": "weighted_sum" }, "difficulty": { "bin_edges": null, "bin_max_score": null, "code_override_mode": "skip", "enabled": true, "normalization_mode": "log", "num_difficulty_bins": 10, "primary_source": "trace_step_count", "score_clip_max": null, "secondary_sources": [ "halstead_effort" ], "source_ranges": null, "track_bin_quantiles": "disabled", "track_per_term_rewards": "disabled", "track_percentiles": "disabled" }, "logging": { "barrier_before_finalize": true, "category_extraction_config": { "enabled_fields": [ "tags", "code_type" ], "tag_prefixes": [ "difficulty", "parser", "total_steps" ] }, "enabled": true, "expect_all_rank_logging": false, "gather_distributed_summaries": true, "histogram_max_samples": 100000, "histogram_num_bins": 50, "log_batch_stats": true, "log_every_n_generations": 16, "log_metrics": true, "log_tables": true, "log_terms": true, "log_total": true, "main_process_only": true, "step_metric_key": "train/global_step", "table_max_rows": 100 }, "parsing": { "capture_diagnostics": true, "enabled_fields": "both", "fallback_policy": "none", "final_tag": "final", "mode": "tags", "multi_tag_policy": "last", "openai_tokenizer_model": null, "reasoning_from_entire_output_when_no_final_answer": true, "reasoning_from_outside_final": true, "reasoning_tag": "reasoning", "strict": false, "track_token_lengths": true }, "terms": [ { "enabled": true, "name": "soft_match", "params": { "reward_if_match": 1.0, "reward_if_no_match": 0.0 }, "require_parsed": true, "type": "soft_match", "weight": 1.0 } ], "verbosity_scaling": { "decay_rate": 0.001, "decay_type": "linear", "emit_metrics": true, "enabled": true, "end_tokens": 10000, "length_source": "parsed_reasoning", "max_factor": 1.0, "min_factor": 0.1, "mode": "absolute", "skip_negative_rewards": true, "temperature": 1.0, "threshold_tokens": 0 } }, "throughput_logging": { "eval_prefix": "eval/throughput/", "log_eval_throughput": true, "log_train_throughput": true, "only_main_process": true, "train_prefix": "train/throughput/" }, "tokenizer_override_padding_to_right_side": true, "tokenizer_override_truncation_to_left_side": true, "tokenizer_set_padding_to_eos_if_needed": true, "use_wandb_logging": true, "wandb_init_on_all_ranks": false }, "config_hash": "f43e390d722a6d1f5df4061495bce911e1ee04d9", "epoch": 0.40053404539385845, "generated_at": 1772305776.9167938, "git_revision": "git-revision-unknown", "global_step": 600, "runtime": { "app_name": "rl_trainer", "dry_run": false, "exp_name": "original/RL_HT_49", "hydra_runtime_config": { "callbacks": { "non_primary_rank_cleanup": { "_target_": "pyine.configs.callbacks.NonPrimaryRankCleanupCallback" } }, "env": {}, "help": { "app_name": "rl_trainer", "footer": "Powered by Hydra (https://hydra.cc)\nUse --hydra-help to view Hydra specific help\n", "header": "rl_trainer is powered by Hydra.\n", "template": "rl_trainer is powered by Hydra.\n\n== Configuration groups ==\nCompose your configuration from those groups (group=option)\n\n$APP_CONFIG_GROUPS\n\n== Config ==\nOverride anything in the config (foo.bar=value)\n\n$CONFIG\n\nPowered by Hydra (https://hydra.cc)\nUse --hydra-help to view Hydra specific help\n\n" }, "hydra_help": { "hydra_help": "???", "template": "Hydra (1.3.2)\nSee https://hydra.cc for more info.\n\n== Flags ==\n$FLAGS_HELP\n\n== Configuration groups ==\nCompose your configuration from those groups (For example, append hydra/job_logging=disabled to command line)\n\n$HYDRA_CONFIG_GROUPS\n\nUse '--cfg hydra' to Show the Hydra config.\n" }, "hydra_logging": { "disable_existing_loggers": false, "formatters": { "colorlog": { "()": "colorlog.ColoredFormatter", "format": "[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s" } }, "handlers": { "console": { "class": "logging.StreamHandler", "formatter": "colorlog", "stream": "ext://sys.stdout" } }, "root": { "handlers": [ "console" ], "level": "INFO" }, "version": 1 }, "job": { "chdir": null, "config": { "override_dirname": { "exclude_keys": [], "item_sep": ",", "kv_sep": "=" } }, "config_name": "entrypoint", "env_copy": [], "env_set": {}, "id": "???", "name": "rl_trainer", "num": "???", "override_dirname": "+experiment=original/v0_rl.yaml" }, "job_logging": { "disable_existing_loggers": false, "formatters": { "colorlog": { "()": "colorlog.ColoredFormatter", "format": "[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - %(message)s", "log_colors": { "CRITICAL": "red", "DEBUG": "purple", "ERROR": "red", "INFO": "green", "WARNING": "yellow" } }, "simple": { "format": "[%(asctime)s][%(name)s][%(levelname)s] - %(message)s" } }, "handlers": { "console": { "class": "logging.StreamHandler", "formatter": "colorlog", "stream": "ext://sys.stdout" }, "file": { "class": "logging.FileHandler", "filename": "/scratch/a.palmas/code-interp-benchmark/logs/runs/rl_trainer/original/RL_HT_49/original/RL_HT_49/output.log", "formatter": "simple" } }, "loggers": { "pyine": { "level": "INFO" } }, "root": { "handlers": [ "console", "file" ], "level": "INFO" }, "version": 1 }, "launcher": { "_target_": "hydra._internal.core_plugins.basic_launcher.BasicLauncher" }, "mode": 1, "output_subdir": ".hydra", "overrides": { "hydra": [ "hydra.mode=RUN" ], "task": [ "+experiment=original/v0_rl.yaml" ] }, "run": { "dir": "/scratch/a.palmas/code-interp-benchmark/logs/runs/rl_trainer/original/RL_HT_49/original/RL_HT_49" }, "runtime": { "choices": { "config": "base", "config/datamodule_config": "shortcuts_TACO_10s10t_v1_full", "config/evals_config": "base", "config/evals_config/evaluator_kwargs/llm_provider_config": "openai_gpt5nano_scoring", "config/grpo_config": "train_default", "experiment": "original/v0_rl.yaml", "hydra/callbacks": null, "hydra/env": "default", "hydra/help": "default", "hydra/hydra_help": "default", "hydra/hydra_logging": "colorlog", "hydra/job_logging": "colorlog", "hydra/launcher": "basic", "hydra/output": "default", "hydra/sweeper": "basic", "runtime": "default" }, "config_sources": [ { "path": "hydra.conf", "provider": "hydra", "schema": "pkg" }, { "path": "hydra_zen.wrapper", "provider": "main", "schema": "pkg" }, { "path": "hydra_plugins.hydra_colorlog.conf", "provider": "hydra-colorlog", "schema": "pkg" }, { "path": "/scratch/a.palmas/code-interp-benchmark/pyine/configs", "provider": "pyine_cwd", "schema": "file" }, { "path": "/scratch/a.palmas/code-interp-benchmark/pyine/configs", "provider": "pyine_repo", "schema": "file" }, { "path": "", "provider": "schema", "schema": "structured" } ], "cwd": "/scratch/a.palmas/code-interp-benchmark", "output_dir": "/scratch/a.palmas/code-interp-benchmark/logs/runs/rl_trainer/original/RL_HT_49/original/RL_HT_49", "version": "1.3.2", "version_base": "1.3" }, "searchpath": [], "sweep": { "dir": "/scratch/a.palmas/code-interp-benchmark/logs/sweeps/rl_trainer/original/RL_HT_49/original/RL_HT_49", "subdir": "default" }, "sweeper": { "_target_": "hydra._internal.core_plugins.basic_sweeper.BasicSweeper", "max_batch_size": null, "params": null }, "verbose": false }, "metadata": { "created_by": "a.palmas", "data_root": "/scratch/a.palmas/code-interp-benchmark/data", "dotenv_path": "/scratch/a.palmas/code-interp-benchmark/.env", "framework_version": "0.1.4", "git_repo_clean": "True", "git_revision_hash": "f757ce0a0138eb0839c6aee27a1828f6aa4cb294", "local_timestamp": "20260225-105907", "logs_root": "/scratch/a.palmas/code-interp-benchmark/logs", "platform": "uname_result(system='Linux', node='gpu04', release='6.8.0-100-generic', version='#100-Ubuntu SMP PREEMPT_DYNAMIC Tue Jan 13 16:40:06 UTC 2026', machine='x86_64')", "project_root": "/scratch/a.palmas/code-interp-benchmark", "python_version": "3.12.3", "runtime_hash": "199a5a0a564dd9b6b8e116e1335ece9762a156ac", "sys_argv": "['pyine/apps/trainers/hf_trainer.py', '+experiment=original/v0_rl.yaml']", "sys_executable": "/scratch/a.palmas/code-interp-benchmark/.venv/bin/python3", "time_since_epoch": "1772035147.9544945", "tmp_dir": "/tmp/pyine/pyine-a.palmas", "work_dir": "/scratch/a.palmas/code-interp-benchmark" }, "notes": null, "output_dir": "/scratch/a.palmas/code-interp-benchmark/logs/runs/rl_trainer/original/RL_HT_49/original/RL_HT_49", "run_group": "original/RL_HT_49", "run_name": "original/RL_HT_49", "seed": 0, "seed_workers": false, "tags": [ "model:Qwen/Qwen3-4B-Instruct-2507", "rl-training" ], "wandb_run_dir": "/scratch/a.palmas/code-interp-benchmark/logs/runs/rl_trainer/original/RL_HT_49/original/RL_HT_49/wandb/run-20260225_105919-4567hspa/files", "wandb_run_entity": "lawzero-default", "wandb_run_id": "4567hspa", "wandb_run_project": "pyine", "wandb_run_url": "https://wandb.ai/lawzero-default/pyine/runs/4567hspa" }, "shutdown_requested": false, "training_loss": null }