Files
pyine-v1-qwen3-4b-shortcut/run_meta.json
ModelHub XC a74e97d95d 初始化项目,由ModelHub XC社区提供模型
Model: plstcharles-saifh/pyine-v1-qwen3-4b-shortcut
Source: Original Platform
2026-05-23 08:37:17 +08:00

901 lines
31 KiB
JSON

{
"best_metric": null,
"config": {
"auto_model_config": {
"attn_implementation": "flash_attention_2",
"use_cache": false
},
"auto_resume_if_possible": true,
"auto_tokenizer_config": {
"use_fast": true
},
"base_model": "Qwen/Qwen3-4B-Instruct-2507",
"cache_config": {
"cache_dir": null,
"force_regenerate": false,
"use_cache": true
},
"datamodule_config": {
"add_block_markers": false,
"add_line_numbers": false,
"base_filter_rule": "",
"cache_lock_timeout_seconds": 1800.0,
"dataloader_config_overrides": {
"train": {
"shuffle": true
}
},
"datamodule_class_path": "pyine.organisms.datamodules.shortcuts.ShortcutBiasDataModule",
"datamodule_name": null,
"dataparser_config_overrides": {
"test": {},
"train": {
"filtering_config": {
"max_args_length": 500,
"max_code_length": 2500,
"max_code_line_count": 250,
"max_code_line_length": 250,
"max_traces_per_solution": 1
},
"selection_config": {
"code_type_prob_map": {
"hinted": 0.05,
"obfuscated": 0.05,
"obfuscated_hinted": 0.05,
"original": 0.75,
"stubbed": 0.1
},
"draw_attempts": 5,
"fallback_to_orig": true,
"samples_per_family": 1
},
"transform_config": {
"transform_strategy": "never"
}
},
"valid": {
"filtering_config": {
"max_args_length": 500,
"max_code_length": 2500,
"max_code_line_count": 250,
"max_code_line_length": 250,
"max_traces_per_solution": 1
},
"selection_config": {
"code_type_prob_map": {
"hinted": 0.0,
"obfuscated": 0.0,
"obfuscated_hinted": 0.0,
"original": 1.0,
"stubbed": 0.0
},
"draw_attempts": 5,
"fallback_to_orig": true,
"samples_per_family": 1
},
"transform_config": {
"transform_strategy": "never"
}
},
"valid_hinted": {
"filtering_config": {
"max_args_length": null,
"max_code_length": null,
"max_code_line_count": null,
"max_code_line_length": null,
"max_trace_families": null,
"max_trace_steps": null,
"max_traces_per_family": null,
"max_traces_per_problem": null,
"max_traces_per_solution": null,
"seed": null,
"tokenizer_model_id": null,
"tokenizer_path": null,
"use_token_lengths": false
},
"selection_config": {
"allow_db_lookups": true,
"fallback_to_orig": false,
"require_hint_type": "helpful"
}
},
"valid_hintless": {
"filtering_config": {
"max_args_length": null,
"max_code_length": null,
"max_code_line_count": null,
"max_code_line_length": null,
"max_trace_families": null,
"max_trace_steps": null,
"max_traces_per_family": null,
"max_traces_per_problem": null,
"max_traces_per_solution": null,
"seed": null,
"tokenizer_model_id": null,
"tokenizer_path": null,
"use_token_lengths": false
},
"selection_config": {
"fallback_to_orig": false,
"skip_code_type_selection": true
}
},
"valid_misleading": {
"filtering_config": {
"max_args_length": null,
"max_code_length": null,
"max_code_line_count": null,
"max_code_line_length": null,
"max_trace_families": null,
"max_trace_steps": null,
"max_traces_per_family": null,
"max_traces_per_problem": null,
"max_traces_per_solution": null,
"seed": null,
"tokenizer_model_id": null,
"tokenizer_path": null,
"use_token_lengths": false
},
"selection_config": {
"allow_db_lookups": true,
"fallback_to_orig": false,
"require_hint_type": "misleading",
"require_validated_misleading": true
}
}
},
"default_dataloader_config": {
"base_class_path": "torch.utils.data.dataloader.DataLoader",
"class_path": "torch.utils.data.dataloader.DataLoader",
"params": {
"batch_sampler": null,
"batch_size": 1,
"collate_fn": null,
"drop_last": false,
"generator": null,
"in_order": true,
"multiprocessing_context": null,
"num_workers": 0,
"persistent_workers": false,
"pin_memory": false,
"pin_memory_device": "",
"prefetch_factor": null,
"sampler": null,
"shuffle": null,
"timeout": 0,
"worker_init_fn": null
},
"params_key": null
},
"default_dataparser_config": {
"base_class_path": "torch.utils.data.dataset.Dataset",
"class_path": "pyine.organisms.datamodules.samples.builder.SampleBuilder",
"params": {
"filtering_config": {
"seed": 0
},
"selection_config": {
"allow_db_lookups": true,
"code_type_prob_map": {
"hinted": 0.0,
"obfuscated": 0.0,
"obfuscated_hinted": 0.0,
"original": 1.0,
"stubbed": 0.0
},
"draw_attempts": 5,
"fallback_to_orig": false,
"samples_per_family": 1,
"seed": 0
},
"transform_config": {
"seed": 0,
"transform_strategy": "never"
}
},
"params_key": null
},
"eval_hint_types": [
"helpful",
"misleading"
],
"eval_subset_names": [
"valid"
],
"evaluation_strategy": "counterfactual",
"hf_messages_key": "prompt",
"instantiate_parsers_at_setup": false,
"keep_generated_datasets_in_memory": false,
"lmdb_paths": [
"/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000001of000026.2025-12-10.lmdb",
"/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000002of000026.2025-12-10.lmdb",
"/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000003of000026.2025-12-10.lmdb",
"/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000004of000026.2025-12-11.lmdb",
"/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000005of000026.2025-12-11.lmdb",
"/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000006of000026.2025-12-12.lmdb",
"/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000007of000026.2025-12-12.lmdb",
"/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000008of000026.2025-12-10.lmdb",
"/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000009of000026.2025-12-12.lmdb",
"/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000010of000026.2025-12-11.lmdb",
"/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000011of000026.2025-12-12.lmdb",
"/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000012of000026.2025-12-12.lmdb",
"/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000013of000026.2025-12-12.lmdb",
"/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000014of000026.2025-12-13.lmdb",
"/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000015of000026.2025-12-10.lmdb",
"/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000016of000026.2025-12-10.lmdb",
"/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000017of000026.2025-12-10.lmdb",
"/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000018of000026.2025-12-11.lmdb",
"/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000019of000026.2025-12-11.lmdb",
"/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000020of000026.2025-12-12.lmdb",
"/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000021of000026.2025-12-10.lmdb",
"/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000022of000026.2025-12-10.lmdb",
"/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000023of000026.2025-12-11.lmdb",
"/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000024of000026.2025-12-11.lmdb",
"/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000025of000026.2025-12-11.lmdb",
"/scratch/a.palmas/code-interp-benchmark/data/traces/TACO/v1.5/10s10t.000026of000026.2025-12-12.lmdb"
],
"max_solution_count": null,
"message_generator_num_workers": 8,
"min_samples_hinted": 0,
"min_samples_hintless": 0,
"min_samples_misleading": 0,
"pregenerated_outputs_lmdb_paths": null,
"pregenerated_outputs_only_matched": false,
"pregenerated_outputs_phase_prefix": "",
"pregenerated_outputs_selection": "latest",
"prompt_config": {
"context_variables": null,
"examples_block_variables": null,
"include_examples": false,
"partial_vars": {},
"prompt_name": "code_execution",
"role_variables": null,
"target_examples": null,
"use_chat_template": true,
"version": "rl_tagged_answer"
},
"require_validated_misleading": true,
"split_file_path": "/scratch/a.palmas/code-interp-benchmark/data/splits/TACO-split.bin",
"split_seed": 0,
"subset_names": [
"train",
"valid",
"test",
"valid_hinted",
"valid_misleading",
"valid_hintless"
],
"train_subset_names": [
"train"
],
"use_local_dataset_cache": true,
"use_tokenized_dataset_cache": true,
"valid_subset_names": [
"valid_hinted",
"valid_hintless",
"valid_misleading"
]
},
"evals_config": {
"category_extraction_config": {
"enabled_fields": [
"has_keyword",
"code_type",
"predict_type"
],
"tag_prefixes": null
},
"eval_batch_size": 24,
"eval_generation_config": null,
"eval_generation_max_new_tokens_override": 1024,
"eval_padding_side": "left",
"eval_runnable_config": {
"async_metrics_compute_rate": 100,
"max_in_flight_jobs": 32,
"max_workers": null,
"parallel": true
},
"eval_type": "code_exec",
"evaluator_kwargs": {
"add_idempotency_header": true,
"llm_provider_config": {
"model_kwargs": {
"max_retries": 0,
"max_tokens": 1024,
"model": "gpt-5-nano",
"reasoning": {
"effort": "minimal"
},
"timeout": 15
},
"provider": "openai",
"rate_limiter_config": {
"max_bucket_size": 50,
"requests_per_second": 50
},
"with_retry_config": {
"retry_if_exception_type": [
"openai.APITimeoutError",
"openai.APIConnectionError",
"openai.RateLimitError",
"openai.InternalServerError"
],
"stop_after_attempt": 10,
"wait_exponential_jitter": true
}
}
},
"vllm_provider_config": null
},
"generation_export_config": null,
"gpu_stats_logging": {
"collect_all_visible_devices": false,
"eval_prefix": "eval/gpu/",
"gather_eval_metrics": true,
"gather_train_metrics": "at_phase_end",
"only_main_process": true,
"require_nvml": false,
"sample_every_n_steps": 1,
"train_prefix": "train/gpu/"
},
"grpo_config": {
"_n_gpu": 1,
"accelerator_config": {
"dispatch_batches": null,
"even_batches": true,
"gradient_accumulation_kwargs": null,
"non_blocking": false,
"split_batches": false,
"use_configured_state": false,
"use_seedable_sampler": true
},
"adafactor": false,
"adam_beta1": 0.9,
"adam_beta2": 0.999,
"adam_epsilon": 1e-08,
"auto_find_batch_size": false,
"average_tokens_across_devices": true,
"batch_eval_metrics": false,
"beta": 0.0,
"bf16": true,
"bf16_full_eval": false,
"cache_implementation": null,
"cast_lm_head_to_fp32": false,
"chat_template_kwargs": null,
"data_seed": null,
"dataloader_drop_last": false,
"dataloader_num_workers": 4,
"dataloader_persistent_workers": false,
"dataloader_pin_memory": false,
"dataloader_prefetch_factor": null,
"ddp_backend": null,
"ddp_broadcast_buffers": null,
"ddp_bucket_cap_mb": null,
"ddp_find_unused_parameters": null,
"ddp_timeout": 1800,
"debug": [],
"deepspeed": null,
"delta": null,
"disable_dropout": false,
"disable_tqdm": false,
"do_eval": true,
"do_predict": false,
"do_train": true,
"ds3_gather_for_generation": true,
"epsilon": 0.2,
"epsilon_high": null,
"eval_accumulation_steps": null,
"eval_delay": 0.0,
"eval_do_concat_batches": true,
"eval_on_start": true,
"eval_steps": 10,
"eval_strategy": "steps",
"eval_use_gather_object": false,
"fp16": false,
"fp16_backend": "auto",
"fp16_full_eval": false,
"fp16_opt_level": "O1",
"fsdp": [],
"fsdp_config": {
"min_num_params": 0,
"xla": false,
"xla_fsdp_grad_ckpt": false,
"xla_fsdp_v2": false
},
"fsdp_min_num_params": 0,
"fsdp_transformer_layer_cls_to_wrap": null,
"full_determinism": false,
"generation_batch_size": 960,
"generation_kwargs": null,
"gradient_accumulation_steps": 6,
"gradient_checkpointing": true,
"gradient_checkpointing_kwargs": {
"use_reentrant": false
},
"greater_is_better": null,
"group_by_length": false,
"half_precision_backend": "auto",
"hub_always_push": false,
"hub_model_id": null,
"hub_private_repo": null,
"hub_revision": null,
"hub_strategy": "every_save",
"hub_token": null,
"ignore_data_skip": false,
"importance_sampling_level": "token",
"include_for_metrics": [],
"include_inputs_for_metrics": false,
"include_num_input_tokens_seen": "no",
"include_tokens_per_second": false,
"jit_mode_eval": false,
"label_names": null,
"label_smoothing_factor": 0.0,
"learning_rate": 5e-06,
"length_column_name": "length",
"liger_kernel_config": null,
"load_best_model_at_end": false,
"local_rank": 0,
"log_completions": false,
"log_level": "passive",
"log_level_replica": "warning",
"log_on_each_node": true,
"log_unique_prompts": false,
"logging_dir": "/scratch/a.palmas/code-interp-benchmark/logs/runs/rl_trainer/original/RL_HT_49/original/RL_HT_49/logs",
"logging_first_step": false,
"logging_nan_inf_filter": true,
"logging_steps": 1.0,
"logging_strategy": "steps",
"loss_type": "dapo",
"lr_scheduler_kwargs": null,
"lr_scheduler_type": "constant_with_warmup",
"mask_truncated_completions": false,
"max_completion_length": 10000,
"max_grad_norm": 1.0,
"max_prompt_length": 3000,
"max_steps": -1,
"max_tool_calling_iterations": null,
"metric_for_best_model": null,
"min_p": null,
"model_init_kwargs": null,
"mp_parameters": "",
"multi_objective_aggregation": "sum_then_normalize",
"neftune_noise_alpha": null,
"no_cuda": false,
"num_completions_to_print": null,
"num_generations": 32,
"num_generations_eval": 1,
"num_iterations": 1,
"num_train_epochs": 1.0,
"off_policy_mask_threshold": null,
"optim": "adamw_torch_fused",
"optim_args": null,
"optim_target_modules": null,
"output_dir": "/scratch/a.palmas/code-interp-benchmark/logs/runs/rl_trainer/original/RL_HT_49/original/RL_HT_49",
"overwrite_output_dir": false,
"parallelism_config": null,
"past_index": -1,
"per_device_eval_batch_size": 5,
"per_device_train_batch_size": 4,
"per_gpu_eval_batch_size": null,
"per_gpu_train_batch_size": null,
"prediction_loss_only": false,
"project": "huggingface",
"push_to_hub": false,
"push_to_hub_model_id": null,
"push_to_hub_organization": null,
"push_to_hub_token": null,
"ray_scope": "last",
"ref_model_mixup_alpha": 0.6,
"ref_model_sync_steps": 512,
"remove_unused_columns": false,
"repetition_penalty": 1.0,
"report_to": [
"tensorboard",
"wandb"
],
"restore_callback_states_from_checkpoint": false,
"resume_from_checkpoint": null,
"reward_weights": null,
"run_name": "original/RL_HT_49-original/RL_HT_49",
"sapo_temperature_neg": 1.05,
"sapo_temperature_pos": 1.0,
"save_on_each_node": false,
"save_only_model": false,
"save_safetensors": true,
"save_steps": 50,
"save_strategy": "steps",
"save_total_limit": 500,
"scale_rewards": "none",
"seed": 0,
"shuffle_dataset": true,
"skip_memory_metrics": true,
"steps_per_generation": 6,
"sync_ref_model": false,
"temperature": 0.7,
"tf32": true,
"top_entropy_quantile": 1.0,
"top_k": 0,
"top_p": 0.9,
"torch_compile": false,
"torch_compile_backend": null,
"torch_compile_mode": null,
"torch_empty_cache_steps": null,
"torchdynamo": null,
"tpu_metrics_debug": false,
"tpu_num_cores": null,
"trackio_space_id": "trackio",
"use_bias_correction_kl": false,
"use_cpu": false,
"use_legacy_prediction_loop": false,
"use_liger_kernel": false,
"use_liger_loss": null,
"use_mps_device": false,
"use_transformers_paged": false,
"use_vllm": true,
"vllm_enable_sleep_mode": false,
"vllm_gpu_memory_utilization": 0.15,
"vllm_group_port": 51216,
"vllm_guided_decoding_regex": null,
"vllm_importance_sampling_cap": 3.0,
"vllm_importance_sampling_correction": true,
"vllm_importance_sampling_mode": "sequence_mask",
"vllm_max_model_length": 13000,
"vllm_mode": "colocate",
"vllm_model_impl": "vllm",
"vllm_server_base_url": null,
"vllm_server_host": "0.0.0.0",
"vllm_server_port": 8050,
"vllm_server_timeout": 240.0,
"vllm_structured_outputs_regex": null,
"vllm_tensor_parallel_size": 1,
"warmup_ratio": 0.0,
"warmup_steps": 0,
"weight_decay": 0.0
},
"lora_config": null,
"quantization_mode": "none",
"resume_checkpoint_name": null,
"resume_from_run_dir": null,
"resume_incompatibility_policy": "warn",
"resume_wandb_behavior": "allow",
"reward_manager_config": {
"aggregation": {
"clip_term_max": null,
"clip_term_min": null,
"clip_total_max": null,
"clip_total_min": 0.0,
"return_raw_breakdown": false,
"strategy": "weighted_sum"
},
"difficulty": {
"bin_edges": null,
"bin_max_score": null,
"code_override_mode": "skip",
"enabled": true,
"normalization_mode": "log",
"num_difficulty_bins": 10,
"primary_source": "trace_step_count",
"score_clip_max": null,
"secondary_sources": [
"halstead_effort"
],
"source_ranges": null,
"track_bin_quantiles": "disabled",
"track_per_term_rewards": "disabled",
"track_percentiles": "disabled"
},
"logging": {
"barrier_before_finalize": true,
"category_extraction_config": {
"enabled_fields": [
"tags",
"code_type"
],
"tag_prefixes": [
"difficulty",
"parser",
"total_steps"
]
},
"enabled": true,
"expect_all_rank_logging": false,
"gather_distributed_summaries": true,
"histogram_max_samples": 100000,
"histogram_num_bins": 50,
"log_batch_stats": true,
"log_every_n_generations": 16,
"log_metrics": true,
"log_tables": true,
"log_terms": true,
"log_total": true,
"main_process_only": true,
"step_metric_key": "train/global_step",
"table_max_rows": 100
},
"parsing": {
"capture_diagnostics": true,
"enabled_fields": "both",
"fallback_policy": "none",
"final_tag": "final",
"mode": "tags",
"multi_tag_policy": "last",
"openai_tokenizer_model": null,
"reasoning_from_entire_output_when_no_final_answer": true,
"reasoning_from_outside_final": true,
"reasoning_tag": "reasoning",
"strict": false,
"track_token_lengths": true
},
"terms": [
{
"enabled": true,
"name": "soft_match",
"params": {
"reward_if_match": 1.0,
"reward_if_no_match": 0.0
},
"require_parsed": true,
"type": "soft_match",
"weight": 1.0
}
],
"verbosity_scaling": {
"decay_rate": 0.001,
"decay_type": "linear",
"emit_metrics": true,
"enabled": true,
"end_tokens": 10000,
"length_source": "parsed_reasoning",
"max_factor": 1.0,
"min_factor": 0.1,
"mode": "absolute",
"skip_negative_rewards": true,
"temperature": 1.0,
"threshold_tokens": 0
}
},
"throughput_logging": {
"eval_prefix": "eval/throughput/",
"log_eval_throughput": true,
"log_train_throughput": true,
"only_main_process": true,
"train_prefix": "train/throughput/"
},
"tokenizer_override_padding_to_right_side": true,
"tokenizer_override_truncation_to_left_side": true,
"tokenizer_set_padding_to_eos_if_needed": true,
"use_wandb_logging": true,
"wandb_init_on_all_ranks": false
},
"config_hash": "f43e390d722a6d1f5df4061495bce911e1ee04d9",
"epoch": 0.40053404539385845,
"generated_at": 1772305776.9167938,
"git_revision": "git-revision-unknown",
"global_step": 600,
"runtime": {
"app_name": "rl_trainer",
"dry_run": false,
"exp_name": "original/RL_HT_49",
"hydra_runtime_config": {
"callbacks": {
"non_primary_rank_cleanup": {
"_target_": "pyine.configs.callbacks.NonPrimaryRankCleanupCallback"
}
},
"env": {},
"help": {
"app_name": "rl_trainer",
"footer": "Powered by Hydra (https://hydra.cc)\nUse --hydra-help to view Hydra specific help\n",
"header": "rl_trainer is powered by Hydra.\n",
"template": "rl_trainer is powered by Hydra.\n\n== Configuration groups ==\nCompose your configuration from those groups (group=option)\n\n$APP_CONFIG_GROUPS\n\n== Config ==\nOverride anything in the config (foo.bar=value)\n\n$CONFIG\n\nPowered by Hydra (https://hydra.cc)\nUse --hydra-help to view Hydra specific help\n\n"
},
"hydra_help": {
"hydra_help": "???",
"template": "Hydra (1.3.2)\nSee https://hydra.cc for more info.\n\n== Flags ==\n$FLAGS_HELP\n\n== Configuration groups ==\nCompose your configuration from those groups (For example, append hydra/job_logging=disabled to command line)\n\n$HYDRA_CONFIG_GROUPS\n\nUse '--cfg hydra' to Show the Hydra config.\n"
},
"hydra_logging": {
"disable_existing_loggers": false,
"formatters": {
"colorlog": {
"()": "colorlog.ColoredFormatter",
"format": "[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s"
}
},
"handlers": {
"console": {
"class": "logging.StreamHandler",
"formatter": "colorlog",
"stream": "ext://sys.stdout"
}
},
"root": {
"handlers": [
"console"
],
"level": "INFO"
},
"version": 1
},
"job": {
"chdir": null,
"config": {
"override_dirname": {
"exclude_keys": [],
"item_sep": ",",
"kv_sep": "="
}
},
"config_name": "entrypoint",
"env_copy": [],
"env_set": {},
"id": "???",
"name": "rl_trainer",
"num": "???",
"override_dirname": "+experiment=original/v0_rl.yaml"
},
"job_logging": {
"disable_existing_loggers": false,
"formatters": {
"colorlog": {
"()": "colorlog.ColoredFormatter",
"format": "[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - %(message)s",
"log_colors": {
"CRITICAL": "red",
"DEBUG": "purple",
"ERROR": "red",
"INFO": "green",
"WARNING": "yellow"
}
},
"simple": {
"format": "[%(asctime)s][%(name)s][%(levelname)s] - %(message)s"
}
},
"handlers": {
"console": {
"class": "logging.StreamHandler",
"formatter": "colorlog",
"stream": "ext://sys.stdout"
},
"file": {
"class": "logging.FileHandler",
"filename": "/scratch/a.palmas/code-interp-benchmark/logs/runs/rl_trainer/original/RL_HT_49/original/RL_HT_49/output.log",
"formatter": "simple"
}
},
"loggers": {
"pyine": {
"level": "INFO"
}
},
"root": {
"handlers": [
"console",
"file"
],
"level": "INFO"
},
"version": 1
},
"launcher": {
"_target_": "hydra._internal.core_plugins.basic_launcher.BasicLauncher"
},
"mode": 1,
"output_subdir": ".hydra",
"overrides": {
"hydra": [
"hydra.mode=RUN"
],
"task": [
"+experiment=original/v0_rl.yaml"
]
},
"run": {
"dir": "/scratch/a.palmas/code-interp-benchmark/logs/runs/rl_trainer/original/RL_HT_49/original/RL_HT_49"
},
"runtime": {
"choices": {
"config": "base",
"config/datamodule_config": "shortcuts_TACO_10s10t_v1_full",
"config/evals_config": "base",
"config/evals_config/evaluator_kwargs/llm_provider_config": "openai_gpt5nano_scoring",
"config/grpo_config": "train_default",
"experiment": "original/v0_rl.yaml",
"hydra/callbacks": null,
"hydra/env": "default",
"hydra/help": "default",
"hydra/hydra_help": "default",
"hydra/hydra_logging": "colorlog",
"hydra/job_logging": "colorlog",
"hydra/launcher": "basic",
"hydra/output": "default",
"hydra/sweeper": "basic",
"runtime": "default"
},
"config_sources": [
{
"path": "hydra.conf",
"provider": "hydra",
"schema": "pkg"
},
{
"path": "hydra_zen.wrapper",
"provider": "main",
"schema": "pkg"
},
{
"path": "hydra_plugins.hydra_colorlog.conf",
"provider": "hydra-colorlog",
"schema": "pkg"
},
{
"path": "/scratch/a.palmas/code-interp-benchmark/pyine/configs",
"provider": "pyine_cwd",
"schema": "file"
},
{
"path": "/scratch/a.palmas/code-interp-benchmark/pyine/configs",
"provider": "pyine_repo",
"schema": "file"
},
{
"path": "",
"provider": "schema",
"schema": "structured"
}
],
"cwd": "/scratch/a.palmas/code-interp-benchmark",
"output_dir": "/scratch/a.palmas/code-interp-benchmark/logs/runs/rl_trainer/original/RL_HT_49/original/RL_HT_49",
"version": "1.3.2",
"version_base": "1.3"
},
"searchpath": [],
"sweep": {
"dir": "/scratch/a.palmas/code-interp-benchmark/logs/sweeps/rl_trainer/original/RL_HT_49/original/RL_HT_49",
"subdir": "default"
},
"sweeper": {
"_target_": "hydra._internal.core_plugins.basic_sweeper.BasicSweeper",
"max_batch_size": null,
"params": null
},
"verbose": false
},
"metadata": {
"created_by": "a.palmas",
"data_root": "/scratch/a.palmas/code-interp-benchmark/data",
"dotenv_path": "/scratch/a.palmas/code-interp-benchmark/.env",
"framework_version": "0.1.4",
"git_repo_clean": "True",
"git_revision_hash": "f757ce0a0138eb0839c6aee27a1828f6aa4cb294",
"local_timestamp": "20260225-105907",
"logs_root": "/scratch/a.palmas/code-interp-benchmark/logs",
"platform": "uname_result(system='Linux', node='gpu04', release='6.8.0-100-generic', version='#100-Ubuntu SMP PREEMPT_DYNAMIC Tue Jan 13 16:40:06 UTC 2026', machine='x86_64')",
"project_root": "/scratch/a.palmas/code-interp-benchmark",
"python_version": "3.12.3",
"runtime_hash": "199a5a0a564dd9b6b8e116e1335ece9762a156ac",
"sys_argv": "['pyine/apps/trainers/hf_trainer.py', '+experiment=original/v0_rl.yaml']",
"sys_executable": "/scratch/a.palmas/code-interp-benchmark/.venv/bin/python3",
"time_since_epoch": "1772035147.9544945",
"tmp_dir": "/tmp/pyine/pyine-a.palmas",
"work_dir": "/scratch/a.palmas/code-interp-benchmark"
},
"notes": null,
"output_dir": "/scratch/a.palmas/code-interp-benchmark/logs/runs/rl_trainer/original/RL_HT_49/original/RL_HT_49",
"run_group": "original/RL_HT_49",
"run_name": "original/RL_HT_49",
"seed": 0,
"seed_workers": false,
"tags": [
"model:Qwen/Qwen3-4B-Instruct-2507",
"rl-training"
],
"wandb_run_dir": "/scratch/a.palmas/code-interp-benchmark/logs/runs/rl_trainer/original/RL_HT_49/original/RL_HT_49/wandb/run-20260225_105919-4567hspa/files",
"wandb_run_entity": "lawzero-default",
"wandb_run_id": "4567hspa",
"wandb_run_project": "pyine",
"wandb_run_url": "https://wandb.ai/lawzero-default/pyine/runs/4567hspa"
},
"shutdown_requested": false,
"training_loss": null
}