From 3cd946cc4c8c180534ad8c14911cb35b0e920bce Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Fri, 1 May 2026 18:53:17 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: alwaysgood/QWEN3-4B-CPT-stage2 Source: Original Platform --- .gitattributes | 38 + README.md | 59 + all_results.json | 12 + checkpoint-90/config.json | 74 + checkpoint-90/generation_config.json | 9 + checkpoint-90/model.safetensors | 3 + checkpoint-90/optimizer.pt | 3 + checkpoint-90/rng_state.pth | 3 + checkpoint-90/scheduler.pt | 3 + checkpoint-90/tokenizer.json | 3 + checkpoint-90/tokenizer_config.json | 15 + checkpoint-90/trainer_state.json | 105 + checkpoint-90/training_args.bin | 3 + config.json | 74 + .../results_2026-04-13T07-05-52.814598.json | 7825 +++++++++++++++++ eval/checkpoints/cpt/stdout.txt | 2460 ++++++ eval/summary.json | 8 + eval_results.json | 7 + generation_config.json | 9 + model.safetensors | 3 + tokenizer.json | 3 + tokenizer/tokenizer.json | 3 + tokenizer/tokenizer_config.json | 15 + tokenizer_config.json | 15 + train_results.json | 8 + trainer_state.json | 114 + training_args.bin | 3 + 27 files changed, 10877 insertions(+) create mode 100644 .gitattributes create mode 100644 README.md create mode 100644 all_results.json create mode 100644 checkpoint-90/config.json create mode 100644 checkpoint-90/generation_config.json create mode 100644 checkpoint-90/model.safetensors create mode 100644 checkpoint-90/optimizer.pt create mode 100644 checkpoint-90/rng_state.pth create mode 100644 checkpoint-90/scheduler.pt create mode 100644 checkpoint-90/tokenizer.json create mode 100644 checkpoint-90/tokenizer_config.json create mode 100644 checkpoint-90/trainer_state.json create mode 100644 checkpoint-90/training_args.bin create mode 100644 config.json 
create mode 100644 eval/checkpoints/cpt/__home__unsloth__scp_stage2_pd__artifacts__cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt__checkpoints/results_2026-04-13T07-05-52.814598.json create mode 100644 eval/checkpoints/cpt/stdout.txt create mode 100644 eval/summary.json create mode 100644 eval_results.json create mode 100644 generation_config.json create mode 100644 model.safetensors create mode 100644 tokenizer.json create mode 100644 tokenizer/tokenizer.json create mode 100644 tokenizer/tokenizer_config.json create mode 100644 tokenizer_config.json create mode 100644 train_results.json create mode 100644 trainer_state.json create mode 100644 training_args.bin diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..0098fe3 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,38 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs 
merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +checkpoint-90/tokenizer.json filter=lfs diff=lfs merge=lfs -text +tokenizer/tokenizer.json filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..9098397 --- /dev/null +++ b/README.md @@ -0,0 +1,59 @@ +--- +base_model: alwaysgood/QWEN3-4B-CPT +library_name: transformers +model_name: checkpoints +tags: +- generated_from_trainer +- unsloth +- trl +- sft +licence: license +--- + +# Model Card for checkpoints + +This model is a fine-tuned version of [alwaysgood/QWEN3-4B-CPT](https://huggingface.co/alwaysgood/QWEN3-4B-CPT). +It has been trained using [TRL](https://github.com/huggingface/trl). + +## Quick start + +```python +from transformers import pipeline + +question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?" +generator = pipeline("text-generation", model="alwaysgood/QWEN3-4B-CPT-stage2", device="cuda") +output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0] +print(output["generated_text"]) +``` + +## Training procedure + +[Visualize in Weights & Biases](https://wandb.ai/hiloong/parallel-cpt/runs/zkww2vcc) + + +This model was trained with SFT. 
+ +### Framework versions + +- TRL: 0.24.0 +- Transformers: 5.5.3 +- Pytorch: 2.9.0+cu128 +- Datasets: 4.3.0 +- Tokenizers: 0.22.2 + +## Citations + + + +Cite TRL as: + +```bibtex +@misc{vonwerra2022trl, + title = {{TRL: Transformer Reinforcement Learning}}, + author = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallou{\'e}dec}, + year = 2020, + journal = {GitHub repository}, + publisher = {GitHub}, + howpublished = {\url{https://github.com/huggingface/trl}} +} +``` \ No newline at end of file diff --git a/all_results.json b/all_results.json new file mode 100644 index 0000000..74743cf --- /dev/null +++ b/all_results.json @@ -0,0 +1,12 @@ +{ + "epoch": 1.0, + "eval_loss": 1.7952755689620972, + "eval_runtime": 1.7545, + "eval_samples_per_second": 41.038, + "eval_steps_per_second": 5.13, + "total_flos": 1.6013083311596544e+16, + "train_loss": 1.9445130242241753, + "train_runtime": 226.5501, + "train_samples_per_second": 15.727, + "train_steps_per_second": 0.397 +} \ No newline at end of file diff --git a/checkpoint-90/config.json b/checkpoint-90/config.json new file mode 100644 index 0000000..51646a4 --- /dev/null +++ b/checkpoint-90/config.json @@ -0,0 +1,74 @@ +{ + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": null, + "dtype": "bfloat16", + "eos_token_id": 151643, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + 
"full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 36, + "model_name": "alwaysgood/QWEN3-4B-CPT", + "model_type": "qwen3", + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "pad_token_id": 151669, + "rms_norm_eps": 1e-06, + "rope_parameters": { + "rope_theta": 1000000, + "rope_type": "default" + }, + "sliding_window": null, + "tie_word_embeddings": true, + "transformers_version": "5.5.3", + "unsloth_fixed": true, + "unsloth_version": "2026.4.4", + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151936 +} diff --git a/checkpoint-90/generation_config.json b/checkpoint-90/generation_config.json new file mode 100644 index 0000000..43f602b --- /dev/null +++ b/checkpoint-90/generation_config.json @@ -0,0 +1,9 @@ +{ + "eos_token_id": [ + 151643 + ], + "max_length": 32768, + "max_new_tokens": 2048, + "pad_token_id": 151669, + "transformers_version": "5.5.3" +} diff --git a/checkpoint-90/model.safetensors b/checkpoint-90/model.safetensors new file mode 100644 index 0000000..c923917 --- /dev/null +++ b/checkpoint-90/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb2e1f1757f163af8b269039a3ef7167612a4581b9ebe72658607b2037898ca4 +size 8044982080 diff --git a/checkpoint-90/optimizer.pt b/checkpoint-90/optimizer.pt new file mode 100644 index 0000000..d9496b0 --- /dev/null +++ b/checkpoint-90/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:245fd3aa0b66ca73cfe4f9d068a4cf0db0b0ad27f7b3ff05bc44e6ede1660c64 +size 14534393422 diff --git a/checkpoint-90/rng_state.pth b/checkpoint-90/rng_state.pth new file mode 
100644 index 0000000..1feba1a --- /dev/null +++ b/checkpoint-90/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:098b29492211804ab324a36f37466821d948280bb74fce4ba895c03f13ecd878 +size 14645 diff --git a/checkpoint-90/scheduler.pt b/checkpoint-90/scheduler.pt new file mode 100644 index 0000000..cdb8369 --- /dev/null +++ b/checkpoint-90/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd4da37e1d2639c6a3c48eabc833153fc8f9bf81ee46a2bc40f3b4e879988db4 +size 1465 diff --git a/checkpoint-90/tokenizer.json b/checkpoint-90/tokenizer.json new file mode 100644 index 0000000..73037fe --- /dev/null +++ b/checkpoint-90/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45c4ffda6666cf6d75d0b1f961f25964e2a52a62e78aaecb2f458e9ba9824112 +size 11422840 diff --git a/checkpoint-90/tokenizer_config.json b/checkpoint-90/tokenizer_config.json new file mode 100644 index 0000000..d450540 --- /dev/null +++ b/checkpoint-90/tokenizer_config.json @@ -0,0 +1,15 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|endoftext|>", + "errors": "replace", + "is_local": false, + "model_max_length": 32768, + "pad_token": "<|PAD_TOKEN|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/checkpoint-90/trainer_state.json b/checkpoint-90/trainer_state.json new file mode 100644 index 0000000..0fd01f4 --- /dev/null +++ b/checkpoint-90/trainer_state.json @@ -0,0 +1,105 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 90, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.11204481792717087, + "grad_norm": 14.0, + "learning_rate": 1e-05, + "loss": 
2.211936187744141, + "step": 10 + }, + { + "epoch": 0.22408963585434175, + "grad_norm": 7.21875, + "learning_rate": 9.628619846344453e-06, + "loss": 1.982374382019043, + "step": 20 + }, + { + "epoch": 0.33613445378151263, + "grad_norm": 6.75, + "learning_rate": 8.569648672789496e-06, + "loss": 1.939706802368164, + "step": 30 + }, + { + "epoch": 0.4481792717086835, + "grad_norm": 8.0, + "learning_rate": 6.980398830195785e-06, + "loss": 1.9272880554199219, + "step": 40 + }, + { + "epoch": 0.5602240896358543, + "grad_norm": 4.5, + "learning_rate": 5.096956658859122e-06, + "loss": 1.8303054809570312, + "step": 50 + }, + { + "epoch": 0.6722689075630253, + "grad_norm": 5.53125, + "learning_rate": 3.1991113759764493e-06, + "loss": 1.8838277816772462, + "step": 60 + }, + { + "epoch": 0.7843137254901961, + "grad_norm": 4.875, + "learning_rate": 1.5687918106563326e-06, + "loss": 1.8752876281738282, + "step": 70 + }, + { + "epoch": 0.896358543417367, + "grad_norm": 4.53125, + "learning_rate": 4.481852951692672e-07, + "loss": 1.8907934188842774, + "step": 80 + }, + { + "epoch": 1.0, + "grad_norm": 26.25, + "learning_rate": 3.760237478849793e-09, + "loss": 1.9590974807739259, + "step": 90 + }, + { + "epoch": 1.0, + "eval_loss": 1.7950044870376587, + "eval_runtime": 4.0343, + "eval_samples_per_second": 17.847, + "eval_steps_per_second": 2.231, + "step": 90 + } + ], + "logging_steps": 10, + "max_steps": 90, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.6013083311596544e+16, + "train_batch_size": 10, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-90/training_args.bin b/checkpoint-90/training_args.bin new file mode 100644 index 0000000..d9d5c40 --- /dev/null +++ 
b/checkpoint-90/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10a665cf825105cd5773c40316561897beba1d0ede4c24ba1bf17b1b2984ae2d +size 5777 diff --git a/config.json b/config.json new file mode 100644 index 0000000..51646a4 --- /dev/null +++ b/config.json @@ -0,0 +1,74 @@ +{ + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": null, + "dtype": "bfloat16", + "eos_token_id": 151643, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 36, + "model_name": "alwaysgood/QWEN3-4B-CPT", + "model_type": "qwen3", + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "pad_token_id": 151669, + "rms_norm_eps": 1e-06, + "rope_parameters": { + "rope_theta": 1000000, + "rope_type": "default" + }, + "sliding_window": null, + "tie_word_embeddings": true, + "transformers_version": "5.5.3", + "unsloth_fixed": true, + "unsloth_version": "2026.4.4", + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151936 +} diff --git 
a/eval/checkpoints/cpt/__home__unsloth__scp_stage2_pd__artifacts__cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt__checkpoints/results_2026-04-13T07-05-52.814598.json b/eval/checkpoints/cpt/__home__unsloth__scp_stage2_pd__artifacts__cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt__checkpoints/results_2026-04-13T07-05-52.814598.json new file mode 100644 index 0000000..a42674c --- /dev/null +++ b/eval/checkpoints/cpt/__home__unsloth__scp_stage2_pd__artifacts__cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt__checkpoints/results_2026-04-13T07-05-52.814598.json @@ -0,0 +1,7825 @@ +{ + "results": { + "arc_challenge": { + "alias": "arc_challenge", + "acc,none": 0.5, + "acc_stderr,none": 0.02503130871608794, + "acc_norm,none": 0.535, + "acc_norm_stderr,none": 0.02496990669989584 + }, + "arc_easy": { + "alias": "arc_easy", + "acc,none": 0.79, + "acc_stderr,none": 0.020390916450326858, + "acc_norm,none": 0.805, + "acc_norm_stderr,none": 0.0198348444065381 + }, + "hellaswag": { + "alias": "hellaswag", + "acc,none": 0.505, + "acc_stderr,none": 0.02503005711936146, + "acc_norm,none": 0.63, + "acc_norm_stderr,none": 0.024170447375168467 + }, + "kmmlu": { + "acc,none": 0.459300064808814, + "acc_stderr,none": 0.003912015372025933, + "alias": "kmmlu" + }, + "kmmlu_applied_science": { + "acc,none": 0.44479166666666664, + "acc_stderr,none": 0.00709881931685827, + "alias": " - kmmlu_applied_science" + }, + "kmmlu_aviation_engineering_and_maintenance": { + "alias": " - kmmlu_aviation_engineering_and_maintenance", + "acc,none": 0.45, + "acc_stderr,none": 0.02490583770684492 + }, + "kmmlu_electronics_engineering": { + "alias": " - kmmlu_electronics_engineering", + "acc,none": 0.6025, + "acc_stderr,none": 0.024499693108404715 + }, + "kmmlu_energy_management": { + "alias": " - kmmlu_energy_management", + "acc,none": 0.3875, + "acc_stderr,none": 0.02438947500927542 + }, + "kmmlu_environmental_science": { + "alias": " - kmmlu_environmental_science", + "acc,none": 0.3725, + "acc_stderr,none": 
0.024203800008203095 + }, + "kmmlu_gas_technology_and_engineering": { + "alias": " - kmmlu_gas_technology_and_engineering", + "acc,none": 0.3875, + "acc_stderr,none": 0.024389475009275415 + }, + "kmmlu_geomatics": { + "alias": " - kmmlu_geomatics", + "acc,none": 0.4125, + "acc_stderr,none": 0.0246450364079438 + }, + "kmmlu_industrial_engineer": { + "alias": " - kmmlu_industrial_engineer", + "acc,none": 0.4175, + "acc_stderr,none": 0.024688218756390913 + }, + "kmmlu_machine_design_and_manufacturing": { + "alias": " - kmmlu_machine_design_and_manufacturing", + "acc,none": 0.5, + "acc_stderr,none": 0.02503130871608794 + }, + "kmmlu_maritime_engineering": { + "alias": " - kmmlu_maritime_engineering", + "acc,none": 0.4075, + "acc_stderr,none": 0.02459923129797198 + }, + "kmmlu_nondestructive_testing": { + "alias": " - kmmlu_nondestructive_testing", + "acc,none": 0.4625, + "acc_stderr,none": 0.024960808880119853 + }, + "kmmlu_railway_and_automotive_engineering": { + "alias": " - kmmlu_railway_and_automotive_engineering", + "acc,none": 0.36, + "acc_stderr,none": 0.0240300563674444 + }, + "kmmlu_telecommunications_and_wireless_technology": { + "alias": " - kmmlu_telecommunications_and_wireless_technology", + "acc,none": 0.5775, + "acc_stderr,none": 0.024728792083212124 + }, + "kmmlu_humss": { + "acc,none": 0.4604395604395604, + "acc_stderr,none": 0.009405507500045103, + "alias": " - kmmlu_humss" + }, + "kmmlu_accounting": { + "alias": " - kmmlu_accounting", + "acc,none": 0.44, + "acc_stderr,none": 0.04988876515698589 + }, + "kmmlu_criminal_law": { + "alias": " - kmmlu_criminal_law", + "acc,none": 0.34, + "acc_stderr,none": 0.033580324461725736 + }, + "kmmlu_economics": { + "alias": " - kmmlu_economics", + "acc,none": 0.5692307692307692, + "acc_stderr,none": 0.04359851186846964 + }, + "kmmlu_education": { + "alias": " - kmmlu_education", + "acc,none": 0.61, + "acc_stderr,none": 0.04902071300001975 + }, + "kmmlu_korean_history": { + "alias": " - kmmlu_korean_history", + 
"acc,none": 0.28, + "acc_stderr,none": 0.045126085985421276 + }, + "kmmlu_law": { + "alias": " - kmmlu_law", + "acc,none": 0.3625, + "acc_stderr,none": 0.024066207238097756 + }, + "kmmlu_management": { + "alias": " - kmmlu_management", + "acc,none": 0.51, + "acc_stderr,none": 0.02502630195361833 + }, + "kmmlu_political_science_and_sociology": { + "alias": " - kmmlu_political_science_and_sociology", + "acc,none": 0.5366666666666666, + "acc_stderr,none": 0.02883789055433726 + }, + "kmmlu_psychology": { + "alias": " - kmmlu_psychology", + "acc,none": 0.42, + "acc_stderr,none": 0.024708830724853678 + }, + "kmmlu_social_welfare": { + "alias": " - kmmlu_social_welfare", + "acc,none": 0.5575, + "acc_stderr,none": 0.02486523828938644 + }, + "kmmlu_taxation": { + "alias": " - kmmlu_taxation", + "acc,none": 0.405, + "acc_stderr,none": 0.03479841445010401 + }, + "kmmlu_other": { + "acc,none": 0.4625, + "acc_stderr,none": 0.008030101105270982, + "alias": " - kmmlu_other" + }, + "kmmlu_agricultural_sciences": { + "alias": " - kmmlu_agricultural_sciences", + "acc,none": 0.34, + "acc_stderr,none": 0.02371510099151158 + }, + "kmmlu_construction": { + "alias": " - kmmlu_construction", + "acc,none": 0.3875, + "acc_stderr,none": 0.02438947500927542 + }, + "kmmlu_fashion": { + "alias": " - kmmlu_fashion", + "acc,none": 0.45, + "acc_stderr,none": 0.024905837706844923 + }, + "kmmlu_food_processing": { + "alias": " - kmmlu_food_processing", + "acc,none": 0.365, + "acc_stderr,none": 0.024101653974588085 + }, + "kmmlu_health": { + "alias": " - kmmlu_health", + "acc,none": 0.57, + "acc_stderr,none": 0.04975698519562428 + }, + "kmmlu_interior_architecture_and_design": { + "alias": " - kmmlu_interior_architecture_and_design", + "acc,none": 0.6, + "acc_stderr,none": 0.02452557357939856 + }, + "kmmlu_marketing": { + "alias": " - kmmlu_marketing", + "acc,none": 0.7625, + "acc_stderr,none": 0.02130420258115869 + }, + "kmmlu_patent": { + "alias": " - kmmlu_patent", + "acc,none": 0.43, + 
"acc_stderr,none": 0.049756985195624284 + }, + "kmmlu_public_safety": { + "alias": " - kmmlu_public_safety", + "acc,none": 0.38, + "acc_stderr,none": 0.024299715851758236 + }, + "kmmlu_real_estate": { + "alias": " - kmmlu_real_estate", + "acc,none": 0.475, + "acc_stderr,none": 0.035399727449764204 + }, + "kmmlu_refrigerating_machinery": { + "alias": " - kmmlu_refrigerating_machinery", + "acc,none": 0.39, + "acc_stderr,none": 0.02441803844504636 + }, + "kmmlu_stem": { + "acc,none": 0.4720930232558139, + "acc_stderr,none": 0.0073082778415419075, + "alias": " - kmmlu_stem" + }, + "kmmlu_biology": { + "alias": " - kmmlu_biology", + "acc,none": 0.2975, + "acc_stderr,none": 0.02288654320007986 + }, + "kmmlu_chemical_engineering": { + "alias": " - kmmlu_chemical_engineering", + "acc,none": 0.4875, + "acc_stderr,none": 0.025023485209500245 + }, + "kmmlu_chemistry": { + "alias": " - kmmlu_chemistry", + "acc,none": 0.5, + "acc_stderr,none": 0.02503130871608794 + }, + "kmmlu_civil_engineering": { + "alias": " - kmmlu_civil_engineering", + "acc,none": 0.4, + "acc_stderr,none": 0.02452557357939856 + }, + "kmmlu_computer_science": { + "alias": " - kmmlu_computer_science", + "acc,none": 0.7375, + "acc_stderr,none": 0.02202719610892523 + }, + "kmmlu_ecology": { + "alias": " - kmmlu_ecology", + "acc,none": 0.5025, + "acc_stderr,none": 0.025030995822773412 + }, + "kmmlu_electrical_engineering": { + "alias": " - kmmlu_electrical_engineering", + "acc,none": 0.335, + "acc_stderr,none": 0.023629078131262988 + }, + "kmmlu_information_technology": { + "alias": " - kmmlu_information_technology", + "acc,none": 0.7275, + "acc_stderr,none": 0.022290166077429736 + }, + "kmmlu_materials_engineering": { + "alias": " - kmmlu_materials_engineering", + "acc,none": 0.4625, + "acc_stderr,none": 0.02496080888011985 + }, + "kmmlu_math": { + "alias": " - kmmlu_math", + "acc,none": 0.32666666666666666, + "acc_stderr,none": 0.02712263463512255 + }, + "kmmlu_mechanical_engineering": { + "alias": " - 
kmmlu_mechanical_engineering", + "acc,none": 0.38, + "acc_stderr,none": 0.02429971585175824 + }, + "kobest_boolq": { + "alias": "kobest_boolq", + "acc,none": 0.8, + "acc_stderr,none": 0.020025046972870324, + "f1,none": 0.7920943891473271, + "f1_stderr,none": "N/A" + }, + "kobest_copa": { + "alias": "kobest_copa", + "acc,none": 0.6425, + "acc_stderr,none": 0.02399319817984353, + "f1,none": 0.6424441318956087, + "f1_stderr,none": "N/A" + }, + "kobest_hellaswag": { + "alias": "kobest_hellaswag", + "acc,none": 0.4225, + "acc_stderr,none": 0.024728792083212124, + "f1,none": 0.41473123146363744, + "f1_stderr,none": "N/A", + "acc_norm,none": 0.555, + "acc_norm_stderr,none": 0.024879408401447513 + }, + "mmlu": { + "acc,none": 0.7340872098003941, + "acc_stderr,none": 0.0039017502395412695, + "alias": "mmlu" + }, + "mmlu_humanities": { + "acc,none": 0.6843302990897269, + "acc_stderr,none": 0.007802473252366281, + "alias": " - humanities" + }, + "mmlu_formal_logic": { + "alias": " - formal_logic", + "acc,none": 0.5873015873015873, + "acc_stderr,none": 0.04403438954768177 + }, + "mmlu_high_school_european_history": { + "alias": " - high_school_european_history", + "acc,none": 0.7818181818181819, + "acc_stderr,none": 0.03225078108306289 + }, + "mmlu_high_school_us_history": { + "alias": " - high_school_us_history", + "acc,none": 0.8235294117647058, + "acc_stderr,none": 0.02675640153807896 + }, + "mmlu_high_school_world_history": { + "alias": " - high_school_world_history", + "acc,none": 0.8354430379746836, + "acc_stderr,none": 0.024135736240566936 + }, + "mmlu_international_law": { + "alias": " - international_law", + "acc,none": 0.8099173553719008, + "acc_stderr,none": 0.035817969517092825 + }, + "mmlu_jurisprudence": { + "alias": " - jurisprudence", + "acc,none": 0.8148148148148148, + "acc_stderr,none": 0.03755265865037183 + }, + "mmlu_logical_fallacies": { + "alias": " - logical_fallacies", + "acc,none": 0.8404907975460123, + "acc_stderr,none": 0.02876748172598386 + }, + 
"mmlu_moral_disputes": { + "alias": " - moral_disputes", + "acc,none": 0.7427745664739884, + "acc_stderr,none": 0.023532925431044276 + }, + "mmlu_moral_scenarios": { + "alias": " - moral_scenarios", + "acc,none": 0.33, + "acc_stderr,none": 0.02354007940398386 + }, + "mmlu_philosophy": { + "alias": " - philosophy", + "acc,none": 0.7395498392282959, + "acc_stderr,none": 0.024926723224845557 + }, + "mmlu_prehistory": { + "alias": " - prehistory", + "acc,none": 0.7808641975308642, + "acc_stderr,none": 0.023016705640262206 + }, + "mmlu_professional_law": { + "alias": " - professional_law", + "acc,none": 0.505, + "acc_stderr,none": 0.025030057119361453 + }, + "mmlu_world_religions": { + "alias": " - world_religions", + "acc,none": 0.8128654970760234, + "acc_stderr,none": 0.029913127232368032 + }, + "mmlu_other": { + "acc,none": 0.7389867841409692, + "acc_stderr,none": 0.008126062949704117, + "alias": " - other" + }, + "mmlu_business_ethics": { + "alias": " - business_ethics", + "acc,none": 0.76, + "acc_stderr,none": 0.04292346959909282 + }, + "mmlu_clinical_knowledge": { + "alias": " - clinical_knowledge", + "acc,none": 0.7622641509433963, + "acc_stderr,none": 0.026199808807561918 + }, + "mmlu_college_medicine": { + "alias": " - college_medicine", + "acc,none": 0.7398843930635838, + "acc_stderr,none": 0.033450369167889904 + }, + "mmlu_global_facts": { + "alias": " - global_facts", + "acc,none": 0.43, + "acc_stderr,none": 0.049756985195624284 + }, + "mmlu_human_aging": { + "alias": " - human_aging", + "acc,none": 0.7443946188340808, + "acc_stderr,none": 0.029275891003969927 + }, + "mmlu_management": { + "alias": " - management", + "acc,none": 0.8737864077669902, + "acc_stderr,none": 0.032881802788086265 + }, + "mmlu_marketing": { + "alias": " - marketing", + "acc,none": 0.8974358974358975, + "acc_stderr,none": 0.01987565502786744 + }, + "mmlu_medical_genetics": { + "alias": " - medical_genetics", + "acc,none": 0.81, + "acc_stderr,none": 0.039427724440366234 + }, + 
"mmlu_miscellaneous": { + "alias": " - miscellaneous", + "acc,none": 0.815, + "acc_stderr,none": 0.019439192511876188 + }, + "mmlu_nutrition": { + "alias": " - nutrition", + "acc,none": 0.7679738562091504, + "acc_stderr,none": 0.024170840879340887 + }, + "mmlu_professional_accounting": { + "alias": " - professional_accounting", + "acc,none": 0.5673758865248227, + "acc_stderr,none": 0.02955545423677885 + }, + "mmlu_professional_medicine": { + "alias": " - professional_medicine", + "acc,none": 0.7757352941176471, + "acc_stderr,none": 0.025336848563332355 + }, + "mmlu_virology": { + "alias": " - virology", + "acc,none": 0.5120481927710844, + "acc_stderr,none": 0.03891364495835817 + }, + "mmlu_social_sciences": { + "acc,none": 0.8139705882352941, + "acc_stderr,none": 0.007317981445928869, + "alias": " - social sciences" + }, + "mmlu_econometrics": { + "alias": " - econometrics", + "acc,none": 0.6403508771929824, + "acc_stderr,none": 0.04514496132873633 + }, + "mmlu_high_school_geography": { + "alias": " - high_school_geography", + "acc,none": 0.8636363636363636, + "acc_stderr,none": 0.024450155973189835 + }, + "mmlu_high_school_government_and_politics": { + "alias": " - high_school_government_and_politics", + "acc,none": 0.8756476683937824, + "acc_stderr,none": 0.02381447708659356 + }, + "mmlu_high_school_macroeconomics": { + "alias": " - high_school_macroeconomics", + "acc,none": 0.8051282051282052, + "acc_stderr,none": 0.020083167595181393 + }, + "mmlu_high_school_microeconomics": { + "alias": " - high_school_microeconomics", + "acc,none": 0.9033613445378151, + "acc_stderr,none": 0.019192520709708723 + }, + "mmlu_high_school_psychology": { + "alias": " - high_school_psychology", + "acc,none": 0.905, + "acc_stderr,none": 0.014679107277903242 + }, + "mmlu_human_sexuality": { + "alias": " - human_sexuality", + "acc,none": 0.7633587786259542, + "acc_stderr,none": 0.037276735755969154 + }, + "mmlu_professional_psychology": { + "alias": " - professional_psychology", + 
"acc,none": 0.7325, + "acc_stderr,none": 0.022160484939004153 + }, + "mmlu_public_relations": { + "alias": " - public_relations", + "acc,none": 0.6636363636363637, + "acc_stderr,none": 0.04525393596302505 + }, + "mmlu_security_studies": { + "alias": " - security_studies", + "acc,none": 0.746938775510204, + "acc_stderr,none": 0.0278330238713997 + }, + "mmlu_sociology": { + "alias": " - sociology", + "acc,none": 0.8557213930348259, + "acc_stderr,none": 0.02484575321230605 + }, + "mmlu_us_foreign_policy": { + "alias": " - us_foreign_policy", + "acc,none": 0.89, + "acc_stderr,none": 0.03144660377352203 + }, + "mmlu_stem": { + "acc,none": 0.7094830320329845, + "acc_stderr,none": 0.007847594320390904, + "alias": " - stem" + }, + "mmlu_abstract_algebra": { + "alias": " - abstract_algebra", + "acc,none": 0.47, + "acc_stderr,none": 0.050161355804659205 + }, + "mmlu_anatomy": { + "alias": " - anatomy", + "acc,none": 0.674074074074074, + "acc_stderr,none": 0.040491220417025055 + }, + "mmlu_astronomy": { + "alias": " - astronomy", + "acc,none": 0.8289473684210527, + "acc_stderr,none": 0.030643607071677088 + }, + "mmlu_college_biology": { + "alias": " - college_biology", + "acc,none": 0.8125, + "acc_stderr,none": 0.032639560491693344 + }, + "mmlu_college_chemistry": { + "alias": " - college_chemistry", + "acc,none": 0.53, + "acc_stderr,none": 0.050161355804659205 + }, + "mmlu_college_computer_science": { + "alias": " - college_computer_science", + "acc,none": 0.7, + "acc_stderr,none": 0.046056618647183814 + }, + "mmlu_college_mathematics": { + "alias": " - college_mathematics", + "acc,none": 0.54, + "acc_stderr,none": 0.05009082659620332 + }, + "mmlu_college_physics": { + "alias": " - college_physics", + "acc,none": 0.5784313725490197, + "acc_stderr,none": 0.049135952012745045 + }, + "mmlu_computer_security": { + "alias": " - computer_security", + "acc,none": 0.81, + "acc_stderr,none": 0.039427724440366234 + }, + "mmlu_conceptual_physics": { + "alias": " - conceptual_physics", 
+ "acc,none": 0.7957446808510639, + "acc_stderr,none": 0.02635515841334943 + }, + "mmlu_electrical_engineering": { + "alias": " - electrical_engineering", + "acc,none": 0.7586206896551724, + "acc_stderr,none": 0.03565998174135302 + }, + "mmlu_elementary_mathematics": { + "alias": " - elementary_mathematics", + "acc,none": 0.6798941798941799, + "acc_stderr,none": 0.024026846392873502 + }, + "mmlu_high_school_biology": { + "alias": " - high_school_biology", + "acc,none": 0.8903225806451613, + "acc_stderr,none": 0.017776778700485173 + }, + "mmlu_high_school_chemistry": { + "alias": " - high_school_chemistry", + "acc,none": 0.7241379310344828, + "acc_stderr,none": 0.0314471258167824 + }, + "mmlu_high_school_computer_science": { + "alias": " - high_school_computer_science", + "acc,none": 0.85, + "acc_stderr,none": 0.0358870281282637 + }, + "mmlu_high_school_mathematics": { + "alias": " - high_school_mathematics", + "acc,none": 0.5518518518518518, + "acc_stderr,none": 0.030321167196316286 + }, + "mmlu_high_school_physics": { + "alias": " - high_school_physics", + "acc,none": 0.6688741721854304, + "acc_stderr,none": 0.038425817186598696 + }, + "mmlu_high_school_statistics": { + "alias": " - high_school_statistics", + "acc,none": 0.7361111111111112, + "acc_stderr,none": 0.030058202704309846 + }, + "mmlu_machine_learning": { + "alias": " - machine_learning", + "acc,none": 0.6071428571428571, + "acc_stderr,none": 0.04635550135609976 + }, + "winogrande": { + "alias": "winogrande", + "acc,none": 0.745, + "acc_stderr,none": 0.021820353332356303 + } + }, + "groups": { + "kmmlu": { + "acc,none": 0.459300064808814, + "acc_stderr,none": 0.003912015372025933, + "alias": "kmmlu" + }, + "kmmlu_applied_science": { + "acc,none": 0.44479166666666664, + "acc_stderr,none": 0.00709881931685827, + "alias": " - kmmlu_applied_science" + }, + "kmmlu_humss": { + "acc,none": 0.4604395604395604, + "acc_stderr,none": 0.009405507500045103, + "alias": " - kmmlu_humss" + }, + "kmmlu_other": { + 
"acc,none": 0.4625, + "acc_stderr,none": 0.008030101105270982, + "alias": " - kmmlu_other" + }, + "kmmlu_stem": { + "acc,none": 0.4720930232558139, + "acc_stderr,none": 0.0073082778415419075, + "alias": " - kmmlu_stem" + }, + "mmlu": { + "acc,none": 0.7340872098003941, + "acc_stderr,none": 0.0039017502395412695, + "alias": "mmlu" + }, + "mmlu_humanities": { + "acc,none": 0.6843302990897269, + "acc_stderr,none": 0.007802473252366281, + "alias": " - humanities" + }, + "mmlu_other": { + "acc,none": 0.7389867841409692, + "acc_stderr,none": 0.008126062949704117, + "alias": " - other" + }, + "mmlu_social_sciences": { + "acc,none": 0.8139705882352941, + "acc_stderr,none": 0.007317981445928869, + "alias": " - social sciences" + }, + "mmlu_stem": { + "acc,none": 0.7094830320329845, + "acc_stderr,none": 0.007847594320390904, + "alias": " - stem" + } + }, + "group_subtasks": { + "mmlu_humanities": [ + "mmlu_formal_logic", + "mmlu_high_school_european_history", + "mmlu_high_school_us_history", + "mmlu_high_school_world_history", + "mmlu_international_law", + "mmlu_jurisprudence", + "mmlu_logical_fallacies", + "mmlu_moral_disputes", + "mmlu_moral_scenarios", + "mmlu_philosophy", + "mmlu_prehistory", + "mmlu_professional_law", + "mmlu_world_religions" + ], + "mmlu_social_sciences": [ + "mmlu_econometrics", + "mmlu_high_school_geography", + "mmlu_high_school_government_and_politics", + "mmlu_high_school_macroeconomics", + "mmlu_high_school_microeconomics", + "mmlu_high_school_psychology", + "mmlu_human_sexuality", + "mmlu_professional_psychology", + "mmlu_public_relations", + "mmlu_security_studies", + "mmlu_sociology", + "mmlu_us_foreign_policy" + ], + "mmlu_other": [ + "mmlu_business_ethics", + "mmlu_clinical_knowledge", + "mmlu_college_medicine", + "mmlu_global_facts", + "mmlu_human_aging", + "mmlu_management", + "mmlu_marketing", + "mmlu_medical_genetics", + "mmlu_miscellaneous", + "mmlu_nutrition", + "mmlu_professional_accounting", + "mmlu_professional_medicine", + 
"mmlu_virology" + ], + "mmlu_stem": [ + "mmlu_abstract_algebra", + "mmlu_anatomy", + "mmlu_astronomy", + "mmlu_college_biology", + "mmlu_college_chemistry", + "mmlu_college_computer_science", + "mmlu_college_mathematics", + "mmlu_college_physics", + "mmlu_computer_security", + "mmlu_conceptual_physics", + "mmlu_electrical_engineering", + "mmlu_elementary_mathematics", + "mmlu_high_school_biology", + "mmlu_high_school_chemistry", + "mmlu_high_school_computer_science", + "mmlu_high_school_mathematics", + "mmlu_high_school_physics", + "mmlu_high_school_statistics", + "mmlu_machine_learning" + ], + "mmlu": [ + "mmlu_stem", + "mmlu_other", + "mmlu_social_sciences", + "mmlu_humanities" + ], + "hellaswag": [], + "arc_easy": [], + "arc_challenge": [], + "winogrande": [], + "kmmlu_humss": [ + "kmmlu_accounting", + "kmmlu_criminal_law", + "kmmlu_economics", + "kmmlu_education", + "kmmlu_korean_history", + "kmmlu_law", + "kmmlu_management", + "kmmlu_political_science_and_sociology", + "kmmlu_psychology", + "kmmlu_social_welfare", + "kmmlu_taxation" + ], + "kmmlu_applied_science": [ + "kmmlu_aviation_engineering_and_maintenance", + "kmmlu_electronics_engineering", + "kmmlu_energy_management", + "kmmlu_environmental_science", + "kmmlu_gas_technology_and_engineering", + "kmmlu_geomatics", + "kmmlu_industrial_engineer", + "kmmlu_machine_design_and_manufacturing", + "kmmlu_maritime_engineering", + "kmmlu_nondestructive_testing", + "kmmlu_railway_and_automotive_engineering", + "kmmlu_telecommunications_and_wireless_technology" + ], + "kmmlu_other": [ + "kmmlu_agricultural_sciences", + "kmmlu_construction", + "kmmlu_fashion", + "kmmlu_food_processing", + "kmmlu_health", + "kmmlu_interior_architecture_and_design", + "kmmlu_marketing", + "kmmlu_patent", + "kmmlu_public_safety", + "kmmlu_real_estate", + "kmmlu_refrigerating_machinery" + ], + "kmmlu_stem": [ + "kmmlu_biology", + "kmmlu_chemical_engineering", + "kmmlu_chemistry", + "kmmlu_civil_engineering", + "kmmlu_computer_science", + 
"kmmlu_ecology", + "kmmlu_electrical_engineering", + "kmmlu_information_technology", + "kmmlu_materials_engineering", + "kmmlu_math", + "kmmlu_mechanical_engineering" + ], + "kmmlu": [ + "kmmlu_stem", + "kmmlu_other", + "kmmlu_applied_science", + "kmmlu_humss" + ], + "kobest_boolq": [], + "kobest_copa": [], + "kobest_hellaswag": [] + }, + "configs": { + "arc_challenge": { + "task": "arc_challenge", + "tag": [ + "ai2_arc" + ], + "dataset_path": "allenai/ai2_arc", + "dataset_name": "ARC-Challenge", + "training_split": "train", + "validation_split": "validation", + "test_split": "test", + "doc_to_text": "Question: {{question}}\nAnswer:", + "doc_to_target": "{{choices.label.index(answerKey)}}", + "unsafe_code": false, + "doc_to_choice": "{{choices.text}}", + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": null, + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "Question: {{question}}\nAnswer:", + "doc_to_choice": "{{choices.text}}", + "doc_to_target": "{{choices.label.index(answerKey)}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + }, + { + "metric": "acc_norm", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": true, + "doc_to_decontamination_query": "Question: {{question}}\nAnswer:", + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "arc_easy": { + "task": "arc_easy", + "tag": [ + "ai2_arc" + ], + "dataset_path": "allenai/ai2_arc", + "dataset_name": "ARC-Easy", + "training_split": "train", + "validation_split": "validation", + "test_split": "test", + "doc_to_text": 
"Question: {{question}}\nAnswer:", + "doc_to_target": "{{choices.label.index(answerKey)}}", + "unsafe_code": false, + "doc_to_choice": "{{choices.text}}", + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": null, + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "Question: {{question}}\nAnswer:", + "doc_to_choice": "{{choices.text}}", + "doc_to_target": "{{choices.label.index(answerKey)}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + }, + { + "metric": "acc_norm", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": true, + "doc_to_decontamination_query": "Question: {{question}}\nAnswer:", + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "hellaswag": { + "task": "hellaswag", + "tag": [ + "multiple_choice" + ], + "dataset_path": "Rowan/hellaswag", + "training_split": "train", + "validation_split": "validation", + "process_docs": "def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:\n def _process_doc(doc):\n ctx = doc[\"ctx_a\"] + \" \" + doc[\"ctx_b\"].capitalize()\n out_doc = {\n \"query\": preprocess(doc[\"activity_label\"] + \": \" + ctx),\n \"choices\": [preprocess(ending) for ending in doc[\"endings\"]],\n \"gold\": int(doc[\"label\"]),\n }\n return out_doc\n\n return dataset.map(_process_doc)\n", + "doc_to_text": "{{query}}", + "doc_to_target": "{{label}}", + "unsafe_code": false, + "doc_to_choice": "choices", + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": null, + 
"process_docs": "", + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{query}}", + "doc_to_choice": "choices", + "doc_to_target": "{{label}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + }, + { + "metric": "acc_norm", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_accounting": { + "task": "kmmlu_accounting", + "tag": "kmmlu_humss_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Accounting", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_agricultural_sciences": { + "task": "kmmlu_agricultural_sciences", + "tag": "kmmlu_other_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Agricultural-Sciences", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_aviation_engineering_and_maintenance": { + "task": "kmmlu_aviation_engineering_and_maintenance", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Aviation-Engineering-and-Maintenance", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_biology": { + "task": "kmmlu_biology", + "tag": "kmmlu_stem_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Biology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_chemical_engineering": { + "task": "kmmlu_chemical_engineering", + "tag": "kmmlu_stem_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Chemical-Engineering", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_chemistry": { + "task": "kmmlu_chemistry", + "tag": "kmmlu_stem_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Chemistry", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_civil_engineering": { + "task": "kmmlu_civil_engineering", + "tag": "kmmlu_stem_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Civil-Engineering", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_computer_science": { + "task": "kmmlu_computer_science", + "tag": "kmmlu_stem_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Computer-Science", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_construction": { + "task": "kmmlu_construction", + "tag": "kmmlu_other_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Construction", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_criminal_law": { + "task": "kmmlu_criminal_law", + "tag": "kmmlu_humss_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Criminal-Law", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_ecology": { + "task": "kmmlu_ecology", + "tag": "kmmlu_stem_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Ecology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_economics": { + "task": "kmmlu_economics", + "tag": "kmmlu_humss_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Economics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_education": { + "task": "kmmlu_education", + "tag": "kmmlu_humss_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Education", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_electrical_engineering": { + "task": "kmmlu_electrical_engineering", + "tag": "kmmlu_stem_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Electrical-Engineering", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_electronics_engineering": { + "task": "kmmlu_electronics_engineering", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Electronics-Engineering", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_energy_management": { + "task": "kmmlu_energy_management", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Energy-Management", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_environmental_science": { + "task": "kmmlu_environmental_science", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Environmental-Science", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_fashion": { + "task": "kmmlu_fashion", + "tag": "kmmlu_other_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Fashion", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_food_processing": { + "task": "kmmlu_food_processing", + "tag": "kmmlu_other_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Food-Processing", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_gas_technology_and_engineering": { + "task": "kmmlu_gas_technology_and_engineering", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Gas-Technology-and-Engineering", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_geomatics": { + "task": "kmmlu_geomatics", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Geomatics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_health": { + "task": "kmmlu_health", + "tag": "kmmlu_other_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Health", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_industrial_engineer": { + "task": "kmmlu_industrial_engineer", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Industrial-Engineer", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_information_technology": { + "task": "kmmlu_information_technology", + "tag": "kmmlu_stem_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Information-Technology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_interior_architecture_and_design": { + "task": "kmmlu_interior_architecture_and_design", + "tag": "kmmlu_other_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Interior-Architecture-and-Design", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_korean_history": { + "task": "kmmlu_korean_history", + "tag": "kmmlu_humss_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Korean-History", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_law": { + "task": "kmmlu_law", + "tag": "kmmlu_humss_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Law", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_machine_design_and_manufacturing": { + "task": "kmmlu_machine_design_and_manufacturing", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Machine-Design-and-Manufacturing", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_management": { + "task": "kmmlu_management", + "tag": "kmmlu_humss_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Management", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_maritime_engineering": { + "task": "kmmlu_maritime_engineering", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Maritime-Engineering", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_marketing": { + "task": "kmmlu_marketing", + "tag": "kmmlu_other_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Marketing", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_materials_engineering": { + "task": "kmmlu_materials_engineering", + "tag": "kmmlu_stem_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Materials-Engineering", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_math": { + "task": "kmmlu_math", + "tag": "kmmlu_stem_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Math", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_mechanical_engineering": { + "task": "kmmlu_mechanical_engineering", + "tag": "kmmlu_stem_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Mechanical-Engineering", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_nondestructive_testing": { + "task": "kmmlu_nondestructive_testing", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Nondestructive-Testing", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_patent": { + "task": "kmmlu_patent", + "tag": "kmmlu_other_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Patent", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_political_science_and_sociology": { + "task": "kmmlu_political_science_and_sociology", + "tag": "kmmlu_humss_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Political-Science-and-Sociology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_psychology": { + "task": "kmmlu_psychology", + "tag": "kmmlu_humss_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Psychology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_public_safety": { + "task": "kmmlu_public_safety", + "tag": "kmmlu_other_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Public-Safety", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_railway_and_automotive_engineering": { + "task": "kmmlu_railway_and_automotive_engineering", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Railway-and-Automotive-Engineering", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_real_estate": { + "task": "kmmlu_real_estate", + "tag": "kmmlu_other_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Real-Estate", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_refrigerating_machinery": { + "task": "kmmlu_refrigerating_machinery", + "tag": "kmmlu_other_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Refrigerating-Machinery", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_social_welfare": { + "task": "kmmlu_social_welfare", + "tag": "kmmlu_humss_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Social-Welfare", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_taxation": { + "task": "kmmlu_taxation", + "tag": "kmmlu_humss_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Taxation", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_telecommunications_and_wireless_technology": { + "task": "kmmlu_telecommunications_and_wireless_technology", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Telecommunications-and-Wireless-Technology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "kobest_boolq": { + "task": "kobest_boolq", + "dataset_path": "skt/kobest_v1", + "dataset_name": "boolq", + "training_split": "train", + "validation_split": "validation", + "test_split": "test", + "doc_to_text": "{{paragraph}} 질문: {{question}} 답변: ", + "doc_to_target": "{{label}}", + "unsafe_code": false, + "doc_to_choice": [ + "아니오", + "예" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": null, + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{paragraph}} 질문: {{question}} 답변: ", + "doc_to_choice": [ + "아니오", + "예" + ], + "doc_to_target": "{{label}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + }, + { + "metric": "f1", + "aggregation": "def macro_f1_score(items):\n from sklearn.metrics import f1_score\n\n unzipped_list = list(zip(*items))\n golds = unzipped_list[0]\n preds = unzipped_list[1]\n fscore = f1_score(golds, preds, average=\"macro\")\n return fscore\n", + "average": "macro", + "hf_evaluate": true, + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": 
"/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "kobest_copa": { + "task": "kobest_copa", + "dataset_path": "skt/kobest_v1", + "dataset_name": "copa", + "training_split": "train", + "validation_split": "validation", + "test_split": "test", + "doc_to_text": "def copa_doc_to_text(doc: dict) -> str:\n connector = {\"원인\": \" 왜냐하면\", \"결과\": \" 그래서\"}[doc[\"question\"].strip()]\n return f\"\"\"{doc[\"premise\"]} {connector}\"\"\"\n", + "doc_to_target": "def copa_doc_to_target(doc: dict) -> str:\n correct_choice = doc[\"alternative_1\"] if doc[\"label\"] == 0 else doc[\"alternative_2\"]\n return f\"\"\"{correct_choice}\"\"\"\n", + "unsafe_code": false, + "doc_to_choice": "def copa_doc_to_choice(doc: dict) -> list:\n return [f\"\"\"{doc[\"alternative_1\"]}\"\"\", f\"\"\"{doc[\"alternative_2\"]}\"\"\"]\n", + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": null, + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "", + "doc_to_choice": "", + "doc_to_target": "", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + }, + { + "metric": "f1", + "aggregation": "def macro_f1_score(items):\n from sklearn.metrics import f1_score\n\n unzipped_list = list(zip(*items))\n golds = unzipped_list[0]\n preds = unzipped_list[1]\n fscore = f1_score(golds, preds, average=\"macro\")\n return fscore\n", + "average": "macro", + "hf_evaluate": true, + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true 
+ } + }, + "kobest_hellaswag": { + "task": "kobest_hellaswag", + "dataset_path": "skt/kobest_v1", + "dataset_name": "hellaswag", + "training_split": "train", + "validation_split": "validation", + "test_split": "test", + "process_docs": "def hellaswag_process_doc(doc: Dataset) -> Dataset:\n def preprocessor(dataset):\n return {\n \"query\": f\"\"\"문장: {dataset[\"context\"]}\"\"\",\n \"choices\": [\n dataset[\"ending_1\"],\n dataset[\"ending_2\"],\n dataset[\"ending_3\"],\n dataset[\"ending_4\"],\n ],\n \"gold\": int(dataset[\"label\"]),\n }\n\n return doc.map(preprocessor)\n", + "doc_to_text": "{{query}}", + "doc_to_target": "{{label}}", + "unsafe_code": false, + "doc_to_choice": "choices", + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": null, + "process_docs": "", + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{query}}", + "doc_to_choice": "choices", + "doc_to_target": "{{label}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + }, + { + "metric": "acc_norm", + "aggregation": "mean", + "higher_is_better": true + }, + { + "metric": "f1", + "aggregation": "def macro_f1_score(items):\n from sklearn.metrics import f1_score\n\n unzipped_list = list(zip(*items))\n golds = unzipped_list[0]\n preds = unzipped_list[1]\n fscore = f1_score(golds, preds, average=\"macro\")\n return fscore\n", + "average": "macro", + "hf_evaluate": true, + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_abstract_algebra": { + "task": "mmlu_abstract_algebra", + "task_alias": 
"abstract_algebra", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "abstract_algebra", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about abstract algebra.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_anatomy": { + "task": "mmlu_anatomy", + "task_alias": "anatomy", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "anatomy", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about anatomy.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_astronomy": { + "task": "mmlu_astronomy", + "task_alias": "astronomy", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "astronomy", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about astronomy.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_business_ethics": { + "task": "mmlu_business_ethics", + "task_alias": "business_ethics", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "business_ethics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about business ethics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_clinical_knowledge": { + "task": "mmlu_clinical_knowledge", + "task_alias": "clinical_knowledge", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "clinical_knowledge", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about clinical knowledge.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_college_biology": { + "task": "mmlu_college_biology", + "task_alias": "college_biology", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "college_biology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about college biology.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_college_chemistry": { + "task": "mmlu_college_chemistry", + "task_alias": "college_chemistry", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "college_chemistry", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about college chemistry.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_college_computer_science": { + "task": "mmlu_college_computer_science", + "task_alias": "college_computer_science", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "college_computer_science", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about college computer science.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_college_mathematics": { + "task": "mmlu_college_mathematics", + "task_alias": "college_mathematics", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "college_mathematics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about college mathematics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_college_medicine": { + "task": "mmlu_college_medicine", + "task_alias": "college_medicine", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "college_medicine", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about college medicine.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_college_physics": { + "task": "mmlu_college_physics", + "task_alias": "college_physics", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "college_physics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about college physics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_computer_security": { + "task": "mmlu_computer_security", + "task_alias": "computer_security", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "computer_security", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about computer security.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_conceptual_physics": { + "task": "mmlu_conceptual_physics", + "task_alias": "conceptual_physics", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "conceptual_physics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about conceptual physics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_econometrics": { + "task": "mmlu_econometrics", + "task_alias": "econometrics", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "econometrics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about econometrics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_electrical_engineering": { + "task": "mmlu_electrical_engineering", + "task_alias": "electrical_engineering", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "electrical_engineering", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about electrical engineering.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_elementary_mathematics": { + "task": "mmlu_elementary_mathematics", + "task_alias": "elementary_mathematics", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "elementary_mathematics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about elementary mathematics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_formal_logic": { + "task": "mmlu_formal_logic", + "task_alias": "formal_logic", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "formal_logic", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about formal logic.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_global_facts": { + "task": "mmlu_global_facts", + "task_alias": "global_facts", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "global_facts", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about global facts.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_high_school_biology": { + "task": "mmlu_high_school_biology", + "task_alias": "high_school_biology", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_biology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school biology.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_high_school_chemistry": { + "task": "mmlu_high_school_chemistry", + "task_alias": "high_school_chemistry", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_chemistry", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school chemistry.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_high_school_computer_science": { + "task": "mmlu_high_school_computer_science", + "task_alias": "high_school_computer_science", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_computer_science", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school computer science.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_high_school_european_history": { + "task": "mmlu_high_school_european_history", + "task_alias": "high_school_european_history", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_european_history", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school european history.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_high_school_geography": { + "task": "mmlu_high_school_geography", + "task_alias": "high_school_geography", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_geography", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school geography.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_high_school_government_and_politics": { + "task": "mmlu_high_school_government_and_politics", + "task_alias": "high_school_government_and_politics", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_government_and_politics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school government and politics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_high_school_macroeconomics": { + "task": "mmlu_high_school_macroeconomics", + "task_alias": "high_school_macroeconomics", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_macroeconomics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school macroeconomics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_high_school_mathematics": { + "task": "mmlu_high_school_mathematics", + "task_alias": "high_school_mathematics", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_mathematics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school mathematics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_high_school_microeconomics": { + "task": "mmlu_high_school_microeconomics", + "task_alias": "high_school_microeconomics", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_microeconomics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school microeconomics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_high_school_physics": { + "task": "mmlu_high_school_physics", + "task_alias": "high_school_physics", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_physics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school physics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_high_school_psychology": { + "task": "mmlu_high_school_psychology", + "task_alias": "high_school_psychology", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_psychology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school psychology.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_high_school_statistics": { + "task": "mmlu_high_school_statistics", + "task_alias": "high_school_statistics", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_statistics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school statistics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_high_school_us_history": { + "task": "mmlu_high_school_us_history", + "task_alias": "high_school_us_history", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_us_history", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school us history.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_high_school_world_history": { + "task": "mmlu_high_school_world_history", + "task_alias": "high_school_world_history", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_world_history", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school world history.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_human_aging": { + "task": "mmlu_human_aging", + "task_alias": "human_aging", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "human_aging", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about human aging.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_human_sexuality": { + "task": "mmlu_human_sexuality", + "task_alias": "human_sexuality", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "human_sexuality", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about human sexuality.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_international_law": { + "task": "mmlu_international_law", + "task_alias": "international_law", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "international_law", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about international law.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_jurisprudence": { + "task": "mmlu_jurisprudence", + "task_alias": "jurisprudence", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "jurisprudence", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about jurisprudence.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_logical_fallacies": { + "task": "mmlu_logical_fallacies", + "task_alias": "logical_fallacies", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "logical_fallacies", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about logical fallacies.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_machine_learning": { + "task": "mmlu_machine_learning", + "task_alias": "machine_learning", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "machine_learning", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about machine learning.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_management": { + "task": "mmlu_management", + "task_alias": "management", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "management", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about management.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_marketing": { + "task": "mmlu_marketing", + "task_alias": "marketing", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "marketing", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about marketing.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_medical_genetics": { + "task": "mmlu_medical_genetics", + "task_alias": "medical_genetics", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "medical_genetics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about medical genetics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_miscellaneous": { + "task": "mmlu_miscellaneous", + "task_alias": "miscellaneous", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "miscellaneous", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about miscellaneous.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_moral_disputes": { + "task": "mmlu_moral_disputes", + "task_alias": "moral_disputes", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "moral_disputes", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about moral disputes.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_moral_scenarios": { + "task": "mmlu_moral_scenarios", + "task_alias": "moral_scenarios", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "moral_scenarios", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about moral scenarios.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_nutrition": { + "task": "mmlu_nutrition", + "task_alias": "nutrition", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "nutrition", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about nutrition.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_philosophy": { + "task": "mmlu_philosophy", + "task_alias": "philosophy", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "philosophy", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about philosophy.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_prehistory": { + "task": "mmlu_prehistory", + "task_alias": "prehistory", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "prehistory", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about prehistory.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_professional_accounting": { + "task": "mmlu_professional_accounting", + "task_alias": "professional_accounting", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "professional_accounting", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about professional accounting.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_professional_law": { + "task": "mmlu_professional_law", + "task_alias": "professional_law", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "professional_law", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about professional law.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_professional_medicine": { + "task": "mmlu_professional_medicine", + "task_alias": "professional_medicine", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "professional_medicine", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about professional medicine.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_professional_psychology": { + "task": "mmlu_professional_psychology", + "task_alias": "professional_psychology", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "professional_psychology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about professional psychology.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_public_relations": { + "task": "mmlu_public_relations", + "task_alias": "public_relations", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "public_relations", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about public relations.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_security_studies": { + "task": "mmlu_security_studies", + "task_alias": "security_studies", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "security_studies", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about security studies.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_sociology": { + "task": "mmlu_sociology", + "task_alias": "sociology", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "sociology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about sociology.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_us_foreign_policy": { + "task": "mmlu_us_foreign_policy", + "task_alias": "us_foreign_policy", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "us_foreign_policy", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about us foreign policy.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_virology": { + "task": "mmlu_virology", + "task_alias": "virology", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "virology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about virology.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_world_religions": { + "task": "mmlu_world_religions", + "task_alias": "world_religions", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "world_religions", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about world religions.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + }, + "winogrande": { + "task": "winogrande", + "dataset_path": "allenai/winogrande", + "dataset_name": "winogrande_xl", + "training_split": "train", + "validation_split": "validation", + "doc_to_text": "def doc_to_text(doc):\n answer_to_num = {\"1\": 0, \"2\": 1}\n return answer_to_num[doc[\"answer\"]]\n", + "doc_to_target": "def doc_to_target(doc):\n idx = doc[\"sentence\"].index(\"_\") + 1\n return doc[\"sentence\"][idx:].strip()\n", + "unsafe_code": false, + "doc_to_choice": "def doc_to_choice(doc):\n idx = doc[\"sentence\"].index(\"_\")\n options = [doc[\"option1\"], doc[\"option2\"]]\n return [doc[\"sentence\"][:idx] + opt for opt in options]\n", + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": null, + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "", + "doc_to_choice": "", + "doc_to_target": "", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": true, + "doc_to_decontamination_query": "sentence", + "metadata": { + "version": 1.0, + "pretrained": 
"/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + } + } + }, + "versions": { + "arc_challenge": 1.0, + "arc_easy": 1.0, + "hellaswag": 1.0, + "kmmlu": 2.0, + "kmmlu_accounting": 2.0, + "kmmlu_agricultural_sciences": 2.0, + "kmmlu_applied_science": 2.0, + "kmmlu_aviation_engineering_and_maintenance": 2.0, + "kmmlu_biology": 2.0, + "kmmlu_chemical_engineering": 2.0, + "kmmlu_chemistry": 2.0, + "kmmlu_civil_engineering": 2.0, + "kmmlu_computer_science": 2.0, + "kmmlu_construction": 2.0, + "kmmlu_criminal_law": 2.0, + "kmmlu_ecology": 2.0, + "kmmlu_economics": 2.0, + "kmmlu_education": 2.0, + "kmmlu_electrical_engineering": 2.0, + "kmmlu_electronics_engineering": 2.0, + "kmmlu_energy_management": 2.0, + "kmmlu_environmental_science": 2.0, + "kmmlu_fashion": 2.0, + "kmmlu_food_processing": 2.0, + "kmmlu_gas_technology_and_engineering": 2.0, + "kmmlu_geomatics": 2.0, + "kmmlu_health": 2.0, + "kmmlu_humss": 2.0, + "kmmlu_industrial_engineer": 2.0, + "kmmlu_information_technology": 2.0, + "kmmlu_interior_architecture_and_design": 2.0, + "kmmlu_korean_history": 2.0, + "kmmlu_law": 2.0, + "kmmlu_machine_design_and_manufacturing": 2.0, + "kmmlu_management": 2.0, + "kmmlu_maritime_engineering": 2.0, + "kmmlu_marketing": 2.0, + "kmmlu_materials_engineering": 2.0, + "kmmlu_math": 2.0, + "kmmlu_mechanical_engineering": 2.0, + "kmmlu_nondestructive_testing": 2.0, + "kmmlu_other": 2.0, + "kmmlu_patent": 2.0, + "kmmlu_political_science_and_sociology": 2.0, + "kmmlu_psychology": 2.0, + "kmmlu_public_safety": 2.0, + "kmmlu_railway_and_automotive_engineering": 2.0, + "kmmlu_real_estate": 2.0, + "kmmlu_refrigerating_machinery": 2.0, + "kmmlu_social_welfare": 2.0, + "kmmlu_stem": 2.0, + "kmmlu_taxation": 2.0, + "kmmlu_telecommunications_and_wireless_technology": 2.0, + "kobest_boolq": 1.0, + "kobest_copa": 1.0, + "kobest_hellaswag": 1.0, + "mmlu": 2, + "mmlu_abstract_algebra": 1.0, + "mmlu_anatomy": 1.0, + 
"mmlu_astronomy": 1.0, + "mmlu_business_ethics": 1.0, + "mmlu_clinical_knowledge": 1.0, + "mmlu_college_biology": 1.0, + "mmlu_college_chemistry": 1.0, + "mmlu_college_computer_science": 1.0, + "mmlu_college_mathematics": 1.0, + "mmlu_college_medicine": 1.0, + "mmlu_college_physics": 1.0, + "mmlu_computer_security": 1.0, + "mmlu_conceptual_physics": 1.0, + "mmlu_econometrics": 1.0, + "mmlu_electrical_engineering": 1.0, + "mmlu_elementary_mathematics": 1.0, + "mmlu_formal_logic": 1.0, + "mmlu_global_facts": 1.0, + "mmlu_high_school_biology": 1.0, + "mmlu_high_school_chemistry": 1.0, + "mmlu_high_school_computer_science": 1.0, + "mmlu_high_school_european_history": 1.0, + "mmlu_high_school_geography": 1.0, + "mmlu_high_school_government_and_politics": 1.0, + "mmlu_high_school_macroeconomics": 1.0, + "mmlu_high_school_mathematics": 1.0, + "mmlu_high_school_microeconomics": 1.0, + "mmlu_high_school_physics": 1.0, + "mmlu_high_school_psychology": 1.0, + "mmlu_high_school_statistics": 1.0, + "mmlu_high_school_us_history": 1.0, + "mmlu_high_school_world_history": 1.0, + "mmlu_human_aging": 1.0, + "mmlu_human_sexuality": 1.0, + "mmlu_humanities": 2, + "mmlu_international_law": 1.0, + "mmlu_jurisprudence": 1.0, + "mmlu_logical_fallacies": 1.0, + "mmlu_machine_learning": 1.0, + "mmlu_management": 1.0, + "mmlu_marketing": 1.0, + "mmlu_medical_genetics": 1.0, + "mmlu_miscellaneous": 1.0, + "mmlu_moral_disputes": 1.0, + "mmlu_moral_scenarios": 1.0, + "mmlu_nutrition": 1.0, + "mmlu_other": 2, + "mmlu_philosophy": 1.0, + "mmlu_prehistory": 1.0, + "mmlu_professional_accounting": 1.0, + "mmlu_professional_law": 1.0, + "mmlu_professional_medicine": 1.0, + "mmlu_professional_psychology": 1.0, + "mmlu_public_relations": 1.0, + "mmlu_security_studies": 1.0, + "mmlu_social_sciences": 2, + "mmlu_sociology": 1.0, + "mmlu_stem": 2, + "mmlu_us_foreign_policy": 1.0, + "mmlu_virology": 1.0, + "mmlu_world_religions": 1.0, + "winogrande": 1.0 + }, + "n-shot": { + "arc_challenge": 0, + 
"arc_easy": 0, + "hellaswag": 0, + "kmmlu_accounting": 0, + "kmmlu_agricultural_sciences": 0, + "kmmlu_aviation_engineering_and_maintenance": 0, + "kmmlu_biology": 0, + "kmmlu_chemical_engineering": 0, + "kmmlu_chemistry": 0, + "kmmlu_civil_engineering": 0, + "kmmlu_computer_science": 0, + "kmmlu_construction": 0, + "kmmlu_criminal_law": 0, + "kmmlu_ecology": 0, + "kmmlu_economics": 0, + "kmmlu_education": 0, + "kmmlu_electrical_engineering": 0, + "kmmlu_electronics_engineering": 0, + "kmmlu_energy_management": 0, + "kmmlu_environmental_science": 0, + "kmmlu_fashion": 0, + "kmmlu_food_processing": 0, + "kmmlu_gas_technology_and_engineering": 0, + "kmmlu_geomatics": 0, + "kmmlu_health": 0, + "kmmlu_industrial_engineer": 0, + "kmmlu_information_technology": 0, + "kmmlu_interior_architecture_and_design": 0, + "kmmlu_korean_history": 0, + "kmmlu_law": 0, + "kmmlu_machine_design_and_manufacturing": 0, + "kmmlu_management": 0, + "kmmlu_maritime_engineering": 0, + "kmmlu_marketing": 0, + "kmmlu_materials_engineering": 0, + "kmmlu_math": 0, + "kmmlu_mechanical_engineering": 0, + "kmmlu_nondestructive_testing": 0, + "kmmlu_patent": 0, + "kmmlu_political_science_and_sociology": 0, + "kmmlu_psychology": 0, + "kmmlu_public_safety": 0, + "kmmlu_railway_and_automotive_engineering": 0, + "kmmlu_real_estate": 0, + "kmmlu_refrigerating_machinery": 0, + "kmmlu_social_welfare": 0, + "kmmlu_taxation": 0, + "kmmlu_telecommunications_and_wireless_technology": 0, + "kobest_boolq": 0, + "kobest_copa": 0, + "kobest_hellaswag": 0, + "mmlu_abstract_algebra": 0, + "mmlu_anatomy": 0, + "mmlu_astronomy": 0, + "mmlu_business_ethics": 0, + "mmlu_clinical_knowledge": 0, + "mmlu_college_biology": 0, + "mmlu_college_chemistry": 0, + "mmlu_college_computer_science": 0, + "mmlu_college_mathematics": 0, + "mmlu_college_medicine": 0, + "mmlu_college_physics": 0, + "mmlu_computer_security": 0, + "mmlu_conceptual_physics": 0, + "mmlu_econometrics": 0, + "mmlu_electrical_engineering": 0, + 
"mmlu_elementary_mathematics": 0, + "mmlu_formal_logic": 0, + "mmlu_global_facts": 0, + "mmlu_high_school_biology": 0, + "mmlu_high_school_chemistry": 0, + "mmlu_high_school_computer_science": 0, + "mmlu_high_school_european_history": 0, + "mmlu_high_school_geography": 0, + "mmlu_high_school_government_and_politics": 0, + "mmlu_high_school_macroeconomics": 0, + "mmlu_high_school_mathematics": 0, + "mmlu_high_school_microeconomics": 0, + "mmlu_high_school_physics": 0, + "mmlu_high_school_psychology": 0, + "mmlu_high_school_statistics": 0, + "mmlu_high_school_us_history": 0, + "mmlu_high_school_world_history": 0, + "mmlu_human_aging": 0, + "mmlu_human_sexuality": 0, + "mmlu_international_law": 0, + "mmlu_jurisprudence": 0, + "mmlu_logical_fallacies": 0, + "mmlu_machine_learning": 0, + "mmlu_management": 0, + "mmlu_marketing": 0, + "mmlu_medical_genetics": 0, + "mmlu_miscellaneous": 0, + "mmlu_moral_disputes": 0, + "mmlu_moral_scenarios": 0, + "mmlu_nutrition": 0, + "mmlu_philosophy": 0, + "mmlu_prehistory": 0, + "mmlu_professional_accounting": 0, + "mmlu_professional_law": 0, + "mmlu_professional_medicine": 0, + "mmlu_professional_psychology": 0, + "mmlu_public_relations": 0, + "mmlu_security_studies": 0, + "mmlu_sociology": 0, + "mmlu_us_foreign_policy": 0, + "mmlu_virology": 0, + "mmlu_world_religions": 0, + "winogrande": 0 + }, + "higher_is_better": { + "arc_challenge": { + "acc": true, + "acc_norm": true + }, + "arc_easy": { + "acc": true, + "acc_norm": true + }, + "hellaswag": { + "acc": true, + "acc_norm": true + }, + "kmmlu": { + "acc": true + }, + "kmmlu_accounting": { + "acc": true + }, + "kmmlu_agricultural_sciences": { + "acc": true + }, + "kmmlu_applied_science": { + "acc": true + }, + "kmmlu_aviation_engineering_and_maintenance": { + "acc": true + }, + "kmmlu_biology": { + "acc": true + }, + "kmmlu_chemical_engineering": { + "acc": true + }, + "kmmlu_chemistry": { + "acc": true + }, + "kmmlu_civil_engineering": { + "acc": true + }, + 
"kmmlu_computer_science": { + "acc": true + }, + "kmmlu_construction": { + "acc": true + }, + "kmmlu_criminal_law": { + "acc": true + }, + "kmmlu_ecology": { + "acc": true + }, + "kmmlu_economics": { + "acc": true + }, + "kmmlu_education": { + "acc": true + }, + "kmmlu_electrical_engineering": { + "acc": true + }, + "kmmlu_electronics_engineering": { + "acc": true + }, + "kmmlu_energy_management": { + "acc": true + }, + "kmmlu_environmental_science": { + "acc": true + }, + "kmmlu_fashion": { + "acc": true + }, + "kmmlu_food_processing": { + "acc": true + }, + "kmmlu_gas_technology_and_engineering": { + "acc": true + }, + "kmmlu_geomatics": { + "acc": true + }, + "kmmlu_health": { + "acc": true + }, + "kmmlu_humss": { + "acc": true + }, + "kmmlu_industrial_engineer": { + "acc": true + }, + "kmmlu_information_technology": { + "acc": true + }, + "kmmlu_interior_architecture_and_design": { + "acc": true + }, + "kmmlu_korean_history": { + "acc": true + }, + "kmmlu_law": { + "acc": true + }, + "kmmlu_machine_design_and_manufacturing": { + "acc": true + }, + "kmmlu_management": { + "acc": true + }, + "kmmlu_maritime_engineering": { + "acc": true + }, + "kmmlu_marketing": { + "acc": true + }, + "kmmlu_materials_engineering": { + "acc": true + }, + "kmmlu_math": { + "acc": true + }, + "kmmlu_mechanical_engineering": { + "acc": true + }, + "kmmlu_nondestructive_testing": { + "acc": true + }, + "kmmlu_other": { + "acc": true + }, + "kmmlu_patent": { + "acc": true + }, + "kmmlu_political_science_and_sociology": { + "acc": true + }, + "kmmlu_psychology": { + "acc": true + }, + "kmmlu_public_safety": { + "acc": true + }, + "kmmlu_railway_and_automotive_engineering": { + "acc": true + }, + "kmmlu_real_estate": { + "acc": true + }, + "kmmlu_refrigerating_machinery": { + "acc": true + }, + "kmmlu_social_welfare": { + "acc": true + }, + "kmmlu_stem": { + "acc": true + }, + "kmmlu_taxation": { + "acc": true + }, + "kmmlu_telecommunications_and_wireless_technology": { + "acc": true + 
}, + "kobest_boolq": { + "acc": true, + "f1": true + }, + "kobest_copa": { + "acc": true, + "f1": true + }, + "kobest_hellaswag": { + "acc": true, + "acc_norm": true, + "f1": true + }, + "mmlu": { + "acc": true + }, + "mmlu_abstract_algebra": { + "acc": true + }, + "mmlu_anatomy": { + "acc": true + }, + "mmlu_astronomy": { + "acc": true + }, + "mmlu_business_ethics": { + "acc": true + }, + "mmlu_clinical_knowledge": { + "acc": true + }, + "mmlu_college_biology": { + "acc": true + }, + "mmlu_college_chemistry": { + "acc": true + }, + "mmlu_college_computer_science": { + "acc": true + }, + "mmlu_college_mathematics": { + "acc": true + }, + "mmlu_college_medicine": { + "acc": true + }, + "mmlu_college_physics": { + "acc": true + }, + "mmlu_computer_security": { + "acc": true + }, + "mmlu_conceptual_physics": { + "acc": true + }, + "mmlu_econometrics": { + "acc": true + }, + "mmlu_electrical_engineering": { + "acc": true + }, + "mmlu_elementary_mathematics": { + "acc": true + }, + "mmlu_formal_logic": { + "acc": true + }, + "mmlu_global_facts": { + "acc": true + }, + "mmlu_high_school_biology": { + "acc": true + }, + "mmlu_high_school_chemistry": { + "acc": true + }, + "mmlu_high_school_computer_science": { + "acc": true + }, + "mmlu_high_school_european_history": { + "acc": true + }, + "mmlu_high_school_geography": { + "acc": true + }, + "mmlu_high_school_government_and_politics": { + "acc": true + }, + "mmlu_high_school_macroeconomics": { + "acc": true + }, + "mmlu_high_school_mathematics": { + "acc": true + }, + "mmlu_high_school_microeconomics": { + "acc": true + }, + "mmlu_high_school_physics": { + "acc": true + }, + "mmlu_high_school_psychology": { + "acc": true + }, + "mmlu_high_school_statistics": { + "acc": true + }, + "mmlu_high_school_us_history": { + "acc": true + }, + "mmlu_high_school_world_history": { + "acc": true + }, + "mmlu_human_aging": { + "acc": true + }, + "mmlu_human_sexuality": { + "acc": true + }, + "mmlu_humanities": { + "acc": true + }, + 
"mmlu_international_law": { + "acc": true + }, + "mmlu_jurisprudence": { + "acc": true + }, + "mmlu_logical_fallacies": { + "acc": true + }, + "mmlu_machine_learning": { + "acc": true + }, + "mmlu_management": { + "acc": true + }, + "mmlu_marketing": { + "acc": true + }, + "mmlu_medical_genetics": { + "acc": true + }, + "mmlu_miscellaneous": { + "acc": true + }, + "mmlu_moral_disputes": { + "acc": true + }, + "mmlu_moral_scenarios": { + "acc": true + }, + "mmlu_nutrition": { + "acc": true + }, + "mmlu_other": { + "acc": true + }, + "mmlu_philosophy": { + "acc": true + }, + "mmlu_prehistory": { + "acc": true + }, + "mmlu_professional_accounting": { + "acc": true + }, + "mmlu_professional_law": { + "acc": true + }, + "mmlu_professional_medicine": { + "acc": true + }, + "mmlu_professional_psychology": { + "acc": true + }, + "mmlu_public_relations": { + "acc": true + }, + "mmlu_security_studies": { + "acc": true + }, + "mmlu_social_sciences": { + "acc": true + }, + "mmlu_sociology": { + "acc": true + }, + "mmlu_stem": { + "acc": true + }, + "mmlu_us_foreign_policy": { + "acc": true + }, + "mmlu_virology": { + "acc": true + }, + "mmlu_world_religions": { + "acc": true + }, + "winogrande": { + "acc": true + } + }, + "n-samples": { + "kobest_hellaswag": { + "original": 500, + "effective": 400 + }, + "kobest_copa": { + "original": 1000, + "effective": 400 + }, + "kobest_boolq": { + "original": 1404, + "effective": 400 + }, + "kmmlu_biology": { + "original": 1000, + "effective": 400 + }, + "kmmlu_chemical_engineering": { + "original": 1000, + "effective": 400 + }, + "kmmlu_chemistry": { + "original": 600, + "effective": 400 + }, + "kmmlu_civil_engineering": { + "original": 1000, + "effective": 400 + }, + "kmmlu_computer_science": { + "original": 1000, + "effective": 400 + }, + "kmmlu_ecology": { + "original": 1000, + "effective": 400 + }, + "kmmlu_electrical_engineering": { + "original": 1000, + "effective": 400 + }, + "kmmlu_information_technology": { + "original": 1000, + 
"effective": 400 + }, + "kmmlu_materials_engineering": { + "original": 1000, + "effective": 400 + }, + "kmmlu_math": { + "original": 300, + "effective": 300 + }, + "kmmlu_mechanical_engineering": { + "original": 1000, + "effective": 400 + }, + "kmmlu_agricultural_sciences": { + "original": 1000, + "effective": 400 + }, + "kmmlu_construction": { + "original": 1000, + "effective": 400 + }, + "kmmlu_fashion": { + "original": 1000, + "effective": 400 + }, + "kmmlu_food_processing": { + "original": 1000, + "effective": 400 + }, + "kmmlu_health": { + "original": 100, + "effective": 100 + }, + "kmmlu_interior_architecture_and_design": { + "original": 1000, + "effective": 400 + }, + "kmmlu_marketing": { + "original": 1000, + "effective": 400 + }, + "kmmlu_patent": { + "original": 100, + "effective": 100 + }, + "kmmlu_public_safety": { + "original": 1000, + "effective": 400 + }, + "kmmlu_real_estate": { + "original": 200, + "effective": 200 + }, + "kmmlu_refrigerating_machinery": { + "original": 1000, + "effective": 400 + }, + "kmmlu_aviation_engineering_and_maintenance": { + "original": 1000, + "effective": 400 + }, + "kmmlu_electronics_engineering": { + "original": 1000, + "effective": 400 + }, + "kmmlu_energy_management": { + "original": 1000, + "effective": 400 + }, + "kmmlu_environmental_science": { + "original": 1000, + "effective": 400 + }, + "kmmlu_gas_technology_and_engineering": { + "original": 1000, + "effective": 400 + }, + "kmmlu_geomatics": { + "original": 1000, + "effective": 400 + }, + "kmmlu_industrial_engineer": { + "original": 1000, + "effective": 400 + }, + "kmmlu_machine_design_and_manufacturing": { + "original": 1000, + "effective": 400 + }, + "kmmlu_maritime_engineering": { + "original": 600, + "effective": 400 + }, + "kmmlu_nondestructive_testing": { + "original": 1000, + "effective": 400 + }, + "kmmlu_railway_and_automotive_engineering": { + "original": 1000, + "effective": 400 + }, + "kmmlu_telecommunications_and_wireless_technology": { + 
"original": 1000, + "effective": 400 + }, + "kmmlu_accounting": { + "original": 100, + "effective": 100 + }, + "kmmlu_criminal_law": { + "original": 200, + "effective": 200 + }, + "kmmlu_economics": { + "original": 130, + "effective": 130 + }, + "kmmlu_education": { + "original": 100, + "effective": 100 + }, + "kmmlu_korean_history": { + "original": 100, + "effective": 100 + }, + "kmmlu_law": { + "original": 1000, + "effective": 400 + }, + "kmmlu_management": { + "original": 1000, + "effective": 400 + }, + "kmmlu_political_science_and_sociology": { + "original": 300, + "effective": 300 + }, + "kmmlu_psychology": { + "original": 1000, + "effective": 400 + }, + "kmmlu_social_welfare": { + "original": 1000, + "effective": 400 + }, + "kmmlu_taxation": { + "original": 200, + "effective": 200 + }, + "winogrande": { + "original": 1267, + "effective": 400 + }, + "arc_challenge": { + "original": 1172, + "effective": 400 + }, + "arc_easy": { + "original": 2376, + "effective": 400 + }, + "hellaswag": { + "original": 10042, + "effective": 400 + }, + "mmlu_abstract_algebra": { + "original": 100, + "effective": 100 + }, + "mmlu_anatomy": { + "original": 135, + "effective": 135 + }, + "mmlu_astronomy": { + "original": 152, + "effective": 152 + }, + "mmlu_college_biology": { + "original": 144, + "effective": 144 + }, + "mmlu_college_chemistry": { + "original": 100, + "effective": 100 + }, + "mmlu_college_computer_science": { + "original": 100, + "effective": 100 + }, + "mmlu_college_mathematics": { + "original": 100, + "effective": 100 + }, + "mmlu_college_physics": { + "original": 102, + "effective": 102 + }, + "mmlu_computer_security": { + "original": 100, + "effective": 100 + }, + "mmlu_conceptual_physics": { + "original": 235, + "effective": 235 + }, + "mmlu_electrical_engineering": { + "original": 145, + "effective": 145 + }, + "mmlu_elementary_mathematics": { + "original": 378, + "effective": 378 + }, + "mmlu_high_school_biology": { + "original": 310, + "effective": 310 + }, 
+ "mmlu_high_school_chemistry": { + "original": 203, + "effective": 203 + }, + "mmlu_high_school_computer_science": { + "original": 100, + "effective": 100 + }, + "mmlu_high_school_mathematics": { + "original": 270, + "effective": 270 + }, + "mmlu_high_school_physics": { + "original": 151, + "effective": 151 + }, + "mmlu_high_school_statistics": { + "original": 216, + "effective": 216 + }, + "mmlu_machine_learning": { + "original": 112, + "effective": 112 + }, + "mmlu_business_ethics": { + "original": 100, + "effective": 100 + }, + "mmlu_clinical_knowledge": { + "original": 265, + "effective": 265 + }, + "mmlu_college_medicine": { + "original": 173, + "effective": 173 + }, + "mmlu_global_facts": { + "original": 100, + "effective": 100 + }, + "mmlu_human_aging": { + "original": 223, + "effective": 223 + }, + "mmlu_management": { + "original": 103, + "effective": 103 + }, + "mmlu_marketing": { + "original": 234, + "effective": 234 + }, + "mmlu_medical_genetics": { + "original": 100, + "effective": 100 + }, + "mmlu_miscellaneous": { + "original": 783, + "effective": 400 + }, + "mmlu_nutrition": { + "original": 306, + "effective": 306 + }, + "mmlu_professional_accounting": { + "original": 282, + "effective": 282 + }, + "mmlu_professional_medicine": { + "original": 272, + "effective": 272 + }, + "mmlu_virology": { + "original": 166, + "effective": 166 + }, + "mmlu_econometrics": { + "original": 114, + "effective": 114 + }, + "mmlu_high_school_geography": { + "original": 198, + "effective": 198 + }, + "mmlu_high_school_government_and_politics": { + "original": 193, + "effective": 193 + }, + "mmlu_high_school_macroeconomics": { + "original": 390, + "effective": 390 + }, + "mmlu_high_school_microeconomics": { + "original": 238, + "effective": 238 + }, + "mmlu_high_school_psychology": { + "original": 545, + "effective": 400 + }, + "mmlu_human_sexuality": { + "original": 131, + "effective": 131 + }, + "mmlu_professional_psychology": { + "original": 612, + "effective": 400 + 
}, + "mmlu_public_relations": { + "original": 110, + "effective": 110 + }, + "mmlu_security_studies": { + "original": 245, + "effective": 245 + }, + "mmlu_sociology": { + "original": 201, + "effective": 201 + }, + "mmlu_us_foreign_policy": { + "original": 100, + "effective": 100 + }, + "mmlu_formal_logic": { + "original": 126, + "effective": 126 + }, + "mmlu_high_school_european_history": { + "original": 165, + "effective": 165 + }, + "mmlu_high_school_us_history": { + "original": 204, + "effective": 204 + }, + "mmlu_high_school_world_history": { + "original": 237, + "effective": 237 + }, + "mmlu_international_law": { + "original": 121, + "effective": 121 + }, + "mmlu_jurisprudence": { + "original": 108, + "effective": 108 + }, + "mmlu_logical_fallacies": { + "original": 163, + "effective": 163 + }, + "mmlu_moral_disputes": { + "original": 346, + "effective": 346 + }, + "mmlu_moral_scenarios": { + "original": 895, + "effective": 400 + }, + "mmlu_philosophy": { + "original": 311, + "effective": 311 + }, + "mmlu_prehistory": { + "original": 324, + "effective": 324 + }, + "mmlu_professional_law": { + "original": 1534, + "effective": 400 + }, + "mmlu_world_religions": { + "original": 171, + "effective": 171 + } + }, + "config": { + "model": "hf", + "model_args": { + "pretrained": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "trust_remote_code": true + }, + "model_num_parameters": 4022468096, + "model_dtype": "torch.bfloat16", + "model_revision": "main", + "model_sha": "", + "batch_size": "12", + "batch_sizes": [], + "device": "cuda:0", + "use_cache": null, + "limit": 400.0, + "bootstrap_iters": 100000, + "gen_kwargs": {}, + "random_seed": 0, + "numpy_seed": 1234, + "torch_seed": 1234, + "fewshot_seed": 1234 + }, + "git_hash": "57af8ea", + "date": 1776063609.887477, + "pretty_env_info": "PyTorch version: 2.9.0+cu128\nIs debug build: False\nCUDA used to build PyTorch: 12.8\nROCM used to build PyTorch: N/A\n\nOS: 
Ubuntu 22.04.5 LTS (x86_64)\nGCC version: (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0\nClang version: Could not collect\nCMake version: version 4.1.0\nLibc version: glibc-2.35\n\nPython version: 3.11.14 | packaged by conda-forge | (main, Oct 13 2025, 14:09:32) [GCC 14.3.0] (64-bit runtime)\nPython platform: Linux-6.8.0-107-generic-x86_64-with-glibc2.35\nIs CUDA available: True\nCUDA runtime version: 12.8.93\nCUDA_MODULE_LOADING set to: \nGPU models and configuration: GPU 0: NVIDIA RTX PRO 6000 Blackwell Workstation Edition\nNvidia driver version: 580.65.06\ncuDNN version: Probably one of the following:\n/usr/lib/x86_64-linux-gnu/libcudnn.so.9.8.0\n/usr/lib/x86_64-linux-gnu/libcudnn_adv.so.9.8.0\n/usr/lib/x86_64-linux-gnu/libcudnn_cnn.so.9.8.0\n/usr/lib/x86_64-linux-gnu/libcudnn_engines_precompiled.so.9.8.0\n/usr/lib/x86_64-linux-gnu/libcudnn_engines_runtime_compiled.so.9.8.0\n/usr/lib/x86_64-linux-gnu/libcudnn_graph.so.9.8.0\n/usr/lib/x86_64-linux-gnu/libcudnn_heuristic.so.9.8.0\n/usr/lib/x86_64-linux-gnu/libcudnn_ops.so.9.8.0\nIs XPU available: False\nHIP runtime version: N/A\nMIOpen runtime version: N/A\nIs XNNPACK available: True\n\nCPU:\nArchitecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nAddress sizes: 48 bits physical, 48 bits virtual\nByte Order: Little Endian\nCPU(s): 32\nOn-line CPU(s) list: 0-31\nVendor ID: AuthenticAMD\nModel name: AMD Ryzen 9 9950X 16-Core Processor\nCPU family: 26\nModel: 68\nThread(s) per core: 2\nCore(s) per socket: 16\nSocket(s): 1\nStepping: 0\nCPU max MHz: 5752.0000\nCPU min MHz: 600.0000\nBogoMIPS: 8599.98\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good amd_lbr_v2 nopl nonstop_tsc cpuid extd_apicid aperfmperf rapl pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce 
topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate ssbd mba perfmon_v2 ibrs ibpb stibp ibrs_enhanced vmmcall fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a avx512f avx512dq adx smap avx512ifma clflushopt clwb avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local user_shstk avx_vnni avx512_bf16 clzero irperf xsaveerptr rdpru wbnoinvd cppc amd_ibpb_ret arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif x2avic v_spec_ctrl vnmi avx512vbmi umip pku ospke avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg avx512_vpopcntdq rdpid bus_lock_detect movdiri movdir64b overflow_recov succor smca fsrm avx512_vp2intersect flush_l1d srso_user_kernel_no\nVirtualization: AMD-V\nL1d cache: 768 KiB (16 instances)\nL1i cache: 512 KiB (16 instances)\nL2 cache: 16 MiB (16 instances)\nL3 cache: 64 MiB (2 instances)\nNUMA node(s): 1\nNUMA node0 CPU(s): 0-31\nVulnerability Gather data sampling: Not affected\nVulnerability Indirect target selection: Not affected\nVulnerability Itlb multihit: Not affected\nVulnerability L1tf: Not affected\nVulnerability Mds: Not affected\nVulnerability Meltdown: Not affected\nVulnerability Mmio stale data: Not affected\nVulnerability Reg file data sampling: Not affected\nVulnerability Retbleed: Not affected\nVulnerability Spec rstack overflow: Not affected\nVulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Enhanced / Automatic IBRS; IBPB conditional; STIBP always-on; PBRSB-eIBRS Not affected; BHI Not affected\nVulnerability Srbds: Not affected\nVulnerability Tsa: Not affected\nVulnerability Tsx async abort: Not affected\nVulnerability Vmscape: Not affected\n\nVersions of relevant 
libraries:\n[pip3] executorch==1.0.1\n[pip3] numpy==2.2.6\n[pip3] nvidia-cublas-cu12==12.8.4.1\n[pip3] nvidia-cuda-cupti-cu12==12.8.90\n[pip3] nvidia-cuda-nvrtc-cu12==12.8.93\n[pip3] nvidia-cuda-runtime-cu12==12.8.90\n[pip3] nvidia-cudnn-cu12==9.10.2.21\n[pip3] nvidia-cudnn-frontend==1.17.0\n[pip3] nvidia-cufft-cu12==11.3.3.83\n[pip3] nvidia-curand-cu12==10.3.9.90\n[pip3] nvidia-cusolver-cu12==11.7.3.90\n[pip3] nvidia-cusparse-cu12==12.5.8.93\n[pip3] nvidia-cusparselt-cu12==0.7.1\n[pip3] nvidia-nccl-cu12==2.27.5\n[pip3] nvidia-nvjitlink-cu12==12.8.93\n[pip3] nvidia-nvtx-cu12==12.8.90\n[pip3] optree==0.17.0\n[pip3] pytorch_tokenizers==1.0.1\n[pip3] torch==2.9.0+cu128\n[pip3] torch_c_dlpack_ext==0.1.4\n[pip3] torch-stoi==0.2.3\n[pip3] torchao==0.14.0\n[pip3] torchaudio==2.9.0+cu128\n[pip3] torchcodec==0.9.1\n[pip3] torchelastic==0.2.2\n[pip3] torchvision==0.24.0+cu128\n[pip3] triton==3.5.0\n[pip3] triton_kernels==1.0.0\n[conda] No relevant packages", + "transformers_version": "5.5.3", + "lm_eval_version": "0.4.11", + "upper_git_hash": null, + "tokenizer_pad_token": [ + "<|PAD_TOKEN|>", + "151669" + ], + "tokenizer_eos_token": [ + "<|endoftext|>", + "151643" + ], + "tokenizer_bos_token": [ + null, + "None" + ], + "eot_token_id": 151643, + "max_length": 32768, + "task_hashes": {}, + "model_source": "hf", + "model_name": "/home/unsloth/scp_stage2_pd/artifacts/cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt/checkpoints", + "model_name_sanitized": "__home__unsloth__scp_stage2_pd__artifacts__cpt_parallel_full_96gb_alwaysgood_qwen3_4b_cpt__checkpoints", + "system_instruction": null, + "system_instruction_sha": null, + "fewshot_as_multiturn": null, + "chat_template": null, + "chat_template_sha": null, + "total_evaluation_time_seconds": "344.1437347389874" +} \ No newline at end of file diff --git a/eval/checkpoints/cpt/stdout.txt b/eval/checkpoints/cpt/stdout.txt new file mode 100644 index 0000000..50f2405 --- /dev/null +++ b/eval/checkpoints/cpt/stdout.txt @@ -0,0 +1,2460 @@ 
+Accessing `causal_conv1d_fn` from `.models.aria.image_processing_aria`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.aria.image_processing_aria`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.aria.image_processing_aria`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.aria.image_processing_aria`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.aria.image_processing_pil_aria`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.aria.image_processing_pil_aria`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.aria.image_processing_pil_aria`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.aria.image_processing_pil_aria`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.auto.image_processing_auto`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.auto.image_processing_auto`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. 
+Accessing `_lazy_load_causal_conv1d` from `.models.auto.image_processing_auto`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.auto.image_processing_auto`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.beit.image_processing_beit`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.beit.image_processing_beit`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.beit.image_processing_beit`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.beit.image_processing_beit`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.beit.image_processing_pil_beit`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.beit.image_processing_pil_beit`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.beit.image_processing_pil_beit`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.beit.image_processing_pil_beit`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. 
+Accessing `causal_conv1d_fn` from `.models.bit.image_processing_bit`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.bit.image_processing_bit`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.bit.image_processing_bit`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.bit.image_processing_bit`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.bit.image_processing_pil_bit`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.bit.image_processing_pil_bit`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.bit.image_processing_pil_bit`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.bit.image_processing_pil_bit`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.blip.image_processing_blip`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.blip.image_processing_blip`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. 
+Accessing `_lazy_load_causal_conv1d` from `.models.blip.image_processing_blip`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.blip.image_processing_blip`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.blip.image_processing_pil_blip`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.blip.image_processing_pil_blip`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.blip.image_processing_pil_blip`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.blip.image_processing_pil_blip`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.bridgetower.image_processing_bridgetower`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.bridgetower.image_processing_bridgetower`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.bridgetower.image_processing_bridgetower`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.bridgetower.image_processing_bridgetower`. Returning `_causal_conv1d_cache` instead. 
Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.bridgetower.image_processing_pil_bridgetower`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.bridgetower.image_processing_pil_bridgetower`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.bridgetower.image_processing_pil_bridgetower`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.bridgetower.image_processing_pil_bridgetower`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.chameleon.image_processing_chameleon`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.chameleon.image_processing_chameleon`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.chameleon.image_processing_chameleon`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.chameleon.image_processing_chameleon`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.chameleon.image_processing_pil_chameleon`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. 
+Accessing `causal_conv1d_update` from `.models.chameleon.image_processing_pil_chameleon`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.chameleon.image_processing_pil_chameleon`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.chameleon.image_processing_pil_chameleon`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.chinese_clip.image_processing_chinese_clip`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.chinese_clip.image_processing_chinese_clip`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.chinese_clip.image_processing_chinese_clip`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.chinese_clip.image_processing_chinese_clip`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.chinese_clip.image_processing_chinese_pil_clip`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.chinese_clip.image_processing_chinese_pil_clip`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. 
+Accessing `_lazy_load_causal_conv1d` from `.models.chinese_clip.image_processing_chinese_pil_clip`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.chinese_clip.image_processing_chinese_pil_clip`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.chmv2.image_processing_chmv2`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.chmv2.image_processing_chmv2`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.chmv2.image_processing_chmv2`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.chmv2.image_processing_chmv2`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.clip.image_processing_clip`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.clip.image_processing_clip`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.clip.image_processing_clip`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.clip.image_processing_clip`. Returning `_causal_conv1d_cache` instead. 
Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.clip.image_processing_pil_clip`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.clip.image_processing_pil_clip`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.clip.image_processing_pil_clip`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.clip.image_processing_pil_clip`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.cohere2_vision.image_processing_cohere2_vision`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.cohere2_vision.image_processing_cohere2_vision`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.cohere2_vision.image_processing_cohere2_vision`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.cohere2_vision.image_processing_cohere2_vision`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.conditional_detr.image_processing_conditional_detr`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. 
+Accessing `causal_conv1d_update` from `.models.conditional_detr.image_processing_conditional_detr`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.conditional_detr.image_processing_conditional_detr`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.conditional_detr.image_processing_conditional_detr`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.conditional_detr.image_processing_pil_conditional_detr`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.conditional_detr.image_processing_pil_conditional_detr`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.conditional_detr.image_processing_pil_conditional_detr`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.conditional_detr.image_processing_pil_conditional_detr`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.convnext.image_processing_convnext`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.convnext.image_processing_convnext`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. 
+Accessing `_lazy_load_causal_conv1d` from `.models.convnext.image_processing_convnext`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.convnext.image_processing_convnext`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.convnext.image_processing_pil_convnext`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.convnext.image_processing_pil_convnext`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.convnext.image_processing_pil_convnext`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.convnext.image_processing_pil_convnext`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.deepseek_vl.image_processing_deepseek_vl`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.deepseek_vl.image_processing_deepseek_vl`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.deepseek_vl.image_processing_deepseek_vl`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.deepseek_vl.image_processing_deepseek_vl`. 
Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.deepseek_vl.image_processing_pil_deepseek_vl`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.deepseek_vl.image_processing_pil_deepseek_vl`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.deepseek_vl.image_processing_pil_deepseek_vl`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.deepseek_vl.image_processing_pil_deepseek_vl`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.deepseek_vl_hybrid.image_processing_deepseek_vl_hybrid`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.deepseek_vl_hybrid.image_processing_deepseek_vl_hybrid`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.deepseek_vl_hybrid.image_processing_deepseek_vl_hybrid`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.deepseek_vl_hybrid.image_processing_deepseek_vl_hybrid`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.deepseek_vl_hybrid.image_processing_pil_deepseek_vl_hybrid`. 
Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.deepseek_vl_hybrid.image_processing_pil_deepseek_vl_hybrid`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.deepseek_vl_hybrid.image_processing_pil_deepseek_vl_hybrid`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.deepseek_vl_hybrid.image_processing_pil_deepseek_vl_hybrid`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.deformable_detr.image_processing_deformable_detr`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.deformable_detr.image_processing_deformable_detr`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.deformable_detr.image_processing_deformable_detr`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.deformable_detr.image_processing_deformable_detr`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.deformable_detr.image_processing_pil_deformable_detr`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.deformable_detr.image_processing_pil_deformable_detr`. 
Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.deformable_detr.image_processing_pil_deformable_detr`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.deformable_detr.image_processing_pil_deformable_detr`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.deit.image_processing_deit`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.deit.image_processing_deit`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.deit.image_processing_deit`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.deit.image_processing_deit`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.deit.image_processing_pil_deit`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.deit.image_processing_pil_deit`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.deit.image_processing_pil_deit`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. 
+Accessing `_causal_conv1d_cache` from `.models.deit.image_processing_pil_deit`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.depth_pro.image_processing_depth_pro`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.depth_pro.image_processing_depth_pro`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.depth_pro.image_processing_depth_pro`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.depth_pro.image_processing_depth_pro`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.detr.image_processing_detr`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.detr.image_processing_detr`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.detr.image_processing_detr`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.detr.image_processing_detr`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.detr.image_processing_pil_detr`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. 
+Accessing `causal_conv1d_update` from `.models.detr.image_processing_pil_detr`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.detr.image_processing_pil_detr`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.detr.image_processing_pil_detr`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.dinov3_vit.image_processing_dinov3_vit`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.dinov3_vit.image_processing_dinov3_vit`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.dinov3_vit.image_processing_dinov3_vit`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.dinov3_vit.image_processing_dinov3_vit`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.donut.image_processing_donut`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.donut.image_processing_donut`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.donut.image_processing_donut`. Returning `_lazy_load_causal_conv1d` instead. 
Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.donut.image_processing_donut`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.donut.image_processing_pil_donut`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.donut.image_processing_pil_donut`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.donut.image_processing_pil_donut`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.donut.image_processing_pil_donut`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.dpt.image_processing_dpt`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.dpt.image_processing_dpt`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.dpt.image_processing_dpt`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.dpt.image_processing_dpt`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.dpt.image_processing_pil_dpt`. Returning `causal_conv1d_fn` instead. 
Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.dpt.image_processing_pil_dpt`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.dpt.image_processing_pil_dpt`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.dpt.image_processing_pil_dpt`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.efficientloftr.image_processing_efficientloftr`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.efficientloftr.image_processing_efficientloftr`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.efficientloftr.image_processing_efficientloftr`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.efficientloftr.image_processing_efficientloftr`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.efficientloftr.image_processing_pil_efficientloftr`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.efficientloftr.image_processing_pil_efficientloftr`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. 
+Accessing `_lazy_load_causal_conv1d` from `.models.efficientloftr.image_processing_pil_efficientloftr`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.efficientloftr.image_processing_pil_efficientloftr`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.efficientnet.image_processing_efficientnet`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.efficientnet.image_processing_efficientnet`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.efficientnet.image_processing_efficientnet`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.efficientnet.image_processing_efficientnet`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.efficientnet.image_processing_pil_efficientnet`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.efficientnet.image_processing_pil_efficientnet`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.efficientnet.image_processing_pil_efficientnet`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. 
+Accessing `_causal_conv1d_cache` from `.models.efficientnet.image_processing_pil_efficientnet`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.emu3.image_processing_emu3`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.emu3.image_processing_emu3`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.emu3.image_processing_emu3`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.emu3.image_processing_emu3`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.eomt.image_processing_eomt`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.eomt.image_processing_eomt`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.eomt.image_processing_eomt`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.eomt.image_processing_eomt`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.eomt.image_processing_pil_eomt`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. 
+Accessing `causal_conv1d_update` from `.models.eomt.image_processing_pil_eomt`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.eomt.image_processing_pil_eomt`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.eomt.image_processing_pil_eomt`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.ernie4_5_vl_moe.image_processing_ernie4_5_vl_moe`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.ernie4_5_vl_moe.image_processing_ernie4_5_vl_moe`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.ernie4_5_vl_moe.image_processing_ernie4_5_vl_moe`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.ernie4_5_vl_moe.image_processing_ernie4_5_vl_moe`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.ernie4_5_vl_moe.image_processing_pil_ernie4_5_vl_moe`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.ernie4_5_vl_moe.image_processing_pil_ernie4_5_vl_moe`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. 
+Accessing `_lazy_load_causal_conv1d` from `.models.ernie4_5_vl_moe.image_processing_pil_ernie4_5_vl_moe`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.ernie4_5_vl_moe.image_processing_pil_ernie4_5_vl_moe`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.flava.image_processing_flava`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.flava.image_processing_flava`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.flava.image_processing_flava`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.flava.image_processing_flava`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.flava.image_processing_pil_flava`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.flava.image_processing_pil_flava`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.flava.image_processing_pil_flava`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.flava.image_processing_pil_flava`. Returning `_causal_conv1d_cache` instead. 
Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.fuyu.image_processing_fuyu`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.fuyu.image_processing_fuyu`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.fuyu.image_processing_fuyu`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.fuyu.image_processing_fuyu`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.fuyu.image_processing_pil_fuyu`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.fuyu.image_processing_pil_fuyu`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.fuyu.image_processing_pil_fuyu`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.fuyu.image_processing_pil_fuyu`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.gemma3.image_processing_gemma3`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.gemma3.image_processing_gemma3`. Returning `causal_conv1d_update` instead. 
Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.gemma3.image_processing_gemma3`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.gemma3.image_processing_gemma3`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.gemma3.image_processing_pil_gemma3`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.gemma3.image_processing_pil_gemma3`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.gemma3.image_processing_pil_gemma3`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.gemma3.image_processing_pil_gemma3`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.gemma4.image_processing_gemma4`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.gemma4.image_processing_gemma4`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.gemma4.image_processing_gemma4`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.gemma4.image_processing_gemma4`. 
Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.gemma4.image_processing_pil_gemma4`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.gemma4.image_processing_pil_gemma4`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.gemma4.image_processing_pil_gemma4`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.gemma4.image_processing_pil_gemma4`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.glm46v.image_processing_glm46v`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.glm46v.image_processing_glm46v`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.glm46v.image_processing_glm46v`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.glm46v.image_processing_glm46v`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.glm46v.image_processing_pil_glm46v`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. 
+Accessing `causal_conv1d_update` from `.models.glm46v.image_processing_pil_glm46v`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.glm46v.image_processing_pil_glm46v`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.glm46v.image_processing_pil_glm46v`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.glm4v.image_processing_glm4v`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.glm4v.image_processing_glm4v`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.glm4v.image_processing_glm4v`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.glm4v.image_processing_glm4v`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.glm4v.image_processing_pil_glm4v`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.glm4v.image_processing_pil_glm4v`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.glm4v.image_processing_pil_glm4v`. Returning `_lazy_load_causal_conv1d` instead. 
Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.glm4v.image_processing_pil_glm4v`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.glm_image.image_processing_glm_image`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.glm_image.image_processing_glm_image`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.glm_image.image_processing_glm_image`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.glm_image.image_processing_glm_image`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.glm_image.image_processing_pil_glm_image`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.glm_image.image_processing_pil_glm_image`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.glm_image.image_processing_pil_glm_image`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.glm_image.image_processing_pil_glm_image`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. 
+Accessing `causal_conv1d_fn` from `.models.glpn.image_processing_glpn`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.glpn.image_processing_glpn`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.glpn.image_processing_glpn`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.glpn.image_processing_glpn`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.glpn.image_processing_pil_glpn`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.glpn.image_processing_pil_glpn`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.glpn.image_processing_pil_glpn`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.glpn.image_processing_pil_glpn`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.got_ocr2.image_processing_got_ocr2`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.got_ocr2.image_processing_got_ocr2`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. 
+Accessing `_lazy_load_causal_conv1d` from `.models.got_ocr2.image_processing_got_ocr2`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.got_ocr2.image_processing_got_ocr2`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.got_ocr2.image_processing_pil_got_ocr2`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.got_ocr2.image_processing_pil_got_ocr2`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.got_ocr2.image_processing_pil_got_ocr2`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.got_ocr2.image_processing_pil_got_ocr2`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.grounding_dino.image_processing_grounding_dino`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.grounding_dino.image_processing_grounding_dino`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.grounding_dino.image_processing_grounding_dino`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.grounding_dino.image_processing_grounding_dino`. 
Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.grounding_dino.image_processing_pil_grounding_dino`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.grounding_dino.image_processing_pil_grounding_dino`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.grounding_dino.image_processing_pil_grounding_dino`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.grounding_dino.image_processing_pil_grounding_dino`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.idefics.image_processing_idefics`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.idefics.image_processing_idefics`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.idefics.image_processing_idefics`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.idefics.image_processing_idefics`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.idefics.image_processing_pil_idefics`. Returning `causal_conv1d_fn` instead. 
Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.idefics.image_processing_pil_idefics`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.idefics.image_processing_pil_idefics`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.idefics.image_processing_pil_idefics`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.idefics2.image_processing_idefics2`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.idefics2.image_processing_idefics2`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.idefics2.image_processing_idefics2`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.idefics2.image_processing_idefics2`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.idefics2.image_processing_pil_idefics2`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.idefics2.image_processing_pil_idefics2`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. 
+Accessing `_lazy_load_causal_conv1d` from `.models.idefics2.image_processing_pil_idefics2`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.idefics2.image_processing_pil_idefics2`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.idefics3.image_processing_idefics3`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.idefics3.image_processing_idefics3`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.idefics3.image_processing_idefics3`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.idefics3.image_processing_idefics3`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.idefics3.image_processing_pil_idefics3`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.idefics3.image_processing_pil_idefics3`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.idefics3.image_processing_pil_idefics3`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.idefics3.image_processing_pil_idefics3`. Returning `_causal_conv1d_cache` instead. 
Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.imagegpt.image_processing_imagegpt`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.imagegpt.image_processing_imagegpt`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.imagegpt.image_processing_imagegpt`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.imagegpt.image_processing_imagegpt`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.imagegpt.image_processing_pil_imagegpt`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.imagegpt.image_processing_pil_imagegpt`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.imagegpt.image_processing_pil_imagegpt`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.imagegpt.image_processing_pil_imagegpt`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.janus.image_processing_janus`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.janus.image_processing_janus`. 
Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.janus.image_processing_janus`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.janus.image_processing_janus`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.janus.image_processing_pil_janus`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.janus.image_processing_pil_janus`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.janus.image_processing_pil_janus`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.janus.image_processing_pil_janus`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.kosmos2_5.image_processing_kosmos2_5`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.kosmos2_5.image_processing_kosmos2_5`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.kosmos2_5.image_processing_kosmos2_5`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. 
+Accessing `_causal_conv1d_cache` from `.models.kosmos2_5.image_processing_kosmos2_5`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.kosmos2_5.image_processing_pil_kosmos2_5`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.kosmos2_5.image_processing_pil_kosmos2_5`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.kosmos2_5.image_processing_pil_kosmos2_5`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.kosmos2_5.image_processing_pil_kosmos2_5`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.layoutlmv2.image_processing_layoutlmv2`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.layoutlmv2.image_processing_layoutlmv2`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.layoutlmv2.image_processing_layoutlmv2`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.layoutlmv2.image_processing_layoutlmv2`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.layoutlmv2.image_processing_pil_layoutlmv2`. 
Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.layoutlmv2.image_processing_pil_layoutlmv2`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.layoutlmv2.image_processing_pil_layoutlmv2`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.layoutlmv2.image_processing_pil_layoutlmv2`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.layoutlmv3.image_processing_layoutlmv3`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.layoutlmv3.image_processing_layoutlmv3`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.layoutlmv3.image_processing_layoutlmv3`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.layoutlmv3.image_processing_layoutlmv3`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.layoutlmv3.image_processing_pil_layoutlmv3`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.layoutlmv3.image_processing_pil_layoutlmv3`. Returning `causal_conv1d_update` instead. 
Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.layoutlmv3.image_processing_pil_layoutlmv3`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.layoutlmv3.image_processing_pil_layoutlmv3`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.levit.image_processing_levit`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.levit.image_processing_levit`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.levit.image_processing_levit`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.levit.image_processing_levit`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.levit.image_processing_pil_levit`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.levit.image_processing_pil_levit`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.levit.image_processing_pil_levit`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.levit.image_processing_pil_levit`. 
Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.lfm2_vl.image_processing_lfm2_vl`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.lfm2_vl.image_processing_lfm2_vl`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.lfm2_vl.image_processing_lfm2_vl`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.lfm2_vl.image_processing_lfm2_vl`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.lightglue.image_processing_lightglue`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.lightglue.image_processing_lightglue`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.lightglue.image_processing_lightglue`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.lightglue.image_processing_lightglue`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.lightglue.image_processing_pil_lightglue`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. 
+Accessing `causal_conv1d_update` from `.models.lightglue.image_processing_pil_lightglue`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.lightglue.image_processing_pil_lightglue`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.lightglue.image_processing_pil_lightglue`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.llama4.image_processing_llama4`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.llama4.image_processing_llama4`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.llama4.image_processing_llama4`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.llama4.image_processing_llama4`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.llava.image_processing_llava`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.llava.image_processing_llava`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.llava.image_processing_llava`. Returning `_lazy_load_causal_conv1d` instead. 
Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.llava.image_processing_llava`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.llava.image_processing_pil_llava`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.llava.image_processing_pil_llava`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.llava.image_processing_pil_llava`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.llava.image_processing_pil_llava`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.llava_next.image_processing_llava_next`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.llava_next.image_processing_llava_next`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.llava_next.image_processing_llava_next`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.llava_next.image_processing_llava_next`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. 
+Accessing `causal_conv1d_fn` from `.models.llava_next.image_processing_pil_llava_next`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.llava_next.image_processing_pil_llava_next`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.llava_next.image_processing_pil_llava_next`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.llava_next.image_processing_pil_llava_next`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.llava_onevision.image_processing_llava_onevision`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.llava_onevision.image_processing_llava_onevision`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.llava_onevision.image_processing_llava_onevision`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.llava_onevision.image_processing_llava_onevision`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.llava_onevision.image_processing_pil_llava_onevision`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. 
+Accessing `causal_conv1d_update` from `.models.llava_onevision.image_processing_pil_llava_onevision`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.llava_onevision.image_processing_pil_llava_onevision`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.llava_onevision.image_processing_pil_llava_onevision`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.mask2former.image_processing_mask2former`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.mask2former.image_processing_mask2former`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.mask2former.image_processing_mask2former`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.mask2former.image_processing_mask2former`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.mask2former.image_processing_pil_mask2former`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.mask2former.image_processing_pil_mask2former`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. 
+Accessing `_lazy_load_causal_conv1d` from `.models.mask2former.image_processing_pil_mask2former`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.mask2former.image_processing_pil_mask2former`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.maskformer.image_processing_maskformer`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.maskformer.image_processing_maskformer`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.maskformer.image_processing_maskformer`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.maskformer.image_processing_maskformer`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.maskformer.image_processing_pil_maskformer`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.maskformer.image_processing_pil_maskformer`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.maskformer.image_processing_pil_maskformer`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.maskformer.image_processing_pil_maskformer`. 
Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.mllama.image_processing_mllama`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.mllama.image_processing_mllama`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.mllama.image_processing_mllama`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.mllama.image_processing_mllama`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.mllama.image_processing_pil_mllama`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.mllama.image_processing_pil_mllama`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.mllama.image_processing_pil_mllama`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.mllama.image_processing_pil_mllama`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.mobilenet_v1.image_processing_mobilenet_pil_v1`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. 
+Accessing `causal_conv1d_update` from `.models.mobilenet_v1.image_processing_mobilenet_pil_v1`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.mobilenet_v1.image_processing_mobilenet_pil_v1`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.mobilenet_v1.image_processing_mobilenet_pil_v1`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.mobilenet_v1.image_processing_mobilenet_v1`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.mobilenet_v1.image_processing_mobilenet_v1`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.mobilenet_v1.image_processing_mobilenet_v1`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.mobilenet_v1.image_processing_mobilenet_v1`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.mobilenet_v2.image_processing_mobilenet_v2`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.mobilenet_v2.image_processing_mobilenet_v2`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. 
+Accessing `_lazy_load_causal_conv1d` from `.models.mobilenet_v2.image_processing_mobilenet_v2`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.mobilenet_v2.image_processing_mobilenet_v2`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.mobilenet_v2.image_processing_pil_mobilenet_v2`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.mobilenet_v2.image_processing_pil_mobilenet_v2`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.mobilenet_v2.image_processing_pil_mobilenet_v2`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.mobilenet_v2.image_processing_pil_mobilenet_v2`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.mobilevit.image_processing_mobilevit`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.mobilevit.image_processing_mobilevit`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.mobilevit.image_processing_mobilevit`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. 
+Accessing `_causal_conv1d_cache` from `.models.mobilevit.image_processing_mobilevit`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.mobilevit.image_processing_pil_mobilevit`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.mobilevit.image_processing_pil_mobilevit`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.mobilevit.image_processing_pil_mobilevit`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.mobilevit.image_processing_pil_mobilevit`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.nougat.image_processing_nougat`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.nougat.image_processing_nougat`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.nougat.image_processing_nougat`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.nougat.image_processing_nougat`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.nougat.image_processing_pil_nougat`. Returning `causal_conv1d_fn` instead. 
Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.nougat.image_processing_pil_nougat`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.nougat.image_processing_pil_nougat`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.nougat.image_processing_pil_nougat`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.oneformer.image_processing_oneformer`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.oneformer.image_processing_oneformer`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.oneformer.image_processing_oneformer`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.oneformer.image_processing_oneformer`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.oneformer.image_processing_pil_oneformer`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.oneformer.image_processing_pil_oneformer`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. 
+Accessing `_lazy_load_causal_conv1d` from `.models.oneformer.image_processing_pil_oneformer`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.oneformer.image_processing_pil_oneformer`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.ovis2.image_processing_ovis2`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.ovis2.image_processing_ovis2`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.ovis2.image_processing_ovis2`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.ovis2.image_processing_ovis2`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.ovis2.image_processing_pil_ovis2`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.ovis2.image_processing_pil_ovis2`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.ovis2.image_processing_pil_ovis2`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.ovis2.image_processing_pil_ovis2`. Returning `_causal_conv1d_cache` instead. 
Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.owlv2.image_processing_owlv2`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.owlv2.image_processing_owlv2`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.owlv2.image_processing_owlv2`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.owlv2.image_processing_owlv2`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.owlv2.image_processing_pil_owlv2`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.owlv2.image_processing_pil_owlv2`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.owlv2.image_processing_pil_owlv2`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.owlv2.image_processing_pil_owlv2`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.owlvit.image_processing_owlvit`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.owlvit.image_processing_owlvit`. Returning `causal_conv1d_update` instead. 
Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.owlvit.image_processing_owlvit`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.owlvit.image_processing_owlvit`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.owlvit.image_processing_pil_owlvit`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.owlvit.image_processing_pil_owlvit`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.owlvit.image_processing_pil_owlvit`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.owlvit.image_processing_pil_owlvit`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.paddleocr_vl.image_processing_paddleocr_vl`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.paddleocr_vl.image_processing_paddleocr_vl`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.paddleocr_vl.image_processing_paddleocr_vl`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. 
+Accessing `_causal_conv1d_cache` from `.models.paddleocr_vl.image_processing_paddleocr_vl`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.paddleocr_vl.image_processing_pil_paddleocr_vl`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.paddleocr_vl.image_processing_pil_paddleocr_vl`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.paddleocr_vl.image_processing_pil_paddleocr_vl`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.paddleocr_vl.image_processing_pil_paddleocr_vl`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.perceiver.image_processing_perceiver`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.perceiver.image_processing_perceiver`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.perceiver.image_processing_perceiver`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.perceiver.image_processing_perceiver`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.perceiver.image_processing_pil_perceiver`. 
Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.perceiver.image_processing_pil_perceiver`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.perceiver.image_processing_pil_perceiver`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.perceiver.image_processing_pil_perceiver`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.perception_lm.image_processing_perception_lm`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.perception_lm.image_processing_perception_lm`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.perception_lm.image_processing_perception_lm`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.perception_lm.image_processing_perception_lm`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.phi4_multimodal.image_processing_phi4_multimodal`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.phi4_multimodal.image_processing_phi4_multimodal`. Returning `causal_conv1d_update` instead. 
Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.phi4_multimodal.image_processing_phi4_multimodal`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.phi4_multimodal.image_processing_phi4_multimodal`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.pi0.image_processing_pi0`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.pi0.image_processing_pi0`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.pi0.image_processing_pi0`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.pi0.image_processing_pi0`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.pix2struct.image_processing_pil_pix2struct`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.pix2struct.image_processing_pil_pix2struct`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.pix2struct.image_processing_pil_pix2struct`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. 
+Accessing `_causal_conv1d_cache` from `.models.pix2struct.image_processing_pil_pix2struct`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.pix2struct.image_processing_pix2struct`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.pix2struct.image_processing_pix2struct`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.pix2struct.image_processing_pix2struct`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.pix2struct.image_processing_pix2struct`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.pixtral.image_processing_pil_pixtral`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.pixtral.image_processing_pil_pixtral`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.pixtral.image_processing_pil_pixtral`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.pixtral.image_processing_pil_pixtral`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.pixtral.image_processing_pixtral`. Returning `causal_conv1d_fn` instead. 
Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.pixtral.image_processing_pixtral`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.pixtral.image_processing_pixtral`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.pixtral.image_processing_pixtral`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.poolformer.image_processing_pil_poolformer`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.poolformer.image_processing_pil_poolformer`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.poolformer.image_processing_pil_poolformer`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.poolformer.image_processing_pil_poolformer`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.poolformer.image_processing_poolformer`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.poolformer.image_processing_poolformer`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. 
+Accessing `_lazy_load_causal_conv1d` from `.models.poolformer.image_processing_poolformer`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.poolformer.image_processing_poolformer`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.pp_chart2table.image_processing_pil_pp_chart2table`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.pp_chart2table.image_processing_pil_pp_chart2table`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.pp_chart2table.image_processing_pil_pp_chart2table`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.pp_chart2table.image_processing_pil_pp_chart2table`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.pp_chart2table.image_processing_pp_chart2table`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.pp_chart2table.image_processing_pp_chart2table`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.pp_chart2table.image_processing_pp_chart2table`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. 
+Accessing `_causal_conv1d_cache` from `.models.pp_chart2table.image_processing_pp_chart2table`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.pp_doclayout_v2.image_processing_pp_doclayout_v2`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.pp_doclayout_v2.image_processing_pp_doclayout_v2`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.pp_doclayout_v2.image_processing_pp_doclayout_v2`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.pp_doclayout_v2.image_processing_pp_doclayout_v2`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.pp_doclayout_v3.image_processing_pp_doclayout_v3`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.pp_doclayout_v3.image_processing_pp_doclayout_v3`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.pp_doclayout_v3.image_processing_pp_doclayout_v3`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.pp_doclayout_v3.image_processing_pp_doclayout_v3`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. 
+Accessing `causal_conv1d_fn` from `.models.pp_lcnet.image_processing_pp_lcnet`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.pp_lcnet.image_processing_pp_lcnet`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.pp_lcnet.image_processing_pp_lcnet`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.pp_lcnet.image_processing_pp_lcnet`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.pp_ocrv5_server_det.image_processing_pp_ocrv5_server_det`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.pp_ocrv5_server_det.image_processing_pp_ocrv5_server_det`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.pp_ocrv5_server_det.image_processing_pp_ocrv5_server_det`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.pp_ocrv5_server_det.image_processing_pp_ocrv5_server_det`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.pp_ocrv5_server_rec.image_processing_pp_ocrv5_server_rec`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. 
+Accessing `causal_conv1d_update` from `.models.pp_ocrv5_server_rec.image_processing_pp_ocrv5_server_rec`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.pp_ocrv5_server_rec.image_processing_pp_ocrv5_server_rec`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.pp_ocrv5_server_rec.image_processing_pp_ocrv5_server_rec`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.prompt_depth_anything.image_processing_pil_prompt_depth_anything`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.prompt_depth_anything.image_processing_pil_prompt_depth_anything`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.prompt_depth_anything.image_processing_pil_prompt_depth_anything`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.prompt_depth_anything.image_processing_pil_prompt_depth_anything`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.prompt_depth_anything.image_processing_prompt_depth_anything`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.prompt_depth_anything.image_processing_prompt_depth_anything`. Returning `causal_conv1d_update` instead. 
Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.prompt_depth_anything.image_processing_prompt_depth_anything`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.prompt_depth_anything.image_processing_prompt_depth_anything`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.pvt.image_processing_pil_pvt`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.pvt.image_processing_pil_pvt`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.pvt.image_processing_pil_pvt`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.pvt.image_processing_pil_pvt`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.pvt.image_processing_pvt`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.pvt.image_processing_pvt`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.pvt.image_processing_pvt`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.pvt.image_processing_pvt`. 
Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.qwen2_vl.image_processing_pil_qwen2_vl`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.qwen2_vl.image_processing_pil_qwen2_vl`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.qwen2_vl.image_processing_pil_qwen2_vl`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.qwen2_vl.image_processing_pil_qwen2_vl`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.qwen2_vl.image_processing_qwen2_vl`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.qwen2_vl.image_processing_qwen2_vl`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.qwen2_vl.image_processing_qwen2_vl`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.qwen2_vl.image_processing_qwen2_vl`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.rt_detr.image_processing_pil_rt_detr`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. 
+Accessing `causal_conv1d_update` from `.models.rt_detr.image_processing_pil_rt_detr`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.rt_detr.image_processing_pil_rt_detr`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.rt_detr.image_processing_pil_rt_detr`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.rt_detr.image_processing_rt_detr`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.rt_detr.image_processing_rt_detr`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.rt_detr.image_processing_rt_detr`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.rt_detr.image_processing_rt_detr`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.sam.image_processing_pil_sam`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.sam.image_processing_pil_sam`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.sam.image_processing_pil_sam`. Returning `_lazy_load_causal_conv1d` instead. 
Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.sam.image_processing_pil_sam`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.sam.image_processing_sam`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.sam.image_processing_sam`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.sam.image_processing_sam`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.sam.image_processing_sam`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.sam2.image_processing_sam2`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.sam2.image_processing_sam2`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.sam2.image_processing_sam2`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.sam2.image_processing_sam2`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.sam3.image_processing_sam3`. Returning `causal_conv1d_fn` instead. 
Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.sam3.image_processing_sam3`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.sam3.image_processing_sam3`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.sam3.image_processing_sam3`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.segformer.image_processing_pil_segformer`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.segformer.image_processing_pil_segformer`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.segformer.image_processing_pil_segformer`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.segformer.image_processing_pil_segformer`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.segformer.image_processing_segformer`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.segformer.image_processing_segformer`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. 
+Accessing `_lazy_load_causal_conv1d` from `.models.segformer.image_processing_segformer`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.segformer.image_processing_segformer`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.seggpt.image_processing_pil_seggpt`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.seggpt.image_processing_pil_seggpt`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.seggpt.image_processing_pil_seggpt`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.seggpt.image_processing_pil_seggpt`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.seggpt.image_processing_seggpt`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.seggpt.image_processing_seggpt`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.seggpt.image_processing_seggpt`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.seggpt.image_processing_seggpt`. Returning `_causal_conv1d_cache` instead. 
Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.siglip.image_processing_pil_siglip`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.siglip.image_processing_pil_siglip`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.siglip.image_processing_pil_siglip`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.siglip.image_processing_pil_siglip`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.siglip.image_processing_siglip`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.siglip.image_processing_siglip`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.siglip.image_processing_siglip`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.siglip.image_processing_siglip`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.siglip2.image_processing_pil_siglip2`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.siglip2.image_processing_pil_siglip2`. 
Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.siglip2.image_processing_pil_siglip2`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.siglip2.image_processing_pil_siglip2`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.siglip2.image_processing_siglip2`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.siglip2.image_processing_siglip2`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.siglip2.image_processing_siglip2`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.siglip2.image_processing_siglip2`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.slanext.image_processing_slanext`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.slanext.image_processing_slanext`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.slanext.image_processing_slanext`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. 
+Accessing `_causal_conv1d_cache` from `.models.slanext.image_processing_slanext`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.smolvlm.image_processing_pil_smolvlm`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.smolvlm.image_processing_pil_smolvlm`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.smolvlm.image_processing_pil_smolvlm`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.smolvlm.image_processing_pil_smolvlm`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.smolvlm.image_processing_smolvlm`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.smolvlm.image_processing_smolvlm`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.smolvlm.image_processing_smolvlm`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.smolvlm.image_processing_smolvlm`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.superglue.image_processing_pil_superglue`. Returning `causal_conv1d_fn` instead. 
Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.superglue.image_processing_pil_superglue`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.superglue.image_processing_pil_superglue`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.superglue.image_processing_pil_superglue`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.superglue.image_processing_superglue`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.superglue.image_processing_superglue`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.superglue.image_processing_superglue`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.superglue.image_processing_superglue`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.superpoint.image_processing_pil_superpoint`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.superpoint.image_processing_pil_superpoint`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. 
+Accessing `_lazy_load_causal_conv1d` from `.models.superpoint.image_processing_pil_superpoint`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.superpoint.image_processing_pil_superpoint`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.superpoint.image_processing_superpoint`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.superpoint.image_processing_superpoint`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.superpoint.image_processing_superpoint`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.superpoint.image_processing_superpoint`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.swin2sr.image_processing_pil_swin2sr`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.swin2sr.image_processing_pil_swin2sr`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.swin2sr.image_processing_pil_swin2sr`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.swin2sr.image_processing_pil_swin2sr`. 
Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.swin2sr.image_processing_swin2sr`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.swin2sr.image_processing_swin2sr`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.swin2sr.image_processing_swin2sr`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.swin2sr.image_processing_swin2sr`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.textnet.image_processing_pil_textnet`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.textnet.image_processing_pil_textnet`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.textnet.image_processing_pil_textnet`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.textnet.image_processing_pil_textnet`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.textnet.image_processing_textnet`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. 
+Accessing `causal_conv1d_update` from `.models.textnet.image_processing_textnet`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.textnet.image_processing_textnet`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.textnet.image_processing_textnet`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.timm_wrapper.image_processing_timm_wrapper`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.timm_wrapper.image_processing_timm_wrapper`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.timm_wrapper.image_processing_timm_wrapper`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.timm_wrapper.image_processing_timm_wrapper`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.tvp.image_processing_pil_tvp`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.tvp.image_processing_pil_tvp`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.tvp.image_processing_pil_tvp`. Returning `_lazy_load_causal_conv1d` instead. 
Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.tvp.image_processing_pil_tvp`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.tvp.image_processing_tvp`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.tvp.image_processing_tvp`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.tvp.image_processing_tvp`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.tvp.image_processing_tvp`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.uvdoc.image_processing_uvdoc`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.uvdoc.image_processing_uvdoc`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.uvdoc.image_processing_uvdoc`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.uvdoc.image_processing_uvdoc`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.video_llama_3.image_processing_pil_video_llama_3`. Returning `causal_conv1d_fn` instead. 
Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.video_llama_3.image_processing_pil_video_llama_3`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.video_llama_3.image_processing_pil_video_llama_3`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.video_llama_3.image_processing_pil_video_llama_3`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.video_llama_3.image_processing_video_llama_3`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.video_llama_3.image_processing_video_llama_3`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.video_llama_3.image_processing_video_llama_3`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.video_llama_3.image_processing_video_llama_3`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.video_llava.image_processing_video_llava`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.video_llava.image_processing_video_llava`. Returning `causal_conv1d_update` instead. 
Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.video_llava.image_processing_video_llava`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.video_llava.image_processing_video_llava`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.videomae.image_processing_pil_videomae`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.videomae.image_processing_pil_videomae`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.videomae.image_processing_pil_videomae`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.videomae.image_processing_pil_videomae`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.videomae.image_processing_videomae`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.videomae.image_processing_videomae`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.videomae.image_processing_videomae`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. 
+Accessing `_causal_conv1d_cache` from `.models.videomae.image_processing_videomae`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.vilt.image_processing_pil_vilt`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.vilt.image_processing_pil_vilt`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.vilt.image_processing_pil_vilt`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.vilt.image_processing_pil_vilt`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.vilt.image_processing_vilt`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.vilt.image_processing_vilt`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.vilt.image_processing_vilt`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.vilt.image_processing_vilt`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.vit.image_processing_pil_vit`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. 
+Accessing `causal_conv1d_update` from `.models.vit.image_processing_pil_vit`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.vit.image_processing_pil_vit`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.vit.image_processing_pil_vit`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.vit.image_processing_vit`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.vit.image_processing_vit`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.vit.image_processing_vit`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.vit.image_processing_vit`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.vitmatte.image_processing_pil_vitmatte`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.vitmatte.image_processing_pil_vitmatte`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.vitmatte.image_processing_pil_vitmatte`. Returning `_lazy_load_causal_conv1d` instead. 
Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.vitmatte.image_processing_pil_vitmatte`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.vitmatte.image_processing_vitmatte`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.vitmatte.image_processing_vitmatte`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.vitmatte.image_processing_vitmatte`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.vitmatte.image_processing_vitmatte`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.vitpose.image_processing_pil_vitpose`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.vitpose.image_processing_pil_vitpose`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.vitpose.image_processing_pil_vitpose`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.vitpose.image_processing_pil_vitpose`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. 
+Accessing `causal_conv1d_fn` from `.models.vitpose.image_processing_vitpose`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.vitpose.image_processing_vitpose`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.vitpose.image_processing_vitpose`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.vitpose.image_processing_vitpose`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.vivit.image_processing_vivit`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.vivit.image_processing_vivit`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.vivit.image_processing_vivit`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.vivit.image_processing_vivit`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.yolos.image_processing_pil_yolos`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.yolos.image_processing_pil_yolos`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. 
+Accessing `_lazy_load_causal_conv1d` from `.models.yolos.image_processing_pil_yolos`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.yolos.image_processing_pil_yolos`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.yolos.image_processing_yolos`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.yolos.image_processing_yolos`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.yolos.image_processing_yolos`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.yolos.image_processing_yolos`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.zoedepth.image_processing_pil_zoedepth`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.zoedepth.image_processing_pil_zoedepth`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.zoedepth.image_processing_pil_zoedepth`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.zoedepth.image_processing_pil_zoedepth`. Returning `_causal_conv1d_cache` instead. 
Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_fn` from `.models.zoedepth.image_processing_zoedepth`. Returning `causal_conv1d_fn` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `causal_conv1d_update` from `.models.zoedepth.image_processing_zoedepth`. Returning `causal_conv1d_update` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_lazy_load_causal_conv1d` from `.models.zoedepth.image_processing_zoedepth`. Returning `_lazy_load_causal_conv1d` instead. Behavior may be different and this alias will be removed in future versions. +Accessing `_causal_conv1d_cache` from `.models.zoedepth.image_processing_zoedepth`. Returning `_causal_conv1d_cache` instead. Behavior may be different and this alias will be removed in future versions. +2026-04-13:07:00:08 WARNING [config.evaluate_config:281] --limit SHOULD ONLY BE USED FOR TESTING. REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT. +2026-04-13:07:00:09 INFO [_cli.run:376] Selected Tasks: ['mmlu', 'hellaswag', 'arc_easy', 'arc_challenge', 'winogrande', 'kmmlu', 'kobest_boolq', 'kobest_copa', 'kobest_hellaswag'] +🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning. +Unsloth: causal_conv1d CUDA kernels not compatible with this GPU. Using PyTorch slow path for Mamba models. +Unsloth: Your Flash Attention 2 installation seems to be broken. Using Xformers instead. No performance changes will be seen. +🦥 Unsloth Zoo will now patch everything to make training faster! 
+ +Loading weights: 0%| | 0/398 [00:00", + "errors": "replace", + "is_local": false, + "model_max_length": 32768, + "pad_token": "<|PAD_TOKEN|>", + "padding_side": "left", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..1f1e6cf --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,15 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|endoftext|>", + "errors": "replace", + "is_local": false, + "model_max_length": 32768, + "pad_token": "<|PAD_TOKEN|>", + "padding_side": "left", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/train_results.json b/train_results.json new file mode 100644 index 0000000..219b645 --- /dev/null +++ b/train_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 1.0, + "total_flos": 1.6013083311596544e+16, + "train_loss": 1.9445130242241753, + "train_runtime": 226.5501, + "train_samples_per_second": 15.727, + "train_steps_per_second": 0.397 +} \ No newline at end of file diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000..aafb505 --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,114 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 90, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.11204481792717087, + "grad_norm": 14.0, + "learning_rate": 1e-05, + "loss": 2.211936187744141, + "step": 10 + }, + { + "epoch": 0.22408963585434175, + "grad_norm": 7.21875, + "learning_rate": 9.628619846344453e-06, + "loss": 1.982374382019043, + "step": 20 + }, + { + "epoch": 0.33613445378151263, + "grad_norm": 6.75, + "learning_rate": 8.569648672789496e-06, + "loss": 
1.939706802368164, + "step": 30 + }, + { + "epoch": 0.4481792717086835, + "grad_norm": 8.0, + "learning_rate": 6.980398830195785e-06, + "loss": 1.9272880554199219, + "step": 40 + }, + { + "epoch": 0.5602240896358543, + "grad_norm": 4.5, + "learning_rate": 5.096956658859122e-06, + "loss": 1.8303054809570312, + "step": 50 + }, + { + "epoch": 0.6722689075630253, + "grad_norm": 5.53125, + "learning_rate": 3.1991113759764493e-06, + "loss": 1.8838277816772462, + "step": 60 + }, + { + "epoch": 0.7843137254901961, + "grad_norm": 4.875, + "learning_rate": 1.5687918106563326e-06, + "loss": 1.8752876281738282, + "step": 70 + }, + { + "epoch": 0.896358543417367, + "grad_norm": 4.53125, + "learning_rate": 4.481852951692672e-07, + "loss": 1.8907934188842774, + "step": 80 + }, + { + "epoch": 1.0, + "grad_norm": 26.25, + "learning_rate": 3.760237478849793e-09, + "loss": 1.9590974807739259, + "step": 90 + }, + { + "epoch": 1.0, + "eval_loss": 1.7950044870376587, + "eval_runtime": 4.0343, + "eval_samples_per_second": 17.847, + "eval_steps_per_second": 2.231, + "step": 90 + }, + { + "epoch": 1.0, + "step": 90, + "total_flos": 1.6013083311596544e+16, + "train_loss": 1.9445130242241753, + "train_runtime": 226.5501, + "train_samples_per_second": 15.727, + "train_steps_per_second": 0.397 + } + ], + "logging_steps": 10, + "max_steps": 90, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.6013083311596544e+16, + "train_batch_size": 10, + "trial_name": null, + "trial_params": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000..d9d5c40 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:10a665cf825105cd5773c40316561897beba1d0ede4c24ba1bf17b1b2984ae2d +size 5777