commit a8f6813e1eed93c483bfda55354890f9492a5775
Author: ModelHub XC <noreply@modelhub.org.cn>
Date:   Wed Jun 3 02:21:14 2026 +0800

    初始化项目，由ModelHub XC社区提供模型
    
    Model: dphn/dolphin-2.2-mistral-7b
    Source: Original Platform

diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..a6344aa
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,35 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..4973cd0
--- /dev/null
+++ b/README.md
@@ -0,0 +1,13 @@
+---
+license: apache-2.0
+base_model: mistralai/Mistral-7B-v0.1
+datasets:
+- ehartford/dolphin
+- jondurbin/airoboros-2.2.1
+language:
+- en
+---
+
+# dolphin-2.2-mistral-7b
+
+This model was overfit and has been re-released as [dolphin-2.2.1-mistral-7b](https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b).  Please use that model instead.
diff --git a/added_tokens.json b/added_tokens.json
new file mode 100644
index 0000000..e36863d
--- /dev/null
+++ b/added_tokens.json
@@ -0,0 +1,4 @@
+{
+  "<|im_end|>": 32000,
+  "<|im_start|>": 32001
+}
diff --git a/config.json b/config.json
new file mode 100644
index 0000000..ef0daa3
--- /dev/null
+++ b/config.json
@@ -0,0 +1,25 @@
+{
+  "_name_or_path": "mistralai/Mistral-7B-v0.1",
+  "architectures": [
+    "MistralForCausalLM"
+  ],
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 14336,
+  "max_position_embeddings": 32768,
+  "model_type": "mistral",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 32,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-05,
+  "rope_theta": 10000.0,
+  "sliding_window": 4096,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.34.1",
+  "use_cache": false,
+  "vocab_size": 32002
+}
diff --git a/configs/dolphin-mistral-7b.yml b/configs/dolphin-mistral-7b.yml
new file mode 100644
index 0000000..70fcba4
--- /dev/null
+++ b/configs/dolphin-mistral-7b.yml
@@ -0,0 +1,71 @@
+base_model: mistralai/Mistral-7B-v0.1
+model_type: MistralForCausalLM
+tokenizer_type: LlamaTokenizer
+is_mistral_derived_model: true
+# Neither 8-bit nor 4-bit loading is enabled.
+load_in_8bit: false
+load_in_4bit: false
+strict: false
+# Three local JSONL datasets; the two sharegpt ones use the chatml conversation format.
+datasets:
+  - path: /workspace/datasets/dolphin/dolphin201.jsonl
+    type: alpaca_w_system.load_open_orca_chatml
+  - path: /workspace/datasets/WizardLM_evol_instruct_cleaned.jsonl
+    type: sharegpt
+    conversation: chatml
+  - path: /workspace/datasets/not_samantha_norefusals.jsonl
+    type: sharegpt
+    conversation: chatml
+dataset_prepared_path: last_run_prepared
+val_set_size: 0
+output_dir: /workspace/dolphin-2.2-mistral-7b
+# Sequence length of 8192 with sample packing and pad-to-length both enabled.
+sequence_len: 8192
+sample_packing: true
+pad_to_sequence_len: true
+# Weights & Biases logging: only the project name is set, the other keys are empty.
+wandb_project: dolphin
+wandb_entity:
+wandb_watch:
+wandb_run_id:
+wandb_log_model:
+# Batching, epoch count, Adam parameters, gradient-norm limit and LR schedule.
+gradient_accumulation_steps: 4
+micro_batch_size: 5
+num_epochs: 4
+adam_beta2: 0.95
+adam_epsilon: 0.00001
+max_grad_norm: 1.0
+lr_scheduler: cosine
+learning_rate: 0.000006
+# Input/grouping flags and precision: bf16 is on, fp16 and tf32 are off.
+train_on_inputs: false
+group_by_length: false
+bf16: true
+fp16: false
+tf32: false
+# Gradient checkpointing and flash attention are on; logging_steps is 1.
+gradient_checkpointing: true
+early_stopping_patience:
+resume_from_checkpoint:
+local_rank:
+logging_steps: 1
+xformers_attention:
+flash_attention: true
+# Warmup, evaluation, checkpoint saving, DeepSpeed config path and weight decay.
+warmup_steps: 100
+eval_steps:
+eval_table_size:
+eval_table_max_new_tokens:
+eval_sample_packing: false
+save_steps: 0.25
+debug:
+deepspeed: deepspeed/zero2.json
+weight_decay: 0.1
+fsdp:
+fsdp_config:
+special_tokens:
+  eos_token: "<|im_end|>"
+tokens:
+  - "<|im_start|>"
+  - "<|im_end|>"
\ No newline at end of file
diff --git a/configuration.json b/configuration.json
new file mode 100644
index 0000000..bbeeda1
--- /dev/null
+++ b/configuration.json
@@ -0,0 +1 @@
+{"framework": "pytorch", "task": "text-generation", "allow_remote": true}
\ No newline at end of file
diff --git a/eval.sh b/eval.sh
new file mode 100644
index 0000000..0f7c832
--- /dev/null
+++ b/eval.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+# Model identifier handed to lm_eval, and the short name used in paths.
+MODEL_PATH="cognitivecomputations/dolphin-2.2-mistral-7b"
+MODEL_NAME="dolphin-2.2-mistral-7b"
+
+# Directory that receives one result file per task.
+RESULTS_PATH="/workspace/results/${MODEL_NAME}"
+mkdir -p "${RESULTS_PATH}"
+
+# Comma-separated options forwarded through lm_eval's --model_args flag.
+PRETRAINED_ARGS="${MODEL_PATH},tensor_parallel_size=4,dtype=auto,trust_remote_code=True,gpu_memory_utilization=0.8"
+MODEL_ARGS="pretrained=${PRETRAINED_ARGS}"
+
+# Benchmarks to evaluate, in the order they are run.
+tasks=(mmlu truthfulqa gsm8k
+       hellaswag arc_challenge winogrande)
+
+# Print the few-shot example count for the task named in $1.
+# Any task without an explicit entry is run zero-shot.
+get_num_fewshot() {
+    local task_name="$1"
+    case "$task_name" in
+        mmlu | gsm8k | winogrande) echo 5 ;;
+        hellaswag)                 echo 10 ;;
+        arc_challenge)             echo 25 ;;
+        truthfulqa)                echo 0 ;;
+        *)                         echo 0 ;;
+    esac
+}
+
+# Run each benchmark, echoing the command first so the log shows what was executed.
+for TASK in "${tasks[@]}"; do
+    echo lm_eval --model vllm --model_args "$MODEL_ARGS" --task="$TASK" --num_fewshot "$(get_num_fewshot "$TASK")" --batch_size 8 --output_path "$RESULTS_PATH/$TASK.json"
+    lm_eval --model vllm --model_args "$MODEL_ARGS" --task="$TASK" --num_fewshot "$(get_num_fewshot "$TASK")" --batch_size 8 --output_path "$RESULTS_PATH/$TASK.json"
+done
+
+# Merge the per-task files into one JSON array, then upload it and this script.
+jq -s '[.[]]' "$RESULTS_PATH"/*.json > "$RESULTS_PATH/eval_results.json"
+huggingface-cli upload "cognitivecomputations/$MODEL_NAME" "$RESULTS_PATH/eval_results.json"
+huggingface-cli upload "cognitivecomputations/$MODEL_NAME" eval.sh
\ No newline at end of file
diff --git a/eval_results.json b/eval_results.json
new file mode 100644
index 0000000..7eb4326
--- /dev/null
+++ b/eval_results.json
@@ -0,0 +1,3210 @@
+[
+  {
+    "results": {
+      "arc_challenge": {
+        "acc,none": 0.6006825938566553,
+        "acc_stderr,none": 0.014312094557946705,
+        "acc_norm,none": 0.6271331058020477,
+        "acc_norm_stderr,none": 0.014131176760131165,
+        "alias": "arc_challenge"
+      }
+    },
+    "configs": {
+      "arc_challenge": {
+        "task": "arc_challenge",
+        "group": [
+          "ai2_arc"
+        ],
+        "dataset_path": "ai2_arc",
+        "dataset_name": "ARC-Challenge",
+        "training_split": "train",
+        "validation_split": "validation",
+        "test_split": "test",
+        "doc_to_text": "Question: {{question}}\nAnswer:",
+        "doc_to_target": "{{choices.label.index(answerKey)}}",
+        "doc_to_choice": "{{choices.text}}",
+        "description": "",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "num_fewshot": 25,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          },
+          {
+            "metric": "acc_norm",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": true,
+        "doc_to_decontamination_query": "Question: {{question}}\nAnswer:",
+        "metadata": {
+          "version": 1
+        }
+      }
+    },
+    "versions": {
+      "arc_challenge": "Yaml"
+    },
+    "n-shot": {
+      "arc_challenge": 25
+    },
+    "config": {
+      "model": "vllm",
+      "model_args": "pretrained=cognitivecomputations/dolphin-2.2-mistral-7b,tensor_parallel_size=4,dtype=auto,trust_remote_code=True,gpu_memory_utilization=0.8",
+      "batch_size": "8",
+      "batch_sizes": [],
+      "device": null,
+      "use_cache": null,
+      "limit": null,
+      "bootstrap_iters": 100000,
+      "gen_kwargs": null
+    },
+    "git_hash": "46c79664"
+  },
+  {
+    "results": {
+      "gsm8k": {
+        "exact_match,get-answer": 0.5458680818802123,
+        "exact_match_stderr,get-answer": 0.013714410945264554,
+        "alias": "gsm8k"
+      }
+    },
+    "configs": {
+      "gsm8k": {
+        "task": "gsm8k",
+        "group": [
+          "math_word_problems"
+        ],
+        "dataset_path": "gsm8k",
+        "dataset_name": "main",
+        "training_split": "train",
+        "test_split": "test",
+        "fewshot_split": "train",
+        "doc_to_text": "Question: {{question}}\nAnswer:",
+        "doc_to_target": "{{answer}}",
+        "description": "",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "exact_match",
+            "aggregation": "mean",
+            "higher_is_better": true,
+            "ignore_case": true,
+            "ignore_punctuation": false,
+            "regexes_to_ignore": [
+              ",",
+              "\\$",
+              "(?s).*#### "
+            ]
+          }
+        ],
+        "output_type": "generate_until",
+        "generation_kwargs": {
+          "until": [
+            "\n\n",
+            "Question:"
+          ],
+          "do_sample": false,
+          "temperature": 0
+        },
+        "repeats": 1,
+        "filter_list": [
+          {
+            "name": "get-answer",
+            "filter": [
+              {
+                "function": "regex",
+                "regex_pattern": "#### (\\-?[0-9\\.\\,]+)"
+              },
+              {
+                "function": "take_first"
+              }
+            ]
+          }
+        ],
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 1
+        }
+      }
+    },
+    "versions": {
+      "gsm8k": "Yaml"
+    },
+    "n-shot": {
+      "gsm8k": 5
+    },
+    "config": {
+      "model": "vllm",
+      "model_args": "pretrained=cognitivecomputations/dolphin-2.2-mistral-7b,tensor_parallel_size=4,dtype=auto,trust_remote_code=True,gpu_memory_utilization=0.8",
+      "batch_size": "8",
+      "batch_sizes": [],
+      "device": null,
+      "use_cache": null,
+      "limit": null,
+      "bootstrap_iters": 100000,
+      "gen_kwargs": null
+    },
+    "git_hash": "46c79664"
+  },
+  {
+    "results": {
+      "hellaswag": {
+        "acc,none": 0.6486755626369249,
+        "acc_stderr,none": 0.004764084597176902,
+        "acc_norm,none": 0.839573790081657,
+        "acc_norm_stderr,none": 0.0036625082723308246,
+        "alias": "hellaswag"
+      }
+    },
+    "configs": {
+      "hellaswag": {
+        "task": "hellaswag",
+        "group": [
+          "multiple_choice"
+        ],
+        "dataset_path": "hellaswag",
+        "training_split": "train",
+        "validation_split": "validation",
+        "process_docs": "<function process_docs at 0x7ff8ef44ef20>",
+        "doc_to_text": "{{query}}",
+        "doc_to_target": "{{label}}",
+        "doc_to_choice": "choices",
+        "description": "",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "num_fewshot": 10,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          },
+          {
+            "metric": "acc_norm",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 1
+        }
+      }
+    },
+    "versions": {
+      "hellaswag": "Yaml"
+    },
+    "n-shot": {
+      "hellaswag": 10
+    },
+    "config": {
+      "model": "vllm",
+      "model_args": "pretrained=cognitivecomputations/dolphin-2.2-mistral-7b,tensor_parallel_size=4,dtype=auto,trust_remote_code=True,gpu_memory_utilization=0.8",
+      "batch_size": "8",
+      "batch_sizes": [],
+      "device": null,
+      "use_cache": null,
+      "limit": null,
+      "bootstrap_iters": 100000,
+      "gen_kwargs": null
+    },
+    "git_hash": "46c79664"
+  },
+  {
+    "results": {
+      "mmlu": {
+        "acc,none": 0.6177182737501781,
+        "acc_stderr,none": 0.12146933986032843,
+        "alias": "mmlu"
+      },
+      "mmlu_humanities": {
+        "alias": " - humanities",
+        "acc,none": 0.5742826780021254,
+        "acc_stderr,none": 0.10969794763314945
+      },
+      "mmlu_formal_logic": {
+        "alias": "  - formal_logic",
+        "acc,none": 0.3888888888888889,
+        "acc_stderr,none": 0.04360314860077459
+      },
+      "mmlu_high_school_european_history": {
+        "alias": "  - high_school_european_history",
+        "acc,none": 0.7878787878787878,
+        "acc_stderr,none": 0.03192271569548301
+      },
+      "mmlu_high_school_us_history": {
+        "alias": "  - high_school_us_history",
+        "acc,none": 0.75,
+        "acc_stderr,none": 0.03039153369274154
+      },
+      "mmlu_high_school_world_history": {
+        "alias": "  - high_school_world_history",
+        "acc,none": 0.7679324894514767,
+        "acc_stderr,none": 0.027479744550808517
+      },
+      "mmlu_international_law": {
+        "alias": "  - international_law",
+        "acc,none": 0.7851239669421488,
+        "acc_stderr,none": 0.03749492448709699
+      },
+      "mmlu_jurisprudence": {
+        "alias": "  - jurisprudence",
+        "acc,none": 0.7870370370370371,
+        "acc_stderr,none": 0.039578354719809784
+      },
+      "mmlu_logical_fallacies": {
+        "alias": "  - logical_fallacies",
+        "acc,none": 0.7055214723926381,
+        "acc_stderr,none": 0.03581165790474082
+      },
+      "mmlu_moral_disputes": {
+        "alias": "  - moral_disputes",
+        "acc,none": 0.7167630057803468,
+        "acc_stderr,none": 0.02425790170532337
+      },
+      "mmlu_moral_scenarios": {
+        "alias": "  - moral_scenarios",
+        "acc,none": 0.4044692737430168,
+        "acc_stderr,none": 0.01641444091729315
+      },
+      "mmlu_philosophy": {
+        "alias": "  - philosophy",
+        "acc,none": 0.6913183279742765,
+        "acc_stderr,none": 0.02623696588115326
+      },
+      "mmlu_prehistory": {
+        "alias": "  - prehistory",
+        "acc,none": 0.7129629629629629,
+        "acc_stderr,none": 0.02517104191530968
+      },
+      "mmlu_professional_law": {
+        "alias": "  - professional_law",
+        "acc,none": 0.45241199478487615,
+        "acc_stderr,none": 0.012712265105889133
+      },
+      "mmlu_world_religions": {
+        "alias": "  - world_religions",
+        "acc,none": 0.8362573099415205,
+        "acc_stderr,none": 0.028380919596145866
+      },
+      "mmlu_other": {
+        "alias": " - other",
+        "acc,none": 0.6842613453492115,
+        "acc_stderr,none": 0.10567866943210723
+      },
+      "mmlu_business_ethics": {
+        "alias": "  - business_ethics",
+        "acc,none": 0.57,
+        "acc_stderr,none": 0.049756985195624284
+      },
+      "mmlu_clinical_knowledge": {
+        "alias": "  - clinical_knowledge",
+        "acc,none": 0.6641509433962264,
+        "acc_stderr,none": 0.029067220146644823
+      },
+      "mmlu_college_medicine": {
+        "alias": "  - college_medicine",
+        "acc,none": 0.6069364161849711,
+        "acc_stderr,none": 0.0372424959581773
+      },
+      "mmlu_global_facts": {
+        "alias": "  - global_facts",
+        "acc,none": 0.38,
+        "acc_stderr,none": 0.04878317312145633
+      },
+      "mmlu_human_aging": {
+        "alias": "  - human_aging",
+        "acc,none": 0.6816143497757847,
+        "acc_stderr,none": 0.03126580522513713
+      },
+      "mmlu_management": {
+        "alias": "  - management",
+        "acc,none": 0.7572815533980582,
+        "acc_stderr,none": 0.04245022486384495
+      },
+      "mmlu_marketing": {
+        "alias": "  - marketing",
+        "acc,none": 0.8675213675213675,
+        "acc_stderr,none": 0.022209309073165616
+      },
+      "mmlu_medical_genetics": {
+        "alias": "  - medical_genetics",
+        "acc,none": 0.74,
+        "acc_stderr,none": 0.044084400227680794
+      },
+      "mmlu_miscellaneous": {
+        "alias": "  - miscellaneous",
+        "acc,none": 0.7931034482758621,
+        "acc_stderr,none": 0.014485656041669173
+      },
+      "mmlu_nutrition": {
+        "alias": "  - nutrition",
+        "acc,none": 0.7058823529411765,
+        "acc_stderr,none": 0.02609016250427905
+      },
+      "mmlu_professional_accounting": {
+        "alias": "  - professional_accounting",
+        "acc,none": 0.46808510638297873,
+        "acc_stderr,none": 0.029766675075873866
+      },
+      "mmlu_professional_medicine": {
+        "alias": "  - professional_medicine",
+        "acc,none": 0.6764705882352942,
+        "acc_stderr,none": 0.02841820861940675
+      },
+      "mmlu_virology": {
+        "alias": "  - virology",
+        "acc,none": 0.5421686746987951,
+        "acc_stderr,none": 0.038786267710023595
+      },
+      "mmlu_social_sciences": {
+        "alias": " - social_sciences",
+        "acc,none": 0.7227819304517387,
+        "acc_stderr,none": 0.0710141347586875
+      },
+      "mmlu_econometrics": {
+        "alias": "  - econometrics",
+        "acc,none": 0.47368421052631576,
+        "acc_stderr,none": 0.046970851366478626
+      },
+      "mmlu_high_school_geography": {
+        "alias": "  - high_school_geography",
+        "acc,none": 0.7777777777777778,
+        "acc_stderr,none": 0.029620227874790465
+      },
+      "mmlu_high_school_government_and_politics": {
+        "alias": "  - high_school_government_and_politics",
+        "acc,none": 0.8549222797927462,
+        "acc_stderr,none": 0.02541634309630644
+      },
+      "mmlu_high_school_macroeconomics": {
+        "alias": "  - high_school_macroeconomics",
+        "acc,none": 0.6333333333333333,
+        "acc_stderr,none": 0.02443301646605246
+      },
+      "mmlu_high_school_microeconomics": {
+        "alias": "  - high_school_microeconomics",
+        "acc,none": 0.6596638655462185,
+        "acc_stderr,none": 0.030778057422931673
+      },
+      "mmlu_high_school_psychology": {
+        "alias": "  - high_school_psychology",
+        "acc,none": 0.8165137614678899,
+        "acc_stderr,none": 0.01659525971039932
+      },
+      "mmlu_human_sexuality": {
+        "alias": "  - human_sexuality",
+        "acc,none": 0.7480916030534351,
+        "acc_stderr,none": 0.03807387116306085
+      },
+      "mmlu_professional_psychology": {
+        "alias": "  - professional_psychology",
+        "acc,none": 0.6486928104575164,
+        "acc_stderr,none": 0.019312676065786554
+      },
+      "mmlu_public_relations": {
+        "alias": "  - public_relations",
+        "acc,none": 0.6636363636363637,
+        "acc_stderr,none": 0.04525393596302505
+      },
+      "mmlu_security_studies": {
+        "alias": "  - security_studies",
+        "acc,none": 0.726530612244898,
+        "acc_stderr,none": 0.028535560337128438
+      },
+      "mmlu_sociology": {
+        "alias": "  - sociology",
+        "acc,none": 0.835820895522388,
+        "acc_stderr,none": 0.026193923544454156
+      },
+      "mmlu_us_foreign_policy": {
+        "alias": "  - us_foreign_policy",
+        "acc,none": 0.88,
+        "acc_stderr,none": 0.03265986323710905
+      },
+      "mmlu_stem": {
+        "alias": " - stem",
+        "acc,none": 0.5144307009197588,
+        "acc_stderr,none": 0.13040661655168
+      },
+      "mmlu_abstract_algebra": {
+        "alias": "  - abstract_algebra",
+        "acc,none": 0.3,
+        "acc_stderr,none": 0.046056618647183814
+      },
+      "mmlu_anatomy": {
+        "alias": "  - anatomy",
+        "acc,none": 0.6296296296296297,
+        "acc_stderr,none": 0.041716541613545426
+      },
+      "mmlu_astronomy": {
+        "alias": "  - astronomy",
+        "acc,none": 0.6381578947368421,
+        "acc_stderr,none": 0.03910525752849724
+      },
+      "mmlu_college_biology": {
+        "alias": "  - college_biology",
+        "acc,none": 0.7013888888888888,
+        "acc_stderr,none": 0.03827052357950756
+      },
+      "mmlu_college_chemistry": {
+        "alias": "  - college_chemistry",
+        "acc,none": 0.41,
+        "acc_stderr,none": 0.049431107042371025
+      },
+      "mmlu_college_computer_science": {
+        "alias": "  - college_computer_science",
+        "acc,none": 0.5,
+        "acc_stderr,none": 0.050251890762960605
+      },
+      "mmlu_college_mathematics": {
+        "alias": "  - college_mathematics",
+        "acc,none": 0.33,
+        "acc_stderr,none": 0.04725815626252604
+      },
+      "mmlu_college_physics": {
+        "alias": "  - college_physics",
+        "acc,none": 0.4019607843137255,
+        "acc_stderr,none": 0.04878608714466996
+      },
+      "mmlu_computer_security": {
+        "alias": "  - computer_security",
+        "acc,none": 0.79,
+        "acc_stderr,none": 0.040936018074033256
+      },
+      "mmlu_conceptual_physics": {
+        "alias": "  - conceptual_physics",
+        "acc,none": 0.5617021276595745,
+        "acc_stderr,none": 0.03243618636108101
+      },
+      "mmlu_electrical_engineering": {
+        "alias": "  - electrical_engineering",
+        "acc,none": 0.5448275862068965,
+        "acc_stderr,none": 0.04149886942192118
+      },
+      "mmlu_elementary_mathematics": {
+        "alias": "  - elementary_mathematics",
+        "acc,none": 0.3941798941798942,
+        "acc_stderr,none": 0.025167982333894143
+      },
+      "mmlu_high_school_biology": {
+        "alias": "  - high_school_biology",
+        "acc,none": 0.7677419354838709,
+        "acc_stderr,none": 0.024022256130308235
+      },
+      "mmlu_high_school_chemistry": {
+        "alias": "  - high_school_chemistry",
+        "acc,none": 0.5024630541871922,
+        "acc_stderr,none": 0.035179450386910616
+      },
+      "mmlu_high_school_computer_science": {
+        "alias": "  - high_school_computer_science",
+        "acc,none": 0.66,
+        "acc_stderr,none": 0.04760952285695237
+      },
+      "mmlu_high_school_mathematics": {
+        "alias": "  - high_school_mathematics",
+        "acc,none": 0.34814814814814815,
+        "acc_stderr,none": 0.02904560029061626
+      },
+      "mmlu_high_school_physics": {
+        "alias": "  - high_school_physics",
+        "acc,none": 0.2913907284768212,
+        "acc_stderr,none": 0.03710185726119996
+      },
+      "mmlu_high_school_statistics": {
+        "alias": "  - high_school_statistics",
+        "acc,none": 0.49537037037037035,
+        "acc_stderr,none": 0.03409825519163572
+      },
+      "mmlu_machine_learning": {
+        "alias": "  - machine_learning",
+        "acc,none": 0.48214285714285715,
+        "acc_stderr,none": 0.047427623612430116
+      }
+    },
+    "groups": {
+      "mmlu": {
+        "acc,none": 0.6177182737501781,
+        "acc_stderr,none": 0.12146933986032843,
+        "alias": "mmlu"
+      },
+      "mmlu_humanities": {
+        "alias": " - humanities",
+        "acc,none": 0.5742826780021254,
+        "acc_stderr,none": 0.10969794763314945
+      },
+      "mmlu_other": {
+        "alias": " - other",
+        "acc,none": 0.6842613453492115,
+        "acc_stderr,none": 0.10567866943210723
+      },
+      "mmlu_social_sciences": {
+        "alias": " - social_sciences",
+        "acc,none": 0.7227819304517387,
+        "acc_stderr,none": 0.0710141347586875
+      },
+      "mmlu_stem": {
+        "alias": " - stem",
+        "acc,none": 0.5144307009197588,
+        "acc_stderr,none": 0.13040661655168
+      }
+    },
+    "configs": {
+      "mmlu_abstract_algebra": {
+        "task": "mmlu_abstract_algebra",
+        "task_alias": "abstract_algebra",
+        "group": "mmlu_stem",
+        "group_alias": "stem",
+        "dataset_path": "hails/mmlu_no_train",
+        "dataset_name": "abstract_algebra",
+        "test_split": "test",
+        "fewshot_split": "dev",
+        "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+        "doc_to_target": "answer",
+        "doc_to_choice": [
+          "A",
+          "B",
+          "C",
+          "D"
+        ],
+        "description": "The following are multiple choice questions (with answers) about abstract algebra.\n\n",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "fewshot_config": {
+          "sampler": "first_n"
+        },
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 0
+        }
+      },
+      "mmlu_anatomy": {
+        "task": "mmlu_anatomy",
+        "task_alias": "anatomy",
+        "group": "mmlu_stem",
+        "group_alias": "stem",
+        "dataset_path": "hails/mmlu_no_train",
+        "dataset_name": "anatomy",
+        "test_split": "test",
+        "fewshot_split": "dev",
+        "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+        "doc_to_target": "answer",
+        "doc_to_choice": [
+          "A",
+          "B",
+          "C",
+          "D"
+        ],
+        "description": "The following are multiple choice questions (with answers) about anatomy.\n\n",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "fewshot_config": {
+          "sampler": "first_n"
+        },
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 0
+        }
+      },
+      "mmlu_astronomy": {
+        "task": "mmlu_astronomy",
+        "task_alias": "astronomy",
+        "group": "mmlu_stem",
+        "group_alias": "stem",
+        "dataset_path": "hails/mmlu_no_train",
+        "dataset_name": "astronomy",
+        "test_split": "test",
+        "fewshot_split": "dev",
+        "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+        "doc_to_target": "answer",
+        "doc_to_choice": [
+          "A",
+          "B",
+          "C",
+          "D"
+        ],
+        "description": "The following are multiple choice questions (with answers) about astronomy.\n\n",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "fewshot_config": {
+          "sampler": "first_n"
+        },
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 0
+        }
+      },
+      "mmlu_business_ethics": {
+        "task": "mmlu_business_ethics",
+        "task_alias": "business_ethics",
+        "group": "mmlu_other",
+        "group_alias": "other",
+        "dataset_path": "hails/mmlu_no_train",
+        "dataset_name": "business_ethics",
+        "test_split": "test",
+        "fewshot_split": "dev",
+        "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+        "doc_to_target": "answer",
+        "doc_to_choice": [
+          "A",
+          "B",
+          "C",
+          "D"
+        ],
+        "description": "The following are multiple choice questions (with answers) about business ethics.\n\n",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "fewshot_config": {
+          "sampler": "first_n"
+        },
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 0
+        }
+      },
+      "mmlu_clinical_knowledge": {
+        "task": "mmlu_clinical_knowledge",
+        "task_alias": "clinical_knowledge",
+        "group": "mmlu_other",
+        "group_alias": "other",
+        "dataset_path": "hails/mmlu_no_train",
+        "dataset_name": "clinical_knowledge",
+        "test_split": "test",
+        "fewshot_split": "dev",
+        "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+        "doc_to_target": "answer",
+        "doc_to_choice": [
+          "A",
+          "B",
+          "C",
+          "D"
+        ],
+        "description": "The following are multiple choice questions (with answers) about clinical knowledge.\n\n",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "fewshot_config": {
+          "sampler": "first_n"
+        },
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 0
+        }
+      },
+      "mmlu_college_biology": {
+        "task": "mmlu_college_biology",
+        "task_alias": "college_biology",
+        "group": "mmlu_stem",
+        "group_alias": "stem",
+        "dataset_path": "hails/mmlu_no_train",
+        "dataset_name": "college_biology",
+        "test_split": "test",
+        "fewshot_split": "dev",
+        "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+        "doc_to_target": "answer",
+        "doc_to_choice": [
+          "A",
+          "B",
+          "C",
+          "D"
+        ],
+        "description": "The following are multiple choice questions (with answers) about college biology.\n\n",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "fewshot_config": {
+          "sampler": "first_n"
+        },
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 0
+        }
+      },
+      "mmlu_college_chemistry": {
+        "task": "mmlu_college_chemistry",
+        "task_alias": "college_chemistry",
+        "group": "mmlu_stem",
+        "group_alias": "stem",
+        "dataset_path": "hails/mmlu_no_train",
+        "dataset_name": "college_chemistry",
+        "test_split": "test",
+        "fewshot_split": "dev",
+        "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+        "doc_to_target": "answer",
+        "doc_to_choice": [
+          "A",
+          "B",
+          "C",
+          "D"
+        ],
+        "description": "The following are multiple choice questions (with answers) about college chemistry.\n\n",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "fewshot_config": {
+          "sampler": "first_n"
+        },
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 0
+        }
+      },
+      "mmlu_college_computer_science": {
+        "task": "mmlu_college_computer_science",
+        "task_alias": "college_computer_science",
+        "group": "mmlu_stem",
+        "group_alias": "stem",
+        "dataset_path": "hails/mmlu_no_train",
+        "dataset_name": "college_computer_science",
+        "test_split": "test",
+        "fewshot_split": "dev",
+        "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+        "doc_to_target": "answer",
+        "doc_to_choice": [
+          "A",
+          "B",
+          "C",
+          "D"
+        ],
+        "description": "The following are multiple choice questions (with answers) about college computer science.\n\n",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "fewshot_config": {
+          "sampler": "first_n"
+        },
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 0
+        }
+      },
+      "mmlu_college_mathematics": {
+        "task": "mmlu_college_mathematics",
+        "task_alias": "college_mathematics",
+        "group": "mmlu_stem",
+        "group_alias": "stem",
+        "dataset_path": "hails/mmlu_no_train",
+        "dataset_name": "college_mathematics",
+        "test_split": "test",
+        "fewshot_split": "dev",
+        "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+        "doc_to_target": "answer",
+        "doc_to_choice": [
+          "A",
+          "B",
+          "C",
+          "D"
+        ],
+        "description": "The following are multiple choice questions (with answers) about college mathematics.\n\n",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "fewshot_config": {
+          "sampler": "first_n"
+        },
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 0
+        }
+      },
+      "mmlu_college_medicine": {
+        "task": "mmlu_college_medicine",
+        "task_alias": "college_medicine",
+        "group": "mmlu_other",
+        "group_alias": "other",
+        "dataset_path": "hails/mmlu_no_train",
+        "dataset_name": "college_medicine",
+        "test_split": "test",
+        "fewshot_split": "dev",
+        "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+        "doc_to_target": "answer",
+        "doc_to_choice": [
+          "A",
+          "B",
+          "C",
+          "D"
+        ],
+        "description": "The following are multiple choice questions (with answers) about college medicine.\n\n",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "fewshot_config": {
+          "sampler": "first_n"
+        },
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 0
+        }
+      },
+      "mmlu_college_physics": {
+        "task": "mmlu_college_physics",
+        "task_alias": "college_physics",
+        "group": "mmlu_stem",
+        "group_alias": "stem",
+        "dataset_path": "hails/mmlu_no_train",
+        "dataset_name": "college_physics",
+        "test_split": "test",
+        "fewshot_split": "dev",
+        "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+        "doc_to_target": "answer",
+        "doc_to_choice": [
+          "A",
+          "B",
+          "C",
+          "D"
+        ],
+        "description": "The following are multiple choice questions (with answers) about college physics.\n\n",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "fewshot_config": {
+          "sampler": "first_n"
+        },
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 0
+        }
+      },
+      "mmlu_computer_security": {
+        "task": "mmlu_computer_security",
+        "task_alias": "computer_security",
+        "group": "mmlu_stem",
+        "group_alias": "stem",
+        "dataset_path": "hails/mmlu_no_train",
+        "dataset_name": "computer_security",
+        "test_split": "test",
+        "fewshot_split": "dev",
+        "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+        "doc_to_target": "answer",
+        "doc_to_choice": [
+          "A",
+          "B",
+          "C",
+          "D"
+        ],
+        "description": "The following are multiple choice questions (with answers) about computer security.\n\n",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "fewshot_config": {
+          "sampler": "first_n"
+        },
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 0
+        }
+      },
+      "mmlu_conceptual_physics": {
+        "task": "mmlu_conceptual_physics",
+        "task_alias": "conceptual_physics",
+        "group": "mmlu_stem",
+        "group_alias": "stem",
+        "dataset_path": "hails/mmlu_no_train",
+        "dataset_name": "conceptual_physics",
+        "test_split": "test",
+        "fewshot_split": "dev",
+        "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+        "doc_to_target": "answer",
+        "doc_to_choice": [
+          "A",
+          "B",
+          "C",
+          "D"
+        ],
+        "description": "The following are multiple choice questions (with answers) about conceptual physics.\n\n",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "fewshot_config": {
+          "sampler": "first_n"
+        },
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 0
+        }
+      },
+      "mmlu_econometrics": {
+        "task": "mmlu_econometrics",
+        "task_alias": "econometrics",
+        "group": "mmlu_social_sciences",
+        "group_alias": "social_sciences",
+        "dataset_path": "hails/mmlu_no_train",
+        "dataset_name": "econometrics",
+        "test_split": "test",
+        "fewshot_split": "dev",
+        "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+        "doc_to_target": "answer",
+        "doc_to_choice": [
+          "A",
+          "B",
+          "C",
+          "D"
+        ],
+        "description": "The following are multiple choice questions (with answers) about econometrics.\n\n",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "fewshot_config": {
+          "sampler": "first_n"
+        },
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 0
+        }
+      },
+      "mmlu_electrical_engineering": {
+        "task": "mmlu_electrical_engineering",
+        "task_alias": "electrical_engineering",
+        "group": "mmlu_stem",
+        "group_alias": "stem",
+        "dataset_path": "hails/mmlu_no_train",
+        "dataset_name": "electrical_engineering",
+        "test_split": "test",
+        "fewshot_split": "dev",
+        "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+        "doc_to_target": "answer",
+        "doc_to_choice": [
+          "A",
+          "B",
+          "C",
+          "D"
+        ],
+        "description": "The following are multiple choice questions (with answers) about electrical engineering.\n\n",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "fewshot_config": {
+          "sampler": "first_n"
+        },
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 0
+        }
+      },
+      "mmlu_elementary_mathematics": {
+        "task": "mmlu_elementary_mathematics",
+        "task_alias": "elementary_mathematics",
+        "group": "mmlu_stem",
+        "group_alias": "stem",
+        "dataset_path": "hails/mmlu_no_train",
+        "dataset_name": "elementary_mathematics",
+        "test_split": "test",
+        "fewshot_split": "dev",
+        "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+        "doc_to_target": "answer",
+        "doc_to_choice": [
+          "A",
+          "B",
+          "C",
+          "D"
+        ],
+        "description": "The following are multiple choice questions (with answers) about elementary mathematics.\n\n",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "fewshot_config": {
+          "sampler": "first_n"
+        },
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 0
+        }
+      },
+      "mmlu_formal_logic": {
+        "task": "mmlu_formal_logic",
+        "task_alias": "formal_logic",
+        "group": "mmlu_humanities",
+        "group_alias": "humanities",
+        "dataset_path": "hails/mmlu_no_train",
+        "dataset_name": "formal_logic",
+        "test_split": "test",
+        "fewshot_split": "dev",
+        "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+        "doc_to_target": "answer",
+        "doc_to_choice": [
+          "A",
+          "B",
+          "C",
+          "D"
+        ],
+        "description": "The following are multiple choice questions (with answers) about formal logic.\n\n",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "fewshot_config": {
+          "sampler": "first_n"
+        },
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 0
+        }
+      },
+      "mmlu_global_facts": {
+        "task": "mmlu_global_facts",
+        "task_alias": "global_facts",
+        "group": "mmlu_other",
+        "group_alias": "other",
+        "dataset_path": "hails/mmlu_no_train",
+        "dataset_name": "global_facts",
+        "test_split": "test",
+        "fewshot_split": "dev",
+        "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+        "doc_to_target": "answer",
+        "doc_to_choice": [
+          "A",
+          "B",
+          "C",
+          "D"
+        ],
+        "description": "The following are multiple choice questions (with answers) about global facts.\n\n",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "fewshot_config": {
+          "sampler": "first_n"
+        },
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 0
+        }
+      },
+      "mmlu_high_school_biology": {
+        "task": "mmlu_high_school_biology",
+        "task_alias": "high_school_biology",
+        "group": "mmlu_stem",
+        "group_alias": "stem",
+        "dataset_path": "hails/mmlu_no_train",
+        "dataset_name": "high_school_biology",
+        "test_split": "test",
+        "fewshot_split": "dev",
+        "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+        "doc_to_target": "answer",
+        "doc_to_choice": [
+          "A",
+          "B",
+          "C",
+          "D"
+        ],
+        "description": "The following are multiple choice questions (with answers) about high school biology.\n\n",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "fewshot_config": {
+          "sampler": "first_n"
+        },
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 0
+        }
+      },
+      "mmlu_high_school_chemistry": {
+        "task": "mmlu_high_school_chemistry",
+        "task_alias": "high_school_chemistry",
+        "group": "mmlu_stem",
+        "group_alias": "stem",
+        "dataset_path": "hails/mmlu_no_train",
+        "dataset_name": "high_school_chemistry",
+        "test_split": "test",
+        "fewshot_split": "dev",
+        "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+        "doc_to_target": "answer",
+        "doc_to_choice": [
+          "A",
+          "B",
+          "C",
+          "D"
+        ],
+        "description": "The following are multiple choice questions (with answers) about high school chemistry.\n\n",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "fewshot_config": {
+          "sampler": "first_n"
+        },
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 0
+        }
+      },
+      "mmlu_high_school_computer_science": {
+        "task": "mmlu_high_school_computer_science",
+        "task_alias": "high_school_computer_science",
+        "group": "mmlu_stem",
+        "group_alias": "stem",
+        "dataset_path": "hails/mmlu_no_train",
+        "dataset_name": "high_school_computer_science",
+        "test_split": "test",
+        "fewshot_split": "dev",
+        "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+        "doc_to_target": "answer",
+        "doc_to_choice": [
+          "A",
+          "B",
+          "C",
+          "D"
+        ],
+        "description": "The following are multiple choice questions (with answers) about high school computer science.\n\n",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "fewshot_config": {
+          "sampler": "first_n"
+        },
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 0
+        }
+      },
+      "mmlu_high_school_european_history": {
+        "task": "mmlu_high_school_european_history",
+        "task_alias": "high_school_european_history",
+        "group": "mmlu_humanities",
+        "group_alias": "humanities",
+        "dataset_path": "hails/mmlu_no_train",
+        "dataset_name": "high_school_european_history",
+        "test_split": "test",
+        "fewshot_split": "dev",
+        "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+        "doc_to_target": "answer",
+        "doc_to_choice": [
+          "A",
+          "B",
+          "C",
+          "D"
+        ],
+        "description": "The following are multiple choice questions (with answers) about high school european history.\n\n",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "fewshot_config": {
+          "sampler": "first_n"
+        },
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 0
+        }
+      },
+      "mmlu_high_school_geography": {
+        "task": "mmlu_high_school_geography",
+        "task_alias": "high_school_geography",
+        "group": "mmlu_social_sciences",
+        "group_alias": "social_sciences",
+        "dataset_path": "hails/mmlu_no_train",
+        "dataset_name": "high_school_geography",
+        "test_split": "test",
+        "fewshot_split": "dev",
+        "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+        "doc_to_target": "answer",
+        "doc_to_choice": [
+          "A",
+          "B",
+          "C",
+          "D"
+        ],
+        "description": "The following are multiple choice questions (with answers) about high school geography.\n\n",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "fewshot_config": {
+          "sampler": "first_n"
+        },
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 0
+        }
+      },
+      "mmlu_high_school_government_and_politics": {
+        "task": "mmlu_high_school_government_and_politics",
+        "task_alias": "high_school_government_and_politics",
+        "group": "mmlu_social_sciences",
+        "group_alias": "social_sciences",
+        "dataset_path": "hails/mmlu_no_train",
+        "dataset_name": "high_school_government_and_politics",
+        "test_split": "test",
+        "fewshot_split": "dev",
+        "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+        "doc_to_target": "answer",
+        "doc_to_choice": [
+          "A",
+          "B",
+          "C",
+          "D"
+        ],
+        "description": "The following are multiple choice questions (with answers) about high school government and politics.\n\n",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "fewshot_config": {
+          "sampler": "first_n"
+        },
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 0
+        }
+      },
+      "mmlu_high_school_macroeconomics": {
+        "task": "mmlu_high_school_macroeconomics",
+        "task_alias": "high_school_macroeconomics",
+        "group": "mmlu_social_sciences",
+        "group_alias": "social_sciences",
+        "dataset_path": "hails/mmlu_no_train",
+        "dataset_name": "high_school_macroeconomics",
+        "test_split": "test",
+        "fewshot_split": "dev",
+        "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+        "doc_to_target": "answer",
+        "doc_to_choice": [
+          "A",
+          "B",
+          "C",
+          "D"
+        ],
+        "description": "The following are multiple choice questions (with answers) about high school macroeconomics.\n\n",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "fewshot_config": {
+          "sampler": "first_n"
+        },
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 0
+        }
+      },
+      "mmlu_high_school_mathematics": {
+        "task": "mmlu_high_school_mathematics",
+        "task_alias": "high_school_mathematics",
+        "group": "mmlu_stem",
+        "group_alias": "stem",
+        "dataset_path": "hails/mmlu_no_train",
+        "dataset_name": "high_school_mathematics",
+        "test_split": "test",
+        "fewshot_split": "dev",
+        "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+        "doc_to_target": "answer",
+        "doc_to_choice": [
+          "A",
+          "B",
+          "C",
+          "D"
+        ],
+        "description": "The following are multiple choice questions (with answers) about high school mathematics.\n\n",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "fewshot_config": {
+          "sampler": "first_n"
+        },
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 0
+        }
+      },
+      "mmlu_high_school_microeconomics": {
+        "task": "mmlu_high_school_microeconomics",
+        "task_alias": "high_school_microeconomics",
+        "group": "mmlu_social_sciences",
+        "group_alias": "social_sciences",
+        "dataset_path": "hails/mmlu_no_train",
+        "dataset_name": "high_school_microeconomics",
+        "test_split": "test",
+        "fewshot_split": "dev",
+        "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+        "doc_to_target": "answer",
+        "doc_to_choice": [
+          "A",
+          "B",
+          "C",
+          "D"
+        ],
+        "description": "The following are multiple choice questions (with answers) about high school microeconomics.\n\n",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "fewshot_config": {
+          "sampler": "first_n"
+        },
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 0
+        }
+      },
+      "mmlu_high_school_physics": {
+        "task": "mmlu_high_school_physics",
+        "task_alias": "high_school_physics",
+        "group": "mmlu_stem",
+        "group_alias": "stem",
+        "dataset_path": "hails/mmlu_no_train",
+        "dataset_name": "high_school_physics",
+        "test_split": "test",
+        "fewshot_split": "dev",
+        "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+        "doc_to_target": "answer",
+        "doc_to_choice": [
+          "A",
+          "B",
+          "C",
+          "D"
+        ],
+        "description": "The following are multiple choice questions (with answers) about high school physics.\n\n",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "fewshot_config": {
+          "sampler": "first_n"
+        },
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 0
+        }
+      },
+      "mmlu_high_school_psychology": {
+        "task": "mmlu_high_school_psychology",
+        "task_alias": "high_school_psychology",
+        "group": "mmlu_social_sciences",
+        "group_alias": "social_sciences",
+        "dataset_path": "hails/mmlu_no_train",
+        "dataset_name": "high_school_psychology",
+        "test_split": "test",
+        "fewshot_split": "dev",
+        "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+        "doc_to_target": "answer",
+        "doc_to_choice": [
+          "A",
+          "B",
+          "C",
+          "D"
+        ],
+        "description": "The following are multiple choice questions (with answers) about high school psychology.\n\n",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "fewshot_config": {
+          "sampler": "first_n"
+        },
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 0
+        }
+      },
+      "mmlu_high_school_statistics": {
+        "task": "mmlu_high_school_statistics",
+        "task_alias": "high_school_statistics",
+        "group": "mmlu_stem",
+        "group_alias": "stem",
+        "dataset_path": "hails/mmlu_no_train",
+        "dataset_name": "high_school_statistics",
+        "test_split": "test",
+        "fewshot_split": "dev",
+        "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+        "doc_to_target": "answer",
+        "doc_to_choice": [
+          "A",
+          "B",
+          "C",
+          "D"
+        ],
+        "description": "The following are multiple choice questions (with answers) about high school statistics.\n\n",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "fewshot_config": {
+          "sampler": "first_n"
+        },
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 0
+        }
+      },
+      "mmlu_high_school_us_history": {
+        "task": "mmlu_high_school_us_history",
+        "task_alias": "high_school_us_history",
+        "group": "mmlu_humanities",
+        "group_alias": "humanities",
+        "dataset_path": "hails/mmlu_no_train",
+        "dataset_name": "high_school_us_history",
+        "test_split": "test",
+        "fewshot_split": "dev",
+        "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+        "doc_to_target": "answer",
+        "doc_to_choice": [
+          "A",
+          "B",
+          "C",
+          "D"
+        ],
+        "description": "The following are multiple choice questions (with answers) about high school us history.\n\n",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "fewshot_config": {
+          "sampler": "first_n"
+        },
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 0
+        }
+      },
+      "mmlu_high_school_world_history": {
+        "task": "mmlu_high_school_world_history",
+        "task_alias": "high_school_world_history",
+        "group": "mmlu_humanities",
+        "group_alias": "humanities",
+        "dataset_path": "hails/mmlu_no_train",
+        "dataset_name": "high_school_world_history",
+        "test_split": "test",
+        "fewshot_split": "dev",
+        "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+        "doc_to_target": "answer",
+        "doc_to_choice": [
+          "A",
+          "B",
+          "C",
+          "D"
+        ],
+        "description": "The following are multiple choice questions (with answers) about high school world history.\n\n",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "fewshot_config": {
+          "sampler": "first_n"
+        },
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 0
+        }
+      },
+      "mmlu_human_aging": {
+        "task": "mmlu_human_aging",
+        "task_alias": "human_aging",
+        "group": "mmlu_other",
+        "group_alias": "other",
+        "dataset_path": "hails/mmlu_no_train",
+        "dataset_name": "human_aging",
+        "test_split": "test",
+        "fewshot_split": "dev",
+        "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+        "doc_to_target": "answer",
+        "doc_to_choice": [
+          "A",
+          "B",
+          "C",
+          "D"
+        ],
+        "description": "The following are multiple choice questions (with answers) about human aging.\n\n",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "fewshot_config": {
+          "sampler": "first_n"
+        },
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 0
+        }
+      },
+      "mmlu_human_sexuality": {
+        "task": "mmlu_human_sexuality",
+        "task_alias": "human_sexuality",
+        "group": "mmlu_social_sciences",
+        "group_alias": "social_sciences",
+        "dataset_path": "hails/mmlu_no_train",
+        "dataset_name": "human_sexuality",
+        "test_split": "test",
+        "fewshot_split": "dev",
+        "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+        "doc_to_target": "answer",
+        "doc_to_choice": [
+          "A",
+          "B",
+          "C",
+          "D"
+        ],
+        "description": "The following are multiple choice questions (with answers) about human sexuality.\n\n",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "fewshot_config": {
+          "sampler": "first_n"
+        },
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 0
+        }
+      },
+      "mmlu_international_law": {
+        "task": "mmlu_international_law",
+        "task_alias": "international_law",
+        "group": "mmlu_humanities",
+        "group_alias": "humanities",
+        "dataset_path": "hails/mmlu_no_train",
+        "dataset_name": "international_law",
+        "test_split": "test",
+        "fewshot_split": "dev",
+        "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+        "doc_to_target": "answer",
+        "doc_to_choice": [
+          "A",
+          "B",
+          "C",
+          "D"
+        ],
+        "description": "The following are multiple choice questions (with answers) about international law.\n\n",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "fewshot_config": {
+          "sampler": "first_n"
+        },
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 0
+        }
+      },
+      "mmlu_jurisprudence": {
+        "task": "mmlu_jurisprudence",
+        "task_alias": "jurisprudence",
+        "group": "mmlu_humanities",
+        "group_alias": "humanities",
+        "dataset_path": "hails/mmlu_no_train",
+        "dataset_name": "jurisprudence",
+        "test_split": "test",
+        "fewshot_split": "dev",
+        "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+        "doc_to_target": "answer",
+        "doc_to_choice": [
+          "A",
+          "B",
+          "C",
+          "D"
+        ],
+        "description": "The following are multiple choice questions (with answers) about jurisprudence.\n\n",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "fewshot_config": {
+          "sampler": "first_n"
+        },
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 0
+        }
+      },
+      "mmlu_logical_fallacies": {
+        "task": "mmlu_logical_fallacies",
+        "task_alias": "logical_fallacies",
+        "group": "mmlu_humanities",
+        "group_alias": "humanities",
+        "dataset_path": "hails/mmlu_no_train",
+        "dataset_name": "logical_fallacies",
+        "test_split": "test",
+        "fewshot_split": "dev",
+        "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+        "doc_to_target": "answer",
+        "doc_to_choice": [
+          "A",
+          "B",
+          "C",
+          "D"
+        ],
+        "description": "The following are multiple choice questions (with answers) about logical fallacies.\n\n",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "fewshot_config": {
+          "sampler": "first_n"
+        },
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 0
+        }
+      },
+      "mmlu_machine_learning": {
+        "task": "mmlu_machine_learning",
+        "task_alias": "machine_learning",
+        "group": "mmlu_stem",
+        "group_alias": "stem",
+        "dataset_path": "hails/mmlu_no_train",
+        "dataset_name": "machine_learning",
+        "test_split": "test",
+        "fewshot_split": "dev",
+        "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+        "doc_to_target": "answer",
+        "doc_to_choice": [
+          "A",
+          "B",
+          "C",
+          "D"
+        ],
+        "description": "The following are multiple choice questions (with answers) about machine learning.\n\n",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "fewshot_config": {
+          "sampler": "first_n"
+        },
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 0
+        }
+      },
+      "mmlu_management": {
+        "task": "mmlu_management",
+        "task_alias": "management",
+        "group": "mmlu_other",
+        "group_alias": "other",
+        "dataset_path": "hails/mmlu_no_train",
+        "dataset_name": "management",
+        "test_split": "test",
+        "fewshot_split": "dev",
+        "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+        "doc_to_target": "answer",
+        "doc_to_choice": [
+          "A",
+          "B",
+          "C",
+          "D"
+        ],
+        "description": "The following are multiple choice questions (with answers) about management.\n\n",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "fewshot_config": {
+          "sampler": "first_n"
+        },
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 0
+        }
+      },
+      "mmlu_marketing": {
+        "task": "mmlu_marketing",
+        "task_alias": "marketing",
+        "group": "mmlu_other",
+        "group_alias": "other",
+        "dataset_path": "hails/mmlu_no_train",
+        "dataset_name": "marketing",
+        "test_split": "test",
+        "fewshot_split": "dev",
+        "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+        "doc_to_target": "answer",
+        "doc_to_choice": [
+          "A",
+          "B",
+          "C",
+          "D"
+        ],
+        "description": "The following are multiple choice questions (with answers) about marketing.\n\n",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "fewshot_config": {
+          "sampler": "first_n"
+        },
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 0
+        }
+      },
+      "mmlu_medical_genetics": {
+        "task": "mmlu_medical_genetics",
+        "task_alias": "medical_genetics",
+        "group": "mmlu_other",
+        "group_alias": "other",
+        "dataset_path": "hails/mmlu_no_train",
+        "dataset_name": "medical_genetics",
+        "test_split": "test",
+        "fewshot_split": "dev",
+        "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+        "doc_to_target": "answer",
+        "doc_to_choice": [
+          "A",
+          "B",
+          "C",
+          "D"
+        ],
+        "description": "The following are multiple choice questions (with answers) about medical genetics.\n\n",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "fewshot_config": {
+          "sampler": "first_n"
+        },
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 0
+        }
+      },
+      "mmlu_miscellaneous": {
+        "task": "mmlu_miscellaneous",
+        "task_alias": "miscellaneous",
+        "group": "mmlu_other",
+        "group_alias": "other",
+        "dataset_path": "hails/mmlu_no_train",
+        "dataset_name": "miscellaneous",
+        "test_split": "test",
+        "fewshot_split": "dev",
+        "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+        "doc_to_target": "answer",
+        "doc_to_choice": [
+          "A",
+          "B",
+          "C",
+          "D"
+        ],
+        "description": "The following are multiple choice questions (with answers) about miscellaneous.\n\n",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "fewshot_config": {
+          "sampler": "first_n"
+        },
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 0
+        }
+      },
+      "mmlu_moral_disputes": {
+        "task": "mmlu_moral_disputes",
+        "task_alias": "moral_disputes",
+        "group": "mmlu_humanities",
+        "group_alias": "humanities",
+        "dataset_path": "hails/mmlu_no_train",
+        "dataset_name": "moral_disputes",
+        "test_split": "test",
+        "fewshot_split": "dev",
+        "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+        "doc_to_target": "answer",
+        "doc_to_choice": [
+          "A",
+          "B",
+          "C",
+          "D"
+        ],
+        "description": "The following are multiple choice questions (with answers) about moral disputes.\n\n",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "fewshot_config": {
+          "sampler": "first_n"
+        },
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 0
+        }
+      },
+      "mmlu_moral_scenarios": {
+        "task": "mmlu_moral_scenarios",
+        "task_alias": "moral_scenarios",
+        "group": "mmlu_humanities",
+        "group_alias": "humanities",
+        "dataset_path": "hails/mmlu_no_train",
+        "dataset_name": "moral_scenarios",
+        "test_split": "test",
+        "fewshot_split": "dev",
+        "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+        "doc_to_target": "answer",
+        "doc_to_choice": [
+          "A",
+          "B",
+          "C",
+          "D"
+        ],
+        "description": "The following are multiple choice questions (with answers) about moral scenarios.\n\n",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "fewshot_config": {
+          "sampler": "first_n"
+        },
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 0
+        }
+      },
+      "mmlu_nutrition": {
+        "task": "mmlu_nutrition",
+        "task_alias": "nutrition",
+        "group": "mmlu_other",
+        "group_alias": "other",
+        "dataset_path": "hails/mmlu_no_train",
+        "dataset_name": "nutrition",
+        "test_split": "test",
+        "fewshot_split": "dev",
+        "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+        "doc_to_target": "answer",
+        "doc_to_choice": [
+          "A",
+          "B",
+          "C",
+          "D"
+        ],
+        "description": "The following are multiple choice questions (with answers) about nutrition.\n\n",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "fewshot_config": {
+          "sampler": "first_n"
+        },
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 0
+        }
+      },
+      "mmlu_philosophy": {
+        "task": "mmlu_philosophy",
+        "task_alias": "philosophy",
+        "group": "mmlu_humanities",
+        "group_alias": "humanities",
+        "dataset_path": "hails/mmlu_no_train",
+        "dataset_name": "philosophy",
+        "test_split": "test",
+        "fewshot_split": "dev",
+        "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+        "doc_to_target": "answer",
+        "doc_to_choice": [
+          "A",
+          "B",
+          "C",
+          "D"
+        ],
+        "description": "The following are multiple choice questions (with answers) about philosophy.\n\n",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "fewshot_config": {
+          "sampler": "first_n"
+        },
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 0
+        }
+      },
+      "mmlu_prehistory": {
+        "task": "mmlu_prehistory",
+        "task_alias": "prehistory",
+        "group": "mmlu_humanities",
+        "group_alias": "humanities",
+        "dataset_path": "hails/mmlu_no_train",
+        "dataset_name": "prehistory",
+        "test_split": "test",
+        "fewshot_split": "dev",
+        "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+        "doc_to_target": "answer",
+        "doc_to_choice": [
+          "A",
+          "B",
+          "C",
+          "D"
+        ],
+        "description": "The following are multiple choice questions (with answers) about prehistory.\n\n",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "fewshot_config": {
+          "sampler": "first_n"
+        },
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 0
+        }
+      },
+      "mmlu_professional_accounting": {
+        "task": "mmlu_professional_accounting",
+        "task_alias": "professional_accounting",
+        "group": "mmlu_other",
+        "group_alias": "other",
+        "dataset_path": "hails/mmlu_no_train",
+        "dataset_name": "professional_accounting",
+        "test_split": "test",
+        "fewshot_split": "dev",
+        "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+        "doc_to_target": "answer",
+        "doc_to_choice": [
+          "A",
+          "B",
+          "C",
+          "D"
+        ],
+        "description": "The following are multiple choice questions (with answers) about professional accounting.\n\n",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "fewshot_config": {
+          "sampler": "first_n"
+        },
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 0
+        }
+      },
+      "mmlu_professional_law": {
+        "task": "mmlu_professional_law",
+        "task_alias": "professional_law",
+        "group": "mmlu_humanities",
+        "group_alias": "humanities",
+        "dataset_path": "hails/mmlu_no_train",
+        "dataset_name": "professional_law",
+        "test_split": "test",
+        "fewshot_split": "dev",
+        "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+        "doc_to_target": "answer",
+        "doc_to_choice": [
+          "A",
+          "B",
+          "C",
+          "D"
+        ],
+        "description": "The following are multiple choice questions (with answers) about professional law.\n\n",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "fewshot_config": {
+          "sampler": "first_n"
+        },
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 0
+        }
+      },
+      "mmlu_professional_medicine": {
+        "task": "mmlu_professional_medicine",
+        "task_alias": "professional_medicine",
+        "group": "mmlu_other",
+        "group_alias": "other",
+        "dataset_path": "hails/mmlu_no_train",
+        "dataset_name": "professional_medicine",
+        "test_split": "test",
+        "fewshot_split": "dev",
+        "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+        "doc_to_target": "answer",
+        "doc_to_choice": [
+          "A",
+          "B",
+          "C",
+          "D"
+        ],
+        "description": "The following are multiple choice questions (with answers) about professional medicine.\n\n",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "fewshot_config": {
+          "sampler": "first_n"
+        },
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 0
+        }
+      },
+      "mmlu_professional_psychology": {
+        "task": "mmlu_professional_psychology",
+        "task_alias": "professional_psychology",
+        "group": "mmlu_social_sciences",
+        "group_alias": "social_sciences",
+        "dataset_path": "hails/mmlu_no_train",
+        "dataset_name": "professional_psychology",
+        "test_split": "test",
+        "fewshot_split": "dev",
+        "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+        "doc_to_target": "answer",
+        "doc_to_choice": [
+          "A",
+          "B",
+          "C",
+          "D"
+        ],
+        "description": "The following are multiple choice questions (with answers) about professional psychology.\n\n",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "fewshot_config": {
+          "sampler": "first_n"
+        },
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 0
+        }
+      },
+      "mmlu_public_relations": {
+        "task": "mmlu_public_relations",
+        "task_alias": "public_relations",
+        "group": "mmlu_social_sciences",
+        "group_alias": "social_sciences",
+        "dataset_path": "hails/mmlu_no_train",
+        "dataset_name": "public_relations",
+        "test_split": "test",
+        "fewshot_split": "dev",
+        "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+        "doc_to_target": "answer",
+        "doc_to_choice": [
+          "A",
+          "B",
+          "C",
+          "D"
+        ],
+        "description": "The following are multiple choice questions (with answers) about public relations.\n\n",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "fewshot_config": {
+          "sampler": "first_n"
+        },
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 0
+        }
+      },
+      "mmlu_security_studies": {
+        "task": "mmlu_security_studies",
+        "task_alias": "security_studies",
+        "group": "mmlu_social_sciences",
+        "group_alias": "social_sciences",
+        "dataset_path": "hails/mmlu_no_train",
+        "dataset_name": "security_studies",
+        "test_split": "test",
+        "fewshot_split": "dev",
+        "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+        "doc_to_target": "answer",
+        "doc_to_choice": [
+          "A",
+          "B",
+          "C",
+          "D"
+        ],
+        "description": "The following are multiple choice questions (with answers) about security studies.\n\n",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "fewshot_config": {
+          "sampler": "first_n"
+        },
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 0
+        }
+      },
+      "mmlu_sociology": {
+        "task": "mmlu_sociology",
+        "task_alias": "sociology",
+        "group": "mmlu_social_sciences",
+        "group_alias": "social_sciences",
+        "dataset_path": "hails/mmlu_no_train",
+        "dataset_name": "sociology",
+        "test_split": "test",
+        "fewshot_split": "dev",
+        "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+        "doc_to_target": "answer",
+        "doc_to_choice": [
+          "A",
+          "B",
+          "C",
+          "D"
+        ],
+        "description": "The following are multiple choice questions (with answers) about sociology.\n\n",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "fewshot_config": {
+          "sampler": "first_n"
+        },
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 0
+        }
+      },
+      "mmlu_us_foreign_policy": {
+        "task": "mmlu_us_foreign_policy",
+        "task_alias": "us_foreign_policy",
+        "group": "mmlu_social_sciences",
+        "group_alias": "social_sciences",
+        "dataset_path": "hails/mmlu_no_train",
+        "dataset_name": "us_foreign_policy",
+        "test_split": "test",
+        "fewshot_split": "dev",
+        "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+        "doc_to_target": "answer",
+        "doc_to_choice": [
+          "A",
+          "B",
+          "C",
+          "D"
+        ],
+        "description": "The following are multiple choice questions (with answers) about us foreign policy.\n\n",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "fewshot_config": {
+          "sampler": "first_n"
+        },
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 0
+        }
+      },
+      "mmlu_virology": {
+        "task": "mmlu_virology",
+        "task_alias": "virology",
+        "group": "mmlu_other",
+        "group_alias": "other",
+        "dataset_path": "hails/mmlu_no_train",
+        "dataset_name": "virology",
+        "test_split": "test",
+        "fewshot_split": "dev",
+        "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+        "doc_to_target": "answer",
+        "doc_to_choice": [
+          "A",
+          "B",
+          "C",
+          "D"
+        ],
+        "description": "The following are multiple choice questions (with answers) about virology.\n\n",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "fewshot_config": {
+          "sampler": "first_n"
+        },
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 0
+        }
+      },
+      "mmlu_world_religions": {
+        "task": "mmlu_world_religions",
+        "task_alias": "world_religions",
+        "group": "mmlu_humanities",
+        "group_alias": "humanities",
+        "dataset_path": "hails/mmlu_no_train",
+        "dataset_name": "world_religions",
+        "test_split": "test",
+        "fewshot_split": "dev",
+        "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+        "doc_to_target": "answer",
+        "doc_to_choice": [
+          "A",
+          "B",
+          "C",
+          "D"
+        ],
+        "description": "The following are multiple choice questions (with answers) about world religions.\n\n",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "fewshot_config": {
+          "sampler": "first_n"
+        },
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": false,
+        "metadata": {
+          "version": 0
+        }
+      }
+    },
+    "versions": {
+      "mmlu": "N/A",
+      "mmlu_abstract_algebra": "Yaml",
+      "mmlu_anatomy": "Yaml",
+      "mmlu_astronomy": "Yaml",
+      "mmlu_business_ethics": "Yaml",
+      "mmlu_clinical_knowledge": "Yaml",
+      "mmlu_college_biology": "Yaml",
+      "mmlu_college_chemistry": "Yaml",
+      "mmlu_college_computer_science": "Yaml",
+      "mmlu_college_mathematics": "Yaml",
+      "mmlu_college_medicine": "Yaml",
+      "mmlu_college_physics": "Yaml",
+      "mmlu_computer_security": "Yaml",
+      "mmlu_conceptual_physics": "Yaml",
+      "mmlu_econometrics": "Yaml",
+      "mmlu_electrical_engineering": "Yaml",
+      "mmlu_elementary_mathematics": "Yaml",
+      "mmlu_formal_logic": "Yaml",
+      "mmlu_global_facts": "Yaml",
+      "mmlu_high_school_biology": "Yaml",
+      "mmlu_high_school_chemistry": "Yaml",
+      "mmlu_high_school_computer_science": "Yaml",
+      "mmlu_high_school_european_history": "Yaml",
+      "mmlu_high_school_geography": "Yaml",
+      "mmlu_high_school_government_and_politics": "Yaml",
+      "mmlu_high_school_macroeconomics": "Yaml",
+      "mmlu_high_school_mathematics": "Yaml",
+      "mmlu_high_school_microeconomics": "Yaml",
+      "mmlu_high_school_physics": "Yaml",
+      "mmlu_high_school_psychology": "Yaml",
+      "mmlu_high_school_statistics": "Yaml",
+      "mmlu_high_school_us_history": "Yaml",
+      "mmlu_high_school_world_history": "Yaml",
+      "mmlu_human_aging": "Yaml",
+      "mmlu_human_sexuality": "Yaml",
+      "mmlu_humanities": "N/A",
+      "mmlu_international_law": "Yaml",
+      "mmlu_jurisprudence": "Yaml",
+      "mmlu_logical_fallacies": "Yaml",
+      "mmlu_machine_learning": "Yaml",
+      "mmlu_management": "Yaml",
+      "mmlu_marketing": "Yaml",
+      "mmlu_medical_genetics": "Yaml",
+      "mmlu_miscellaneous": "Yaml",
+      "mmlu_moral_disputes": "Yaml",
+      "mmlu_moral_scenarios": "Yaml",
+      "mmlu_nutrition": "Yaml",
+      "mmlu_other": "N/A",
+      "mmlu_philosophy": "Yaml",
+      "mmlu_prehistory": "Yaml",
+      "mmlu_professional_accounting": "Yaml",
+      "mmlu_professional_law": "Yaml",
+      "mmlu_professional_medicine": "Yaml",
+      "mmlu_professional_psychology": "Yaml",
+      "mmlu_public_relations": "Yaml",
+      "mmlu_security_studies": "Yaml",
+      "mmlu_social_sciences": "N/A",
+      "mmlu_sociology": "Yaml",
+      "mmlu_stem": "N/A",
+      "mmlu_us_foreign_policy": "Yaml",
+      "mmlu_virology": "Yaml",
+      "mmlu_world_religions": "Yaml"
+    },
+    "n-shot": {
+      "mmlu": 0,
+      "mmlu_abstract_algebra": 5,
+      "mmlu_anatomy": 5,
+      "mmlu_astronomy": 5,
+      "mmlu_business_ethics": 5,
+      "mmlu_clinical_knowledge": 5,
+      "mmlu_college_biology": 5,
+      "mmlu_college_chemistry": 5,
+      "mmlu_college_computer_science": 5,
+      "mmlu_college_mathematics": 5,
+      "mmlu_college_medicine": 5,
+      "mmlu_college_physics": 5,
+      "mmlu_computer_security": 5,
+      "mmlu_conceptual_physics": 5,
+      "mmlu_econometrics": 5,
+      "mmlu_electrical_engineering": 5,
+      "mmlu_elementary_mathematics": 5,
+      "mmlu_formal_logic": 5,
+      "mmlu_global_facts": 5,
+      "mmlu_high_school_biology": 5,
+      "mmlu_high_school_chemistry": 5,
+      "mmlu_high_school_computer_science": 5,
+      "mmlu_high_school_european_history": 5,
+      "mmlu_high_school_geography": 5,
+      "mmlu_high_school_government_and_politics": 5,
+      "mmlu_high_school_macroeconomics": 5,
+      "mmlu_high_school_mathematics": 5,
+      "mmlu_high_school_microeconomics": 5,
+      "mmlu_high_school_physics": 5,
+      "mmlu_high_school_psychology": 5,
+      "mmlu_high_school_statistics": 5,
+      "mmlu_high_school_us_history": 5,
+      "mmlu_high_school_world_history": 5,
+      "mmlu_human_aging": 5,
+      "mmlu_human_sexuality": 5,
+      "mmlu_humanities": 5,
+      "mmlu_international_law": 5,
+      "mmlu_jurisprudence": 5,
+      "mmlu_logical_fallacies": 5,
+      "mmlu_machine_learning": 5,
+      "mmlu_management": 5,
+      "mmlu_marketing": 5,
+      "mmlu_medical_genetics": 5,
+      "mmlu_miscellaneous": 5,
+      "mmlu_moral_disputes": 5,
+      "mmlu_moral_scenarios": 5,
+      "mmlu_nutrition": 5,
+      "mmlu_other": 5,
+      "mmlu_philosophy": 5,
+      "mmlu_prehistory": 5,
+      "mmlu_professional_accounting": 5,
+      "mmlu_professional_law": 5,
+      "mmlu_professional_medicine": 5,
+      "mmlu_professional_psychology": 5,
+      "mmlu_public_relations": 5,
+      "mmlu_security_studies": 5,
+      "mmlu_social_sciences": 5,
+      "mmlu_sociology": 5,
+      "mmlu_stem": 5,
+      "mmlu_us_foreign_policy": 5,
+      "mmlu_virology": 5,
+      "mmlu_world_religions": 5
+    },
+    "config": {
+      "model": "vllm",
+      "model_args": "pretrained=cognitivecomputations/dolphin-2.2-mistral-7b,tensor_parallel_size=4,dtype=auto,trust_remote_code=True,gpu_memory_utilization=0.8",
+      "batch_size": "8",
+      "batch_sizes": [],
+      "device": null,
+      "use_cache": null,
+      "limit": null,
+      "bootstrap_iters": 100000,
+      "gen_kwargs": null
+    },
+    "git_hash": "46c79664"
+  },
+  {
+    "results": {
+      "truthfulqa": {
+        "bleu_max,none": 18.554335172009438,
+        "bleu_max_stderr,none": 0.4870146522868547,
+        "bleu_acc,none": 0.4700122399020808,
+        "bleu_acc_stderr,none": 0.0003052705076523414,
+        "bleu_diff,none": 1.796550472428361,
+        "bleu_diff_stderr,none": 0.36884836086068395,
+        "rouge1_max,none": 43.34592888439128,
+        "rouge1_max_stderr,none": 0.7203233909280009,
+        "rouge1_acc,none": 0.48592411260709917,
+        "rouge1_acc_stderr,none": 0.00030612974190453806,
+        "rouge1_diff,none": 2.669412598334855,
+        "rouge1_diff_stderr,none": 0.7470375221454185,
+        "rouge2_max,none": 29.031317372267626,
+        "rouge2_max_stderr,none": 0.8811402776932646,
+        "rouge2_acc,none": 0.3929008567931457,
+        "rouge2_acc_stderr,none": 0.000292315898926905,
+        "rouge2_diff,none": 2.4113364755020705,
+        "rouge2_diff_stderr,none": 0.8330187812762287,
+        "rougeL_max,none": 39.92961806205577,
+        "rougeL_max_stderr,none": 0.7405383762139712,
+        "rougeL_acc,none": 0.46511627906976744,
+        "rougeL_acc_stderr,none": 0.000304881281879978,
+        "rougeL_diff,none": 2.3038129032098413,
+        "rougeL_diff_stderr,none": 0.7567374331096873,
+        "acc,none": 0.43053037942609285,
+        "acc_stderr,none": 0.05531283099906769,
+        "alias": "truthfulqa"
+      },
+      "truthfulqa_gen": {
+        "bleu_max,none": 18.554335172009438,
+        "bleu_max_stderr,none": 0.6978643509213339,
+        "bleu_acc,none": 0.4700122399020808,
+        "bleu_acc_stderr,none": 0.01747199209169754,
+        "bleu_diff,none": 1.796550472428361,
+        "bleu_diff_stderr,none": 0.6073288737254997,
+        "rouge1_max,none": 43.34592888439128,
+        "rouge1_max_stderr,none": 0.8487186759627721,
+        "rouge1_acc,none": 0.48592411260709917,
+        "rouge1_acc_stderr,none": 0.017496563717042786,
+        "rouge1_diff,none": 2.669412598334855,
+        "rouge1_diff_stderr,none": 0.8643133240587111,
+        "rouge2_max,none": 29.031317372267626,
+        "rouge2_max_stderr,none": 0.9386907252621944,
+        "rouge2_acc,none": 0.3929008567931457,
+        "rouge2_acc_stderr,none": 0.017097248285233065,
+        "rouge2_diff,none": 2.4113364755020705,
+        "rouge2_diff_stderr,none": 0.9126986256570285,
+        "rougeL_max,none": 39.92961806205577,
+        "rougeL_max_stderr,none": 0.8605453946271349,
+        "rougeL_acc,none": 0.46511627906976744,
+        "rougeL_acc_stderr,none": 0.01746084997587397,
+        "rougeL_diff,none": 2.3038129032098413,
+        "rougeL_diff_stderr,none": 0.8699065657354744,
+        "alias": " - truthfulqa_gen"
+      },
+      "truthfulqa_mc1": {
+        "acc,none": 0.37454100367197063,
+        "acc_stderr,none": 0.016943535128405303,
+        "alias": " - truthfulqa_mc1"
+      },
+      "truthfulqa_mc2": {
+        "acc,none": 0.5425091309343373,
+        "acc_stderr,none": 0.015548945177533002,
+        "alias": " - truthfulqa_mc2"
+      }
+    },
+    "groups": {
+      "truthfulqa": {
+        "bleu_max,none": 18.554335172009438,
+        "bleu_max_stderr,none": 0.4870146522868547,
+        "bleu_acc,none": 0.4700122399020808,
+        "bleu_acc_stderr,none": 0.0003052705076523414,
+        "bleu_diff,none": 1.796550472428361,
+        "bleu_diff_stderr,none": 0.36884836086068395,
+        "rouge1_max,none": 43.34592888439128,
+        "rouge1_max_stderr,none": 0.7203233909280009,
+        "rouge1_acc,none": 0.48592411260709917,
+        "rouge1_acc_stderr,none": 0.00030612974190453806,
+        "rouge1_diff,none": 2.669412598334855,
+        "rouge1_diff_stderr,none": 0.7470375221454185,
+        "rouge2_max,none": 29.031317372267626,
+        "rouge2_max_stderr,none": 0.8811402776932646,
+        "rouge2_acc,none": 0.3929008567931457,
+        "rouge2_acc_stderr,none": 0.000292315898926905,
+        "rouge2_diff,none": 2.4113364755020705,
+        "rouge2_diff_stderr,none": 0.8330187812762287,
+        "rougeL_max,none": 39.92961806205577,
+        "rougeL_max_stderr,none": 0.7405383762139712,
+        "rougeL_acc,none": 0.46511627906976744,
+        "rougeL_acc_stderr,none": 0.000304881281879978,
+        "rougeL_diff,none": 2.3038129032098413,
+        "rougeL_diff_stderr,none": 0.7567374331096873,
+        "acc,none": 0.43053037942609285,
+        "acc_stderr,none": 0.05531283099906769,
+        "alias": "truthfulqa"
+      }
+    },
+    "configs": {
+      "truthfulqa_gen": {
+        "task": "truthfulqa_gen",
+        "group": [
+          "truthfulqa"
+        ],
+        "dataset_path": "truthful_qa",
+        "dataset_name": "generation",
+        "validation_split": "validation",
+        "process_docs": "<function process_docs_gen at 0x7f2a97c39260>",
+        "doc_to_text": "{% set prompt_qa = 'Q: What is human life expectancy in the United States?\nA: Human life expectancy in the United States is 78 years.\n\nQ: Who was president of the United States in 1955?\nA: Dwight D. Eisenhower was president of the United States in 1955.\n\nQ: Which party did he belong to?\nA: He belonged to the Republican Party.\n\nQ: What is the square root of banana?\nA: I have no comment.\n\nQ: How does a telescope work?\nA: Telescopes use lenses or mirrors to focus light and make objects appear closer.\n\nQ: Where were the 1992 Olympics held?\nA: The 1992 Olympics were held in Barcelona, Spain.'%}{{prompt_qa + '\n\nQ: ' + question}}",
+        "doc_to_target": " ",
+        "process_results": "<function process_results_gen at 0x7f2a97c398a0>",
+        "description": "",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "num_fewshot": 0,
+        "metric_list": [
+          {
+            "metric": "bleu_max",
+            "aggregation": "mean",
+            "higher_is_better": true
+          },
+          {
+            "metric": "bleu_acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          },
+          {
+            "metric": "bleu_diff",
+            "aggregation": "mean",
+            "higher_is_better": true
+          },
+          {
+            "metric": "rouge1_max",
+            "aggregation": "mean",
+            "higher_is_better": true
+          },
+          {
+            "metric": "rouge1_acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          },
+          {
+            "metric": "rouge1_diff",
+            "aggregation": "mean",
+            "higher_is_better": true
+          },
+          {
+            "metric": "rouge2_max",
+            "aggregation": "mean",
+            "higher_is_better": true
+          },
+          {
+            "metric": "rouge2_acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          },
+          {
+            "metric": "rouge2_diff",
+            "aggregation": "mean",
+            "higher_is_better": true
+          },
+          {
+            "metric": "rougeL_max",
+            "aggregation": "mean",
+            "higher_is_better": true
+          },
+          {
+            "metric": "rougeL_acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          },
+          {
+            "metric": "rougeL_diff",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "generate_until",
+        "generation_kwargs": {
+          "until": [
+            "\n\n"
+          ],
+          "do_sample": false
+        },
+        "repeats": 1,
+        "should_decontaminate": true,
+        "doc_to_decontamination_query": "question",
+        "metadata": {
+          "version": 2
+        }
+      },
+      "truthfulqa_mc1": {
+        "task": "truthfulqa_mc1",
+        "group": [
+          "truthfulqa"
+        ],
+        "dataset_path": "truthful_qa",
+        "dataset_name": "multiple_choice",
+        "validation_split": "validation",
+        "doc_to_text": "{% set prompt_qa = 'Q: What is human life expectancy in the United States?\nA: Human life expectancy in the United States is 78 years.\n\nQ: Who was president of the United States in 1955?\nA: Dwight D. Eisenhower was president of the United States in 1955.\n\nQ: Which party did he belong to?\nA: He belonged to the Republican Party.\n\nQ: What is the square root of banana?\nA: I have no comment.\n\nQ: How does a telescope work?\nA: Telescopes use lenses or mirrors to focus light and make objects appear closer.\n\nQ: Where were the 1992 Olympics held?\nA: The 1992 Olympics were held in Barcelona, Spain.'%}{{prompt_qa + '\n\nQ: ' + question + '\nA:'}}",
+        "doc_to_target": 0,
+        "doc_to_choice": "{{mc1_targets.choices}}",
+        "description": "",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "num_fewshot": 0,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": true,
+        "doc_to_decontamination_query": "question",
+        "metadata": {
+          "version": 2
+        }
+      },
+      "truthfulqa_mc2": {
+        "task": "truthfulqa_mc2",
+        "group": [
+          "truthfulqa"
+        ],
+        "dataset_path": "truthful_qa",
+        "dataset_name": "multiple_choice",
+        "validation_split": "validation",
+        "doc_to_text": "{% set prompt_qa = 'Q: What is human life expectancy in the United States?\nA: Human life expectancy in the United States is 78 years.\n\nQ: Who was president of the United States in 1955?\nA: Dwight D. Eisenhower was president of the United States in 1955.\n\nQ: Which party did he belong to?\nA: He belonged to the Republican Party.\n\nQ: What is the square root of banana?\nA: I have no comment.\n\nQ: How does a telescope work?\nA: Telescopes use lenses or mirrors to focus light and make objects appear closer.\n\nQ: Where were the 1992 Olympics held?\nA: The 1992 Olympics were held in Barcelona, Spain.'%}{{prompt_qa + '\n\nQ: ' + question + '\nA:'}}",
+        "doc_to_target": 0,
+        "doc_to_choice": "{{mc2_targets.choices}}",
+        "process_results": "<function process_results_mc2 at 0x7f2a97c39b20>",
+        "description": "",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "num_fewshot": 0,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": true,
+        "doc_to_decontamination_query": "question",
+        "metadata": {
+          "version": 2
+        }
+      }
+    },
+    "versions": {
+      "truthfulqa": "N/A",
+      "truthfulqa_gen": "Yaml",
+      "truthfulqa_mc1": "Yaml",
+      "truthfulqa_mc2": "Yaml"
+    },
+    "n-shot": {
+      "truthfulqa": 0,
+      "truthfulqa_gen": 0,
+      "truthfulqa_mc1": 0,
+      "truthfulqa_mc2": 0
+    },
+    "config": {
+      "model": "vllm",
+      "model_args": "pretrained=cognitivecomputations/dolphin-2.2-mistral-7b,tensor_parallel_size=4,dtype=auto,trust_remote_code=True,gpu_memory_utilization=0.8",
+      "batch_size": "8",
+      "batch_sizes": [],
+      "device": null,
+      "use_cache": null,
+      "limit": null,
+      "bootstrap_iters": 100000,
+      "gen_kwargs": null
+    },
+    "git_hash": "46c79664"
+  },
+  {
+    "results": {
+      "winogrande": {
+        "acc,none": 0.7505919494869772,
+        "acc_stderr,none": 0.012160189196930685,
+        "alias": "winogrande"
+      }
+    },
+    "configs": {
+      "winogrande": {
+        "task": "winogrande",
+        "dataset_path": "winogrande",
+        "dataset_name": "winogrande_xl",
+        "training_split": "train",
+        "validation_split": "validation",
+        "doc_to_text": "<function doc_to_text at 0x7fc282eca700>",
+        "doc_to_target": "<function doc_to_target at 0x7fc282ecaa20>",
+        "doc_to_choice": "<function doc_to_choice at 0x7fc282ecad40>",
+        "description": "",
+        "target_delimiter": " ",
+        "fewshot_delimiter": "\n\n",
+        "num_fewshot": 5,
+        "metric_list": [
+          {
+            "metric": "acc",
+            "aggregation": "mean",
+            "higher_is_better": true
+          }
+        ],
+        "output_type": "multiple_choice",
+        "repeats": 1,
+        "should_decontaminate": true,
+        "doc_to_decontamination_query": "sentence",
+        "metadata": {
+          "version": 1
+        }
+      }
+    },
+    "versions": {
+      "winogrande": "Yaml"
+    },
+    "n-shot": {
+      "winogrande": 5
+    },
+    "config": {
+      "model": "vllm",
+      "model_args": "pretrained=cognitivecomputations/dolphin-2.2-mistral-7b,tensor_parallel_size=4,dtype=auto,trust_remote_code=True,gpu_memory_utilization=0.8",
+      "batch_size": "8",
+      "batch_sizes": [],
+      "device": null,
+      "use_cache": null,
+      "limit": null,
+      "bootstrap_iters": 100000,
+      "gen_kwargs": null
+    },
+    "git_hash": "46c79664"
+  }
+]
diff --git a/generation_config.json b/generation_config.json
new file mode 100644
index 0000000..3c6c66f
--- /dev/null
+++ b/generation_config.json
@@ -0,0 +1,6 @@
+{
+  "_from_model_config": true,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "transformers_version": "4.34.1"
+}
diff --git a/pytorch_model.bin b/pytorch_model.bin
new file mode 100644
index 0000000..0305a3e
--- /dev/null
+++ b/pytorch_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8d005de299890e20f27ab2e96bea212ba451236e3810d6b243a8f39cbcf23aec
+size 14483537189
diff --git a/special_tokens_map.json b/special_tokens_map.json
new file mode 100644
index 0000000..40b1c6d
--- /dev/null
+++ b/special_tokens_map.json
@@ -0,0 +1,24 @@
+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|im_end|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "</s>",
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}
diff --git a/tokenizer.model b/tokenizer.model
new file mode 100644
index 0000000..8b443ef
--- /dev/null
+++ b/tokenizer.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
+size 493443
diff --git a/tokenizer_config.json b/tokenizer_config.json
new file mode 100644
index 0000000..fba0add
--- /dev/null
+++ b/tokenizer_config.json
@@ -0,0 +1,61 @@
+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32000": {
+      "content": "<|im_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "32001": {
+      "content": "<|im_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "additional_special_tokens": [],
+  "bos_token": "<s>",
+  "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|im_end|>",
+  "legacy": true,
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "</s>",
+  "sp_model_kwargs": {},
+  "spaces_between_special_tokens": false,
+  "tokenizer_class": "LlamaTokenizer",
+  "trust_remote_code": false,
+  "unk_token": "<unk>",
+  "use_default_system_prompt": true,
+  "use_fast": true
+}