diff --git a/README.md b/README.md
index 9708d21..005666f 100644
--- a/README.md
+++ b/README.md
@@ -1,13 +1,108 @@
 ---
-base_model:
-- princeton-nlp/Llama-3-Instruct-8B-SimPO
-- UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter3
+license: llama3
 library_name: transformers
 tags:
 - mergekit
 - merge
-license: llama3
+base_model:
+- princeton-nlp/Llama-3-Instruct-8B-SimPO
+- UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter3
 pipeline_tag: text-generation
+model-index:
+- name: Llama-3-Instruct-8B-SPPO-Iter3-SimPO-merge
+  results:
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: IFEval (0-Shot)
+      type: HuggingFaceH4/ifeval
+      args:
+        num_few_shot: 0
+    metrics:
+    - type: inst_level_strict_acc and prompt_level_strict_acc
+      value: 42.71
+      name: strict accuracy
+    source:
+      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=grimjim/Llama-3-Instruct-8B-SPPO-Iter3-SimPO-merge
+      name: Open LLM Leaderboard
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: BBH (3-Shot)
+      type: BBH
+      args:
+        num_few_shot: 3
+    metrics:
+    - type: acc_norm
+      value: 28.26
+      name: normalized accuracy
+    source:
+      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=grimjim/Llama-3-Instruct-8B-SPPO-Iter3-SimPO-merge
+      name: Open LLM Leaderboard
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: MATH Lvl 5 (4-Shot)
+      type: hendrycks/competition_math
+      args:
+        num_few_shot: 4
+    metrics:
+    - type: exact_match
+      value: 9.37
+      name: exact match
+    source:
+      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=grimjim/Llama-3-Instruct-8B-SPPO-Iter3-SimPO-merge
+      name: Open LLM Leaderboard
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: GPQA (0-shot)
+      type: Idavidrein/gpqa
+      args:
+        num_few_shot: 0
+    metrics:
+    - type: acc_norm
+      value: 5.37
+      name: acc_norm
+    source:
+      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=grimjim/Llama-3-Instruct-8B-SPPO-Iter3-SimPO-merge
+      name: Open LLM Leaderboard
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: MuSR (0-shot)
+      type: TAUR-Lab/MuSR
+      args:
+        num_few_shot: 0
+    metrics:
+    - type: acc_norm
+      value: 9.54
+      name: acc_norm
+    source:
+      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=grimjim/Llama-3-Instruct-8B-SPPO-Iter3-SimPO-merge
+      name: Open LLM Leaderboard
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: MMLU-PRO (5-shot)
+      type: TIGER-Lab/MMLU-Pro
+      config: main
+      split: test
+      args:
+        num_few_shot: 5
+    metrics:
+    - type: acc
+      value: 29.17
+      name: accuracy
+    source:
+      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=grimjim/Llama-3-Instruct-8B-SPPO-Iter3-SimPO-merge
+      name: Open LLM Leaderboard
 ---
 
 # Llama-3-Instruct-8B-SPPO-Iter3-SimPO-merge
@@ -63,3 +158,17 @@ parameters:
 dtype: bfloat16
 ```
 
+
+# [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard)
+Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_grimjim__Llama-3-Instruct-8B-SPPO-Iter3-SimPO-merge)
+
+| Metric |Value|
+|-------------------|----:|
+|Avg. |20.74|
+|IFEval (0-Shot) |42.71|
+|BBH (3-Shot) |28.26|
+|MATH Lvl 5 (4-Shot)| 9.37|
+|GPQA (0-shot) | 5.37|
+|MuSR (0-shot) | 9.54|
+|MMLU-PRO (5-shot) |29.17|
+