From 387822214844efc01df669c66a48b53881ae6084 Mon Sep 17 00:00:00 2001 From: Orenguteng Date: Tue, 3 Sep 2024 16:14:02 +0000 Subject: [PATCH] Adding Evaluation Results (#4) - Adding Evaluation Results (b29674c4894783a1b6c199a5d2d7c8864e116f58) Co-authored-by: Open LLM Leaderboard PR Bot --- README.md | 109 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) diff --git a/README.md b/README.md index c700374..8c50147 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,100 @@ --- license: llama3.1 +model-index: +- name: Llama-3.1-8B-Lexi-Uncensored-V2 + results: + - task: + type: text-generation + name: Text Generation + dataset: + name: IFEval (0-Shot) + type: HuggingFaceH4/ifeval + args: + num_few_shot: 0 + metrics: + - type: inst_level_strict_acc and prompt_level_strict_acc + value: 77.92 + name: strict accuracy + source: + url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=Orenguteng/Llama-3.1-8B-Lexi-Uncensored-V2 + name: Open LLM Leaderboard + - task: + type: text-generation + name: Text Generation + dataset: + name: BBH (3-Shot) + type: BBH + args: + num_few_shot: 3 + metrics: + - type: acc_norm + value: 29.69 + name: normalized accuracy + source: + url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=Orenguteng/Llama-3.1-8B-Lexi-Uncensored-V2 + name: Open LLM Leaderboard + - task: + type: text-generation + name: Text Generation + dataset: + name: MATH Lvl 5 (4-Shot) + type: hendrycks/competition_math + args: + num_few_shot: 4 + metrics: + - type: exact_match + value: 16.92 + name: exact match + source: + url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=Orenguteng/Llama-3.1-8B-Lexi-Uncensored-V2 + name: Open LLM Leaderboard + - task: + type: text-generation + name: Text Generation + dataset: + name: GPQA (0-shot) + type: Idavidrein/gpqa + args: + num_few_shot: 0 + metrics: + - type: acc_norm + value: 4.36 + name: acc_norm + source: + url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=Orenguteng/Llama-3.1-8B-Lexi-Uncensored-V2 + name: Open LLM Leaderboard + - task: + type: text-generation + name: Text Generation + dataset: + name: MuSR (0-shot) + type: TAUR-Lab/MuSR + args: + num_few_shot: 0 + metrics: + - type: acc_norm + value: 7.77 + name: acc_norm + source: + url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=Orenguteng/Llama-3.1-8B-Lexi-Uncensored-V2 + name: Open LLM Leaderboard + - task: + type: text-generation + name: Text Generation + dataset: + name: MMLU-PRO (5-shot) + type: TIGER-Lab/MMLU-Pro + config: main + split: test + args: + num_few_shot: 5 + metrics: + - type: acc + value: 30.9 + name: accuracy + source: + url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=Orenguteng/Llama-3.1-8B-Lexi-Uncensored-V2 + name: Open LLM Leaderboard --- ![image/png](https://cdn-uploads.huggingface.co/production/uploads/644ad182f434a6a63b18eee6/7mnEJyioRzQaWz8xLM4KI.png) @@ -42,3 +137,17 @@ If you find any issues or have suggestions for improvements, feel free to leave ![image/png](https://cdn-uploads.huggingface.co/production/uploads/644ad182f434a6a63b18eee6/uqJv-R1LeJEfMxi1nmTH5.png) + +# [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard) +Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_Orenguteng__Llama-3.1-8B-Lexi-Uncensored-V2) + +| Metric |Value| +|-------------------|----:| +|Avg. |27.93| +|IFEval (0-Shot) |77.92| +|BBH (3-Shot) |29.69| +|MATH Lvl 5 (4-Shot)|16.92| +|GPQA (0-shot) | 4.36| +|MuSR (0-shot) | 7.77| +|MMLU-PRO (5-shot) |30.90| +