From af01c777bb6ac33cc08cd390701ba80db423b38b Mon Sep 17 00:00:00 2001 From: Eugenio Schiavoni Date: Thu, 23 May 2024 13:48:51 +0000 Subject: [PATCH] Adding Evaluation Results (#1) - Adding Evaluation Results (c1cf0263a4821f0c27aedc76148a0ce1f15617b6) Co-authored-by: Open LLM Leaderboard PR Bot --- README.md | 118 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 117 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 09a4bf6..ea5b027 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,109 @@ license: apache-2.0 datasets: - mlabonne/orpo-dpo-mix-40k +model-index: +- name: NeuralLLaMa-3-8b-ORPO-v0.3 + results: + - task: + type: text-generation + name: Text Generation + dataset: + name: AI2 Reasoning Challenge (25-Shot) + type: ai2_arc + config: ARC-Challenge + split: test + args: + num_few_shot: 25 + metrics: + - type: acc_norm + value: 69.54 + name: normalized accuracy + source: + url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=Kukedlc/NeuralLLaMa-3-8b-ORPO-v0.3 + name: Open LLM Leaderboard + - task: + type: text-generation + name: Text Generation + dataset: + name: HellaSwag (10-Shot) + type: hellaswag + split: validation + args: + num_few_shot: 10 + metrics: + - type: acc_norm + value: 84.9 + name: normalized accuracy + source: + url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=Kukedlc/NeuralLLaMa-3-8b-ORPO-v0.3 + name: Open LLM Leaderboard + - task: + type: text-generation + name: Text Generation + dataset: + name: MMLU (5-Shot) + type: cais/mmlu + config: all + split: test + args: + num_few_shot: 5 + metrics: + - type: acc + value: 68.39 + name: accuracy + source: + url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=Kukedlc/NeuralLLaMa-3-8b-ORPO-v0.3 + name: Open LLM Leaderboard + - task: + type: text-generation + name: Text Generation + dataset: + name: TruthfulQA (0-shot) + type: truthful_qa + config: multiple_choice + split: validation + args: + num_few_shot: 0 + metrics: + - type: mc2 + value: 60.82 + source: + url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=Kukedlc/NeuralLLaMa-3-8b-ORPO-v0.3 + name: Open LLM Leaderboard + - task: + type: text-generation + name: Text Generation + dataset: + name: Winogrande (5-shot) + type: winogrande + config: winogrande_xl + split: validation + args: + num_few_shot: 5 + metrics: + - type: acc + value: 79.4 + name: accuracy + source: + url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=Kukedlc/NeuralLLaMa-3-8b-ORPO-v0.3 + name: Open LLM Leaderboard + - task: + type: text-generation + name: Text Generation + dataset: + name: GSM8k (5-shot) + type: gsm8k + config: main + split: test + args: + num_few_shot: 5 + metrics: + - type: acc + value: 72.93 + name: accuracy + source: + url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=Kukedlc/NeuralLLaMa-3-8b-ORPO-v0.3 + name: Open LLM Leaderboard --- --- library_name: transformers @@ -201,4 +304,17 @@ Carbon emissions can be estimated using the [Machine Learning Impact calculator] ## Model Card Contact -[More Information Needed] \ No newline at end of file +[More Information Needed] +# [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard) +Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_Kukedlc__NeuralLLaMa-3-8b-ORPO-v0.3) + +| Metric |Value| +|---------------------------------|----:| +|Avg. |72.66| +|AI2 Reasoning Challenge (25-Shot)|69.54| +|HellaSwag (10-Shot) |84.90| +|MMLU (5-Shot) |68.39| +|TruthfulQA (0-shot) |60.82| +|Winogrande (5-shot) |79.40| +|GSM8k (5-shot) |72.93| +