diff --git a/README.md b/README.md index 63d2f38..2c30a31 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,113 @@ --- +language: +- en license: apache-2.0 datasets: - habanoz/airoboros-3.1-no-mathjson-max-1k -language: -- en pipeline_tag: text-generation +model-index: +- name: TinyLlama-1.1B-intermediate-step-715k-1.5T-lr-5-1epch-airoboros3.1-1k-instruct-V1 + results: + - task: + type: text-generation + name: Text Generation + dataset: + name: AI2 Reasoning Challenge (25-Shot) + type: ai2_arc + config: ARC-Challenge + split: test + args: + num_few_shot: 25 + metrics: + - type: acc_norm + value: 30.72 + name: normalized accuracy + source: + url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=habanoz/TinyLlama-1.1B-intermediate-step-715k-1.5T-lr-5-1epch-airoboros3.1-1k-instruct-V1 + name: Open LLM Leaderboard + - task: + type: text-generation + name: Text Generation + dataset: + name: HellaSwag (10-Shot) + type: hellaswag + split: validation + args: + num_few_shot: 10 + metrics: + - type: acc_norm + value: 54.32 + name: normalized accuracy + source: + url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=habanoz/TinyLlama-1.1B-intermediate-step-715k-1.5T-lr-5-1epch-airoboros3.1-1k-instruct-V1 + name: Open LLM Leaderboard + - task: + type: text-generation + name: Text Generation + dataset: + name: MMLU (5-Shot) + type: cais/mmlu + config: all + split: test + args: + num_few_shot: 5 + metrics: + - type: acc + value: 24.78 + name: accuracy + source: + url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=habanoz/TinyLlama-1.1B-intermediate-step-715k-1.5T-lr-5-1epch-airoboros3.1-1k-instruct-V1 + name: Open LLM Leaderboard + - task: + type: text-generation + name: Text Generation + dataset: + name: TruthfulQA (0-shot) + type: truthful_qa + config: multiple_choice + split: validation + args: + num_few_shot: 0 + metrics: + - type: mc2 + value: 41.67 + source: + url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=habanoz/TinyLlama-1.1B-intermediate-step-715k-1.5T-lr-5-1epch-airoboros3.1-1k-instruct-V1 + name: Open LLM Leaderboard + - task: + type: text-generation + name: Text Generation + dataset: + name: Winogrande (5-shot) + type: winogrande + config: winogrande_xl + split: validation + args: + num_few_shot: 5 + metrics: + - type: acc + value: 57.62 + name: accuracy + source: + url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=habanoz/TinyLlama-1.1B-intermediate-step-715k-1.5T-lr-5-1epch-airoboros3.1-1k-instruct-V1 + name: Open LLM Leaderboard + - task: + type: text-generation + name: Text Generation + dataset: + name: GSM8k (5-shot) + type: gsm8k + config: main + split: test + args: + num_few_shot: 5 + metrics: + - type: acc + value: 0.76 + name: accuracy + source: + url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=habanoz/TinyLlama-1.1B-intermediate-step-715k-1.5T-lr-5-1epch-airoboros3.1-1k-instruct-V1 + name: Open LLM Leaderboard --- TinyLlama-1.1B-intermediate-step-715k-1.5T finetuned using airoboros-3.1-no-mathjson-max-1k dataset. @@ -58,4 +161,17 @@ accelerate launch $BASE_DIR/qlora/train.py \ --gradient_checkpointing \ --use_flash_attention_2 \ --ddp_find_unused_parameters False - ``` \ No newline at end of file + ``` +# [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard) +Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_habanoz__TinyLlama-1.1B-intermediate-step-715k-1.5T-lr-5-1epch-airoboros3.1-1k-instruct-V1) + +| Metric |Value| +|---------------------------------|----:| +|Avg. |34.98| +|AI2 Reasoning Challenge (25-Shot)|30.72| +|HellaSwag (10-Shot) |54.32| +|MMLU (5-Shot) |24.78| +|TruthfulQA (0-shot) |41.67| +|Winogrande (5-shot) |57.62| +|GSM8k (5-shot) | 0.76| +