Adding Evaluation Results (#1)

- Adding Evaluation Results (1b67be324314b0c5ca026fc130682db4d42e713e) Co-authored-by: Open LLM Leaderboard PR Bot <leaderboard-pr-bot@users.noreply.huggingface.co>
2024-09-19 13:18:09 +00:00
parent 1fe849c1e7
commit c35899dadc
1 changed files with 110 additions and 2 deletions
--- a/README.md
+++ b/README.md
@@ -1,4 +1,5 @@
 ---
 license: other
 tags:
 - merge
 - mergekit
@@ -10,7 +11,101 @@ base_model:
 - mlabonne/ChimeraLlama-3-8B-v2
 - nbeerbower/llama-3-stella-8B
 - uygarkurt/llama-3-merged-linear
-license: other
+model-index:
 - name: NeuralLLaMa-3-8b-DT-v0.1
  results:
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: IFEval (0-Shot)
      type: HuggingFaceH4/ifeval
      args:
        num_few_shot: 0
    metrics:
    - type: inst_level_strict_acc and prompt_level_strict_acc
      value: 43.71
      name: strict accuracy
    source:
      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=Kukedlc/NeuralLLaMa-3-8b-DT-v0.1
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: BBH (3-Shot)
      type: BBH
      args:
        num_few_shot: 3
    metrics:
    - type: acc_norm
      value: 28.01
      name: normalized accuracy
    source:
      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=Kukedlc/NeuralLLaMa-3-8b-DT-v0.1
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: MATH Lvl 5 (4-Shot)
      type: hendrycks/competition_math
      args:
        num_few_shot: 4
    metrics:
    - type: exact_match
      value: 7.25
      name: exact match
    source:
      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=Kukedlc/NeuralLLaMa-3-8b-DT-v0.1
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: GPQA (0-shot)
      type: Idavidrein/gpqa
      args:
        num_few_shot: 0
    metrics:
    - type: acc_norm
      value: 7.05
      name: acc_norm
    source:
      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=Kukedlc/NeuralLLaMa-3-8b-DT-v0.1
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: MuSR (0-shot)
      type: TAUR-Lab/MuSR
      args:
        num_few_shot: 0
    metrics:
    - type: acc_norm
      value: 9.69
      name: acc_norm
    source:
      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=Kukedlc/NeuralLLaMa-3-8b-DT-v0.1
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: MMLU-PRO (5-shot)
      type: TIGER-Lab/MMLU-Pro
      config: main
      split: test
      args:
        num_few_shot: 5
    metrics:
    - type: acc
      value: 31.02
      name: accuracy
    source:
      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=Kukedlc/NeuralLLaMa-3-8b-DT-v0.1
      name: Open LLM Leaderboard
 ---
 # NeuralLLaMa-3-8b-DT-v0.1
@@ -89,4 +184,17 @@ stop_token = "<|eot_id|>"
 stop = tokenizer.encode(stop_token)[0]
 _ = model.generate(**inputs, streamer=streamer, max_new_tokens=1024, do_sample=True, temperature=0.7, repetition_penalty=1.2, top_p=0.9, eos_token_id=stop)
-```
+```
 # [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard)
 Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_Kukedlc__NeuralLLaMa-3-8b-DT-v0.1)
 |      Metric       |Value|
 |-------------------|----:|
 |Avg.               |21.12|
 |IFEval (0-Shot)    |43.71|
 |BBH (3-Shot)       |28.01|
 |MATH Lvl 5 (4-Shot)| 7.25|
 |GPQA (0-shot)      | 7.05|
 |MuSR (0-shot)      | 9.69|
 |MMLU-PRO (5-shot)  |31.02|