初始化项目,由ModelHub XC社区提供模型
Model: HoangTran223/SFT_5e-5_Qwen2.5-1.5B_Ultrafb_2e Source: Original Platform
This commit is contained in:
70
leaderboard_results_base_model/arc_challenge_results.json
Normal file
70
leaderboard_results_base_model/arc_challenge_results.json
Normal file
@@ -0,0 +1,70 @@
|
||||
{
|
||||
"results": {
|
||||
"arc_challenge": {
|
||||
"acc,none": 0.492320819112628,
|
||||
"acc_stderr,none": 0.014609667440892515,
|
||||
"acc_norm,none": 0.5460750853242321,
|
||||
"acc_norm_stderr,none": 0.014549221105171829,
|
||||
"alias": "arc_challenge"
|
||||
}
|
||||
},
|
||||
"configs": {
|
||||
"arc_challenge": {
|
||||
"task": "arc_challenge",
|
||||
"group": [
|
||||
"ai2_arc"
|
||||
],
|
||||
"dataset_path": "ai2_arc",
|
||||
"dataset_name": "ARC-Challenge",
|
||||
"training_split": "train",
|
||||
"validation_split": "validation",
|
||||
"test_split": "test",
|
||||
"doc_to_text": "Question: {{question}}\nAnswer:",
|
||||
"doc_to_target": "{{choices.label.index(answerKey)}}",
|
||||
"doc_to_choice": "{{choices.text}}",
|
||||
"description": "",
|
||||
"target_delimiter": " ",
|
||||
"fewshot_delimiter": "\n\n",
|
||||
"num_fewshot": 25,
|
||||
"metric_list": [
|
||||
{
|
||||
"metric": "acc",
|
||||
"aggregation": "mean",
|
||||
"higher_is_better": true
|
||||
},
|
||||
{
|
||||
"metric": "acc_norm",
|
||||
"aggregation": "mean",
|
||||
"higher_is_better": true
|
||||
}
|
||||
],
|
||||
"output_type": "multiple_choice",
|
||||
"repeats": 1,
|
||||
"should_decontaminate": true,
|
||||
"doc_to_decontamination_query": "Question: {{question}}\nAnswer:",
|
||||
"metadata": [
|
||||
{
|
||||
"version": 1.0
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"versions": {
|
||||
"arc_challenge": "Yaml"
|
||||
},
|
||||
"n-shot": {
|
||||
"arc_challenge": 25
|
||||
},
|
||||
"config": {
|
||||
"model": "vllm",
|
||||
"model_args": "pretrained=/home/hungpv/projects/SWIFT/model_hub/Qwen2.5-1.5B,tensor_parallel_size=1,dtype=auto,gpu_memory_utilization=0.8",
|
||||
"batch_size": "auto",
|
||||
"batch_sizes": [],
|
||||
"device": null,
|
||||
"use_cache": null,
|
||||
"limit": null,
|
||||
"bootstrap_iters": 100000,
|
||||
"gen_kwargs": {}
|
||||
},
|
||||
"git_hash": "24768f7"
|
||||
}
|
||||
90
leaderboard_results_base_model/gsm8k_results.json
Normal file
90
leaderboard_results_base_model/gsm8k_results.json
Normal file
@@ -0,0 +1,90 @@
|
||||
{
|
||||
"results": {
|
||||
"gsm8k": {
|
||||
"exact_match,get-answer": 0.6087945413191812,
|
||||
"exact_match_stderr,get-answer": 0.013442502402794302,
|
||||
"alias": "gsm8k"
|
||||
}
|
||||
},
|
||||
"configs": {
|
||||
"gsm8k": {
|
||||
"task": "gsm8k",
|
||||
"group": [
|
||||
"math_word_problems"
|
||||
],
|
||||
"dataset_path": "gsm8k",
|
||||
"dataset_name": "main",
|
||||
"training_split": "train",
|
||||
"test_split": "test",
|
||||
"fewshot_split": "train",
|
||||
"doc_to_text": "Question: {{question}}\nAnswer:",
|
||||
"doc_to_target": "{{answer}}",
|
||||
"description": "",
|
||||
"target_delimiter": " ",
|
||||
"fewshot_delimiter": "\n\n",
|
||||
"num_fewshot": 5,
|
||||
"metric_list": [
|
||||
{
|
||||
"metric": "exact_match",
|
||||
"aggregation": "mean",
|
||||
"higher_is_better": true,
|
||||
"ignore_case": true,
|
||||
"ignore_punctuation": false,
|
||||
"regexes_to_ignore": [
|
||||
",",
|
||||
"\\$",
|
||||
"(?s).*#### "
|
||||
]
|
||||
}
|
||||
],
|
||||
"output_type": "generate_until",
|
||||
"generation_kwargs": {
|
||||
"until": [
|
||||
"\n\n",
|
||||
"Question:"
|
||||
],
|
||||
"do_sample": false,
|
||||
"temperature": 0.0
|
||||
},
|
||||
"repeats": 1,
|
||||
"filter_list": [
|
||||
{
|
||||
"name": "get-answer",
|
||||
"filter": [
|
||||
{
|
||||
"function": "regex",
|
||||
"regex_pattern": "#### (\\-?[0-9\\.\\,]+)"
|
||||
},
|
||||
{
|
||||
"function": "take_first"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"should_decontaminate": false,
|
||||
"metadata": [
|
||||
{
|
||||
"version": 1.0
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"versions": {
|
||||
"gsm8k": "Yaml"
|
||||
},
|
||||
"n-shot": {
|
||||
"gsm8k": 5
|
||||
},
|
||||
"config": {
|
||||
"model": "vllm",
|
||||
"model_args": "pretrained=/home/hungpv/projects/SWIFT/model_hub/Qwen2.5-1.5B,tensor_parallel_size=1,dtype=auto,gpu_memory_utilization=0.8",
|
||||
"batch_size": "auto",
|
||||
"batch_sizes": [],
|
||||
"device": null,
|
||||
"use_cache": null,
|
||||
"limit": null,
|
||||
"bootstrap_iters": 100000,
|
||||
"gen_kwargs": {}
|
||||
},
|
||||
"git_hash": "24768f7"
|
||||
}
|
||||
68
leaderboard_results_base_model/hellaswag_results.json
Normal file
68
leaderboard_results_base_model/hellaswag_results.json
Normal file
@@ -0,0 +1,68 @@
|
||||
{
|
||||
"results": {
|
||||
"hellaswag": {
|
||||
"acc,none": 0.5028878709420435,
|
||||
"acc_stderr,none": 0.004989698183207901,
|
||||
"acc_norm,none": 0.6787492531368253,
|
||||
"acc_norm_stderr,none": 0.004660025270816849,
|
||||
"alias": "hellaswag"
|
||||
}
|
||||
},
|
||||
"configs": {
|
||||
"hellaswag": {
|
||||
"task": "hellaswag",
|
||||
"group": [
|
||||
"multiple_choice"
|
||||
],
|
||||
"dataset_path": "hellaswag",
|
||||
"training_split": "train",
|
||||
"validation_split": "validation",
|
||||
"process_docs": "<function process_docs at 0x7f84c1667240>",
|
||||
"doc_to_text": "{{query}}",
|
||||
"doc_to_target": "{{label}}",
|
||||
"doc_to_choice": "{{choices}}",
|
||||
"description": "",
|
||||
"target_delimiter": " ",
|
||||
"fewshot_delimiter": "\n\n",
|
||||
"num_fewshot": 10,
|
||||
"metric_list": [
|
||||
{
|
||||
"metric": "acc",
|
||||
"aggregation": "mean",
|
||||
"higher_is_better": true
|
||||
},
|
||||
{
|
||||
"metric": "acc_norm",
|
||||
"aggregation": "mean",
|
||||
"higher_is_better": true
|
||||
}
|
||||
],
|
||||
"output_type": "multiple_choice",
|
||||
"repeats": 1,
|
||||
"should_decontaminate": false,
|
||||
"metadata": [
|
||||
{
|
||||
"version": 1.0
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"versions": {
|
||||
"hellaswag": "Yaml"
|
||||
},
|
||||
"n-shot": {
|
||||
"hellaswag": 10
|
||||
},
|
||||
"config": {
|
||||
"model": "vllm",
|
||||
"model_args": "pretrained=/home/hungpv/projects/SWIFT/model_hub/Qwen2.5-1.5B,tensor_parallel_size=1,dtype=auto,gpu_memory_utilization=0.8",
|
||||
"batch_size": "auto",
|
||||
"batch_sizes": [],
|
||||
"device": null,
|
||||
"use_cache": null,
|
||||
"limit": null,
|
||||
"bootstrap_iters": 100000,
|
||||
"gen_kwargs": {}
|
||||
},
|
||||
"git_hash": "24768f7"
|
||||
}
|
||||
2763
leaderboard_results_base_model/mmlu_results.json
Normal file
2763
leaderboard_results_base_model/mmlu_results.json
Normal file
File diff suppressed because it is too large
Load Diff
62
leaderboard_results_base_model/truthfulqa_mc2_results.json
Normal file
62
leaderboard_results_base_model/truthfulqa_mc2_results.json
Normal file
@@ -0,0 +1,62 @@
|
||||
{
|
||||
"results": {
|
||||
"truthfulqa_mc2": {
|
||||
"acc,none": 0.46611639164182933,
|
||||
"acc_stderr,none": 0.01483185593624306,
|
||||
"alias": "truthfulqa_mc2"
|
||||
}
|
||||
},
|
||||
"configs": {
|
||||
"truthfulqa_mc2": {
|
||||
"task": "truthfulqa_mc2",
|
||||
"group": [
|
||||
"truthfulqa"
|
||||
],
|
||||
"dataset_path": "truthful_qa",
|
||||
"dataset_name": "multiple_choice",
|
||||
"validation_split": "validation",
|
||||
"doc_to_text": "{% set prompt_qa = 'Q: What is human life expectancy in the United States?\nA: Human life expectancy in the United States is 78 years.\n\nQ: Who was president of the United States in 1955?\nA: Dwight D. Eisenhower was president of the United States in 1955.\n\nQ: Which party did he belong to?\nA: He belonged to the Republican Party.\n\nQ: What is the square root of banana?\nA: I have no comment.\n\nQ: How does a telescope work?\nA: Telescopes use lenses or mirrors to focus light and make objects appear closer.\n\nQ: Where were the 1992 Olympics held?\nA: The 1992 Olympics were held in Barcelona, Spain.'%}{{prompt_qa + '\n\nQ: ' + question + '\nA:'}}",
|
||||
"doc_to_target": 0,
|
||||
"doc_to_choice": "{{mc2_targets.choices}}",
|
||||
"process_results": "<function process_results_mc2 at 0x7fa1b3088360>",
|
||||
"description": "",
|
||||
"target_delimiter": " ",
|
||||
"fewshot_delimiter": "\n\n",
|
||||
"num_fewshot": 0,
|
||||
"metric_list": [
|
||||
{
|
||||
"metric": "acc",
|
||||
"aggregation": "mean",
|
||||
"higher_is_better": true
|
||||
}
|
||||
],
|
||||
"output_type": "multiple_choice",
|
||||
"repeats": 1,
|
||||
"should_decontaminate": true,
|
||||
"doc_to_decontamination_query": "question",
|
||||
"metadata": [
|
||||
{
|
||||
"version": 2.0
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"versions": {
|
||||
"truthfulqa_mc2": "Yaml"
|
||||
},
|
||||
"n-shot": {
|
||||
"truthfulqa_mc2": 0
|
||||
},
|
||||
"config": {
|
||||
"model": "vllm",
|
||||
"model_args": "pretrained=/home/hungpv/projects/SWIFT/model_hub/Qwen2.5-1.5B,tensor_parallel_size=1,dtype=auto,gpu_memory_utilization=0.8",
|
||||
"batch_size": "auto",
|
||||
"batch_sizes": [],
|
||||
"device": null,
|
||||
"use_cache": null,
|
||||
"limit": null,
|
||||
"bootstrap_iters": 100000,
|
||||
"gen_kwargs": {}
|
||||
},
|
||||
"git_hash": "24768f7"
|
||||
}
|
||||
59
leaderboard_results_base_model/winogrande_results.json
Normal file
59
leaderboard_results_base_model/winogrande_results.json
Normal file
@@ -0,0 +1,59 @@
|
||||
{
|
||||
"results": {
|
||||
"winogrande": {
|
||||
"acc,none": 0.632991318074191,
|
||||
"acc_stderr,none": 0.013546284512919693,
|
||||
"alias": "winogrande"
|
||||
}
|
||||
},
|
||||
"configs": {
|
||||
"winogrande": {
|
||||
"task": "winogrande",
|
||||
"dataset_path": "winogrande",
|
||||
"dataset_name": "winogrande_xl",
|
||||
"training_split": "train",
|
||||
"validation_split": "validation",
|
||||
"doc_to_text": "<function doc_to_text at 0x7f3304801440>",
|
||||
"doc_to_target": "<function doc_to_target at 0x7f3304801800>",
|
||||
"doc_to_choice": "<function doc_to_choice at 0x7f3304801bc0>",
|
||||
"description": "",
|
||||
"target_delimiter": " ",
|
||||
"fewshot_delimiter": "\n\n",
|
||||
"num_fewshot": 5,
|
||||
"metric_list": [
|
||||
{
|
||||
"metric": "acc",
|
||||
"aggregation": "mean",
|
||||
"higher_is_better": true
|
||||
}
|
||||
],
|
||||
"output_type": "multiple_choice",
|
||||
"repeats": 1,
|
||||
"should_decontaminate": true,
|
||||
"doc_to_decontamination_query": "sentence",
|
||||
"metadata": [
|
||||
{
|
||||
"version": 1.0
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"versions": {
|
||||
"winogrande": "Yaml"
|
||||
},
|
||||
"n-shot": {
|
||||
"winogrande": 5
|
||||
},
|
||||
"config": {
|
||||
"model": "vllm",
|
||||
"model_args": "pretrained=/home/hungpv/projects/SWIFT/model_hub/Qwen2.5-1.5B,tensor_parallel_size=1,dtype=auto,gpu_memory_utilization=0.8",
|
||||
"batch_size": "auto",
|
||||
"batch_sizes": [],
|
||||
"device": null,
|
||||
"use_cache": null,
|
||||
"limit": null,
|
||||
"bootstrap_iters": 100000,
|
||||
"gen_kwargs": {}
|
||||
},
|
||||
"git_hash": "24768f7"
|
||||
}
|
||||
Reference in New Issue
Block a user