Fix accuracy test (#1051)
This commit is contained in:
@@ -16,6 +16,8 @@ from sglang.test.simple_eval_common import (
|
||||
|
||||
|
||||
def run_eval(args):
|
||||
+set_ulimit()
|
||||
|
||||
if "OPENAI_API_KEY" not in os.environ:
|
||||
os.environ["OPENAI_API_KEY"] = "EMPTY"
|
||||
|
||||
@@ -117,7 +119,6 @@ if __name__ == "__main__":
|
||||
parser.add_argument("--eval-name", type=str, default="mmlu")
|
||||
parser.add_argument("--num-examples", type=int)
|
||||
parser.add_argument("--num-threads", type=int, default=512)
|
||||
-set_ulimit()
|
||||
args = parser.parse_args()
|
||||
|
||||
run_eval(args)
|
||||
|
||||
@@ -6,21 +6,15 @@ Mark Chen and Jerry Tworek and Heewoo Jun and Qiming Yuan and Henrique Ponde de
|
||||
https://arxiv.org/abs/2107.03374 https://github.com/openai/human-eval/
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import multiprocessing
|
||||
import random
|
||||
import re
|
||||
from collections import Counter, defaultdict
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from io import BytesIO
|
||||
-from typing import Any, Dict, List, Tuple
|
||||
+from typing import Dict, List
|
||||
|
||||
import blobfile as bf
|
||||
import tqdm
|
||||
|
||||
try:
|
||||
-from human_eval.data import HUMAN_EVAL, read_problems
|
||||
+from human_eval.data import read_problems
|
||||
from human_eval.evaluation import estimate_pass_at_k
|
||||
from human_eval.execution import check_correctness # , unsafe_execute
|
||||
except (ImportError, ModuleNotFoundError):
|
||||
|
||||
Reference in New Issue
Block a user