Add CI for gpt-oss model on hopper (#8851)
This commit is contained in:
@@ -65,9 +65,10 @@ def run_eval(args):
|
||||
|
||||
sampler = ChatCompletionSampler(
|
||||
model=args.model,
|
||||
max_tokens=2048,
|
||||
max_tokens=getattr(args, "max_tokens", 2048),
|
||||
base_url=base_url,
|
||||
temperature=getattr(args, "temperature", 0.0),
|
||||
reasoning_effort=getattr(args, "reasoning_effort", None),
|
||||
)
|
||||
|
||||
# Run eval
|
||||
@@ -120,7 +121,9 @@ if __name__ == "__main__":
|
||||
parser.add_argument("--eval-name", type=str, default="mmlu")
|
||||
parser.add_argument("--num-examples", type=int)
|
||||
parser.add_argument("--num-threads", type=int, default=512)
|
||||
parser.add_argument("--max-tokens", type=int, default=2048)
|
||||
parser.add_argument("--temperature", type=float, default=0.0)
|
||||
parser.add_argument("--reasoning-effort", type=str)
|
||||
args = parser.parse_args()
|
||||
|
||||
run_eval(args)
|
||||
|
||||
@@ -91,6 +91,7 @@ class ChatCompletionSampler(SamplerBase):
|
||||
model: Optional[str] = None,
|
||||
system_message: Optional[str] = None,
|
||||
temperature: float = 0.0,
|
||||
reasoning_effort: Optional[str] = None,
|
||||
max_tokens: int = 2048,
|
||||
):
|
||||
self.client = OpenAI(base_url=base_url, http_client=LargerHttpxClient())
|
||||
@@ -102,7 +103,11 @@ class ChatCompletionSampler(SamplerBase):
|
||||
self.system_message = system_message
|
||||
self.temperature = temperature
|
||||
self.max_tokens = max_tokens
|
||||
self.reasoning_effort = reasoning_effort
|
||||
self.image_format = "url"
|
||||
print(
|
||||
f"ChatCompletionSampler initialized with {self.system_message=} {self.temperature=} {self.max_tokens=} {self.reasoning_effort=}"
|
||||
)
|
||||
|
||||
def _handle_image(
|
||||
self,
|
||||
@@ -138,6 +143,7 @@ class ChatCompletionSampler(SamplerBase):
|
||||
messages=message_list,
|
||||
temperature=self.temperature,
|
||||
max_tokens=self.max_tokens,
|
||||
reasoning_effort=self.reasoning_effort,
|
||||
)
|
||||
return response.choices[0].message.content
|
||||
# NOTE: BadRequestError is triggered once for MMMU, please uncomment if you are rerunning MMMU
|
||||
|
||||
@@ -71,6 +71,8 @@ class GPQAEval(Eval):
|
||||
)
|
||||
]
|
||||
response_text = sampler(prompt_messages)
|
||||
if response_text is None:
|
||||
response_text = ""
|
||||
match = re.search(ANSWER_PATTERN_MULTICHOICE, response_text)
|
||||
extracted_answer = match.group(1) if match else None
|
||||
score = 1.0 if extracted_answer == correct_answer else 0.0
|
||||
|
||||
Reference in New Issue
Block a user