240 lines
8.1 KiB
Python
240 lines
8.1 KiB
Python
# %%
|
|
# ----------------------------------------------------------
# Custom Hugging Face pipeline for the quizbowl "bonus" split, built on existing causal LMs.
# Task id             : quizbowl-bonus
# Expected input keys : leadin, part, previous_parts ('text' and 'guess');
#                       optional leadin_images / part_images.
#                       NOTE: previous_parts is accepted but not currently used in the prompt.
# Must return         : answer, confidence, explanation
# ----------------------------------------------------------
|
|
|
|
|
|
import math

import json_repair
import torch
from datasets import Dataset
from loguru import logger
from torch.nn import functional as F
from tqdm.auto import tqdm
from transformers import Pipeline, pipeline
from transformers.models.llama.modeling_llama import LlamaForCausalLM
from transformers.pipelines import PIPELINE_REGISTRY
|
|
|
|
|
|
def format_part(number: int, text: str, guess: str) -> str:
    """Render one bonus part and the model's guess as a two-line bullet entry.

    Args:
        number: Part number shown in the first bullet.
        text: Question text of the part.
        guess: The model's guess for that part.

    Returns:
        Two tab-indented bullet lines joined by a newline: the part text
        first, then the model guess.
    """
    part_line = f"\t * Part {number}: {text}"
    guess_line = f"\t * Model Guess: {guess}"
    return "\n".join((part_line, guess_line))
|
|
|
|
|
|
# System message sent with every request. It fixes the model's role and the
# JSON schema that parse_output_text() later extracts ("answer",
# "explanation", "confidence"). The braces are literal: this string is never
# passed through str.format().
system_prompt = """
You are a quizbowl player. Given a leadin and your responses to the previous related parts, provide the answer, a brief (1-2 sentences) explanation to the provided question along with your confidence in the guess.
The answer should be a single word or short phrase, and the explanation should be concise and relevant to the question.
The answer should be formatted in the below JSON format:

{
    "answer": str,
    "explanation": str,
    "confidence": float (0-1 in the steps of 0.01),
    "justification": str (optional justification for the confidence score)
}
The confidence should be a float between 0 and 1, representing your confidence in the answer.
"""

# User message template. Placeholders: {leadin}, {part} and {image_note};
# {image_note} is appended directly after the quoted question and may be "".
user_prompt_template = """
"Leadin: {leadin}
Question: {part}"{image_note}
What is being asked in the question? Provide a concise answer, a brief explanation, and your confidence in the guess along with justification."""
|
|
|
|
|
|
def _bonus_image_note(leadin_images, part_images) -> str:
    """Describe attached images that this text-only pipeline cannot see.

    Args:
        leadin_images: Images attached to the leadin; ``None`` or any falsy
            value is treated as no images.
        part_images: Images attached to the part; ``None`` or any falsy value
            is treated as no images.

    Returns:
        ``""`` when neither argument holds images; otherwise a bracketed note
        (prefixed by two newlines) stating both image counts.
    """
    leadin = leadin_images if leadin_images else []
    parts = part_images if part_images else []
    if leadin or parts:
        counts = (
            f"\n\n[This bonus includes {len(leadin)} leadin image(s) "
            f"and {len(parts)} part image(s); "
        )
        advice = (
            "this text-only pipeline does not see pixels—use a VLM pipeline with "
            "`leadin_images` / `part_images`.]"
        )
        return counts + advice
    return ""
|
|
|
|
|
|
def prepare_conversation(leadin, part, image_note: str = ""):
    """Build the chat messages for one bonus part.

    Args:
        leadin: Leadin text substituted into ``user_prompt_template``.
        part: Question text substituted into ``user_prompt_template``.
        image_note: Optional text substituted for ``{image_note}``.

    Returns:
        A two-element list of ``{"role", "content"}`` dicts: the system
        message carrying ``system_prompt``, then the formatted user message.
    """
    user_content = user_prompt_template.format(
        leadin=leadin,
        part=part,
        image_note=image_note,
    )
    system_message = {"role": "system", "content": system_prompt}
    user_message = {"role": "user", "content": user_content}
    return [system_message, user_message]
|
|
|
|
|
|
def _as_clean_text(value) -> str:
    """Return *value* as a stripped string; ``None`` becomes ``""``."""
    if value is None:
        return ""
    # Models sometimes emit numbers (e.g. a year) for text fields; keep them
    # instead of failing on a missing ``.strip()``.
    return str(value).strip()


def _clamp_confidence(value) -> float:
    """Coerce *value* to a float in ``[0.0, 1.0]``; unusable values give 0.0."""
    try:
        confidence = float(value)
    except (TypeError, ValueError, OverflowError):
        # TypeError covers None / list / dict, which a bare ValueError
        # handler would let escape and crash the caller.
        logger.warning(f"Invalid confidence value: {value}. Defaulting to 0.0.")
        return 0.0
    if math.isnan(confidence):
        # max(0.0, min(1.0, nan)) evaluates to 1.0, so NaN must be rejected
        # before clamping.
        logger.warning(f"Invalid confidence value: {value}. Defaulting to 0.0.")
        return 0.0
    return max(0.0, min(1.0, confidence))


def parse_output_text(output_text: str):
    """Extract ``answer``, ``explanation`` and ``confidence`` from model text.

    The text from the first ``{`` onward is handed to ``json_repair.loads``.
    If the result is a list, its first element is used.

    Args:
        output_text: Raw decoded model output.

    Returns:
        A dict with keys ``"answer"`` (str), ``"explanation"`` (str) and
        ``"confidence"`` (float clamped to ``[0.0, 1.0]``). When no JSON
        object can be extracted, a warning is logged and the result is
        ``""``, ``""`` and ``0.0``. An unusable confidence value alone
        (null, non-numeric, NaN) only resets the confidence to ``0.0``.
    """
    try:
        start_index = output_text.find("{")
        if start_index == -1:
            raise ValueError("No JSON object found in the output text.")
        output_text = output_text[start_index:]
        json_data = json_repair.loads(output_text)
        if isinstance(json_data, list):
            json_data = json_data[0]
        answer = _as_clean_text(json_data.get("answer"))
        explanation = _as_clean_text(json_data.get("explanation"))
        confidence = json_data.get("confidence", 0.0)
    except Exception as e:
        # Deliberately broad: this is a best-effort boundary around
        # free-form model text, and one bad generation must not abort a batch.
        logger.warning(
            f"Error parsing JSON: {e.__class__.__name__} - {e}. Got:\n{output_text}"
        )
        answer, explanation, confidence = "", "", 0.0

    return {
        "answer": answer,
        "explanation": explanation,
        "confidence": _clamp_confidence(confidence),
    }
|
|
|
|
|
|
def postprocess_response(output_text, scores=None):
    """Parse one generation and optionally blend in a logit-based confidence.

    Args:
        output_text: Decoded model output, passed to ``parse_output_text``.
        scores: Optional sequence of logit tensors. When given and non-empty,
            the largest softmax probability of each entry is averaged and
            that mean is averaged with the parsed confidence.
            NOTE(review): ``.max()`` is taken over the whole tensor, so each
            entry presumably holds the logits of a single sequence — confirm
            against the caller.

    Returns:
        The dict produced by ``parse_output_text``, with ``"confidence"``
        replaced by the blended value when ``scores`` was used.
    """
    parsed = parse_output_text(output_text)

    # Nothing to blend: keep the confidence reported by the model itself.
    if scores is None or len(scores) == 0:
        return parsed

    step_peaks = []
    for step_logits in scores:
        step_peaks.append(F.softmax(step_logits, dim=-1).max().item())
    mean_peak = float(sum(step_peaks) / len(step_peaks))

    parsed["confidence"] = (parsed["confidence"] + mean_peak) / 2
    return parsed
|
|
|
|
|
|
class BonusPipeline(Pipeline):
    """Chat-style generation pipeline for the ``quizbowl-bonus`` task.

    Input is a dict of parallel lists with keys ``"leadin"`` and ``"part"``
    (one entry per example) and optional ``"leadin_images"`` /
    ``"part_images"``. A single example, whose ``"leadin"`` is a plain
    string, is also accepted. Output is a list with one
    ``{"answer", "explanation", "confidence"}`` dict per example.

    The generation budget can be set with ``max_new_tokens`` either when the
    pipeline is created or on each call; it defaults to
    ``DEFAULT_MAX_NEW_TOKENS``.
    """

    # Generation budget used when the caller does not pass ``max_new_tokens``.
    DEFAULT_MAX_NEW_TOKENS = 64

    class _GenOut:
        """Minimal holder exposing only ``sequences`` (the new tokens)."""

        __slots__ = ("sequences",)

        def __init__(self, sequences):
            self.sequences = sequences

    def __init__(self, model, tokenizer, **kwargs):
        """Create the pipeline and configure the tokenizer for batching."""
        super().__init__(
            model=model,
            tokenizer=tokenizer,
            **kwargs,
        )
        # Left padding keeps every prompt adjacent to its generated tokens.
        self.tokenizer.padding_side = "left"
        # Fall back to EOS only when the tokenizer has no pad token, so a
        # dedicated pad token is not clobbered.
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

    def _sanitize_parameters(self, **kwargs):
        """Split call kwargs into (preprocess, forward, postprocess) params.

        Only ``max_new_tokens`` is recognised and routed to ``_forward``;
        any other keyword is ignored.
        """
        forward_params = {}
        max_new_tokens = kwargs.get("max_new_tokens")
        if max_new_tokens is not None:
            forward_params["max_new_tokens"] = max_new_tokens
        return {}, forward_params, {}

    def preprocess(self, inputs):
        """Turn a batch dict into padded, tokenized chat prompts.

        Args:
            inputs: Dict with ``"leadin"`` and ``"part"`` lists and optional
                ``"leadin_images"`` / ``"part_images"`` lists; or the same
                keys holding a single example.

        Returns:
            The tokenizer's ``apply_chat_template`` output (dict form,
            PyTorch tensors) for all conversations in the batch.
        """
        if isinstance(inputs["leadin"], str):
            # A single example: wrap every field so the batch logic below
            # does not iterate over the characters of the leadin.
            inputs = {key: [value] for key, value in inputs.items()}

        batch_size = len(inputs["leadin"])
        leadin_imgs = inputs.get("leadin_images") or [[] for _ in range(batch_size)]
        part_imgs = inputs.get("part_images") or [[] for _ in range(batch_size)]

        conversations = []
        for i in range(batch_size):
            note = _bonus_image_note(leadin_imgs[i], part_imgs[i])
            conversations.append(
                prepare_conversation(inputs["leadin"][i], inputs["part"][i], image_note=note)
            )

        return self.tokenizer.apply_chat_template(
            conversations,
            add_generation_prompt=True,
            tokenize=True,
            return_dict=True,
            padding=True,
            return_tensors="pt",
        )

    def _forward(self, model_inputs, max_new_tokens=None):
        """Generate continuations and return only the newly generated tokens.

        Args:
            model_inputs: Tokenized batch produced by ``preprocess``.
            max_new_tokens: Generation budget; ``None`` selects
                ``DEFAULT_MAX_NEW_TOKENS``.

        Returns:
            An object whose ``sequences`` attribute holds the generated ids
            with the prompt columns sliced off.
        """
        if max_new_tokens is None:
            max_new_tokens = self.DEFAULT_MAX_NEW_TOKENS

        # Do not use output_scores=True: it materializes full-vocab logits each step and
        # routinely OOMs mid-size GPUs (e.g. T4). postprocess() only uses decoded text.
        with torch.no_grad():
            full = self.model.generate(
                **model_inputs,
                max_new_tokens=max_new_tokens,
                # Pass the pad id explicitly so generate() does not have to
                # fall back to guessing one.
                pad_token_id=self.tokenizer.pad_token_id,
            )

        # Prompts are left-padded to a common length, so one slice drops the
        # prompt from every row.
        input_length = model_inputs["input_ids"].shape[1]
        return self._GenOut(full[:, input_length:])

    def postprocess(self, model_outputs):
        """Decode generated ids and parse each text into a result record.

        Args:
            model_outputs: Object with a ``sequences`` attribute, as returned
                by ``_forward``.

        Returns:
            A list with one ``postprocess_response`` dict per sequence.
        """
        output_texts = self.tokenizer.batch_decode(
            model_outputs.sequences, skip_special_tokens=True
        )
        return [postprocess_response(output_text) for output_text in output_texts]
|
|
|
|
|
|
# Make ``pipeline("quizbowl-bonus")`` resolve to BonusPipeline, with a Llama
# causal LM as the PyTorch model class and a default checkpoint/revision.
PIPELINE_REGISTRY.register_pipeline(
    "quizbowl-bonus",
    type="text",
    pt_model=LlamaForCausalLM,
    pipeline_class=BonusPipeline,
    default={"pt": ("meta-llama/Llama-3.2-3B-Instruct", "main")},
)
|
|
# %%
|
|
if __name__ == "__main__":
    # Smoke-test demo: build the pipeline, run a few toy examples in batches
    # and print the parsed records.
    import os

    from transformers import BitsAndBytesConfig

    # Full precision (default): only ``device_map="auto"`` is passed.
    # Tight GPU (e.g. HF Space T4 with an 8B checkpoint): run
    # ``pip install bitsandbytes`` first, then set ``LLAMA3_BONUS_4BIT=1``.
    model_kwargs: dict = {"device_map": "auto"}
    use_4bit = os.environ.get("LLAMA3_BONUS_4BIT", "").strip().lower() in (
        "1",
        "true",
        "yes",
        "on",
    )
    if use_4bit:
        model_kwargs["quantization_config"] = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.bfloat16,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
        )

    pipe = pipeline("quizbowl-bonus", trust_remote_code=True, model_kwargs=model_kwargs)

    examples = [
        {
            "leadin": "This is a leadin.",
            "part": "What is the capital of France?",
        },
        {
            "leadin": "This is another leadin.",
            "part": "What is the largest planet in our solar system?",
            "previous_parts": [
                {"text": "What is the smallest planet?", "guess": "Mercury"},
                {"text": "What is the second smallest planet?", "guess": "Mars"},
            ],
        },
        {
            "leadin": "This is a leadin with no previous parts.",
            "part": "What is the chemical symbol for water?",
            "previous_parts": [],
        },
    ] * 5

    dataset = Dataset.from_list(examples)

    print("Dataset size:", len(dataset))
    outputs = []
    batch_size = 5
    for batch in tqdm(dataset.batch(batch_size), desc="Processing batches"):
        output = pipe(batch, batch_size=batch_size)
        outputs.extend(output)

    # Show the results; previously they were collected and then discarded.
    print("Outputs:", len(outputs))
    for record in outputs:
        print(record)
|