86 lines
2.8 KiB
Markdown
86 lines
2.8 KiB
Markdown
## Usage

```python
|
|
!pip install unsloth -q
|
|
|
|
from unsloth import FastModel
|
|
import torch, json, re
|
|
|
|
# Load the fine-tuned checkpoint and its tokenizer through Unsloth,
# requesting 4-bit loading and a 2048-token maximum sequence length.
model, tokenizer = FastModel.from_pretrained(
    model_name="hienbm/gemma-2-9b-mtaste-16bit",
    max_seq_length=2048,
    load_in_4bit=True,
)

# Switch the loaded model to Unsloth's inference setup before generating.
FastModel.for_inference(model)
|
|
|
|
# Closed set of labels allowed in a triplet's "aspect" field. The list is
# joined into the prompt text, so the order here is the order the model sees.
ASPECT_CATEGORIES = [
    "AMBIENCE#GENERAL",
    "DRINKS#PRICES",
    "DRINKS#QUALITY",
    "DRINKS#STYLE_OPTIONS",
    "FOOD#PRICES",
    "FOOD#QUALITY",
    "FOOD#STYLE_OPTIONS",
    "LOCATION#GENERAL",
    "RESTAURANT#GENERAL",
    "RESTAURANT#MISCELLANEOUS",
    "RESTAURANT#PRICES",
    "SERVICE#GENERAL",
]
|
|
|
|
# Task instruction placed in the user turn of every prompt. It is assembled
# from adjacent string literals plus the joined ASPECT_CATEGORIES list.
# NOTE(review): presumably this wording matches what the checkpoint was
# fine-tuned on, so the text is kept verbatim -- confirm before editing it.
INSTRUCTION = (
    # Task statement and reading order.
    "Given a restaurant review, extract all sentiment triplets.\n"
    "Read the ENTIRE review first to understand context, sarcasm, and irony.\n"
    "Then extract triplets SENTENCE BY SENTENCE in the ORDER they appear.\n\n"
    # Output schema.
    "Output a JSON array sorted by appearance order in the review:\n"
    '[{"target": <word/phrase or "NULL">, '
    '"aspect": <ASPECT#CATEGORY>, "polarity": <positive|negative|neutral>}]\n\n'
    # Allowed aspect labels.
    "aspect must be one of: " + ", ".join(ASPECT_CATEGORIES) + "\n\n"
    # Extraction rules.
    "Rules:\n"
    "- Sentence order: extract from sentence 1 first, then sentence 2, etc.\n"
    "- Multiple triplets per sentence: one object per triplet, keep order\n"
    "- target: exact word/phrase from text, or NULL if implicit\n"
    "- Output ONLY the JSON array, no explanation\n\n"
    # One worked example.
    'Example:\nReview: "Food was great. Service was slow."\n'
    'Output: [{"target": "food", "aspect": "FOOD#QUALITY", "polarity": "positive"}, '
    '{"target": "NULL", "aspect": "SERVICE#GENERAL", "polarity": "negative"}]'
)
|
|
|
|
def build_prompt(text: str) -> str:
    """Wrap a review in the single-turn chat prompt sent to the model.

    Args:
        text: The restaurant review to analyse; inserted verbatim.

    Returns:
        A user turn holding ``INSTRUCTION`` followed by the review, then an
        opened model turn for generation to continue from.
    """
    return (
        "<start_of_turn>user\n"
        f"{INSTRUCTION}\n\nReview: {text}"
        "<end_of_turn>\n"
        "<start_of_turn>model\n"
    )
|
|
|
|
def parse_output(raw: str) -> list[dict]:
    """Extract the triplet list from the model's decoded reply.

    The reply is expected to be a bare JSON array, but it may be surrounded
    by extra text or contain square brackets inside string values. Every
    ``[`` is therefore tried as the start of a JSON array and the first one
    that decodes is used; the JSON decoder, not a regex, decides where the
    array ends.

    Args:
        raw: Decoded model output.

    Returns:
        The first JSON array found in ``raw`` (elements are not validated).
        If there is no array but the whole reply is a single JSON object,
        that object wrapped in a one-element list. Otherwise ``[]``.
    """
    decoder = json.JSONDecoder()

    start = raw.find("[")
    while start != -1:
        try:
            # Decoding from a "[" can only yield a list; trailing text after
            # the array is ignored by raw_decode.
            parsed, _end = decoder.raw_decode(raw, start)
        except json.JSONDecodeError:
            # Not a JSON array here (e.g. "[sic]"); try the next bracket.
            start = raw.find("[", start + 1)
        else:
            return parsed

    # No array anywhere: accept a lone JSON object, reject anything else so
    # the return value is always a list.
    try:
        parsed = json.loads(raw)
    except json.JSONDecodeError:
        return []
    return [parsed] if isinstance(parsed, dict) else []
|
|
|
|
# Sample input: the opening praise is contradicted by the rest of the sentence.
review = "This place serves fast, it's been over 30 minutes and the dish still hasn't come out."

# Put the inputs on whatever device the model was loaded on instead of
# assuming a device named "cuda".
inputs = tokenizer(build_prompt(review), return_tensors="pt").to(model.device)

with torch.no_grad():
    # Greedy decoding (do_sample=False). No temperature is passed: it only
    # applies to sampling and would merely trigger a warning here.
    output = model.generate(
        **inputs,
        max_new_tokens=1024,
        do_sample=False,
        pad_token_id=tokenizer.eos_token_id,
    )

# Drop the prompt tokens so that only the newly generated text is decoded.
prompt_length = inputs["input_ids"].shape[1]
raw = tokenizer.decode(
    output[0][prompt_length:],
    skip_special_tokens=True,
).strip()

triplets = parse_output(raw)

print(f"Review : {review}\n")
print(f"Raw output : {raw}\n")
print("Extracted triplets:")
for i, t in enumerate(triplets, 1):
    # Model output is not guaranteed to be well-formed: report odd items
    # instead of crashing on a missing key or a non-object element.
    if not isinstance(t, dict):
        print(f" {i}. (unexpected item: {t!r})")
        continue
    target = t.get("target", "?")
    aspect = str(t.get("aspect", "?"))
    polarity = t.get("polarity", "?")
    print(f" {i}. target={target!r:20s} aspect={aspect:30s} polarity={polarity}")
|
|
|
|
```