初始化项目,由ModelHub XC社区提供模型
Model: hienbm/gemma-2-9b-mtaste-16bit Source: Original Platform
This commit is contained in:
86
README.md
Normal file
86
README.md
Normal file
@@ -0,0 +1,86 @@
|
||||
## Usage
|
||||
|
||||
```python
|
||||
!pip install unsloth -q
|
||||
|
||||
from unsloth import FastModel
|
||||
import torch, json, re
|
||||
|
||||
# Arguments for loading the published checkpoint; collected in one mapping so
# they can be inspected or tweaked in a single place.
load_kwargs = {
    "model_name": "hienbm/gemma-2-9b-mtaste-16bit",
    "max_seq_length": 2048,
    "load_in_4bit": True,
}
model, tokenizer = FastModel.from_pretrained(**load_kwargs)

# NOTE(review): presumably switches the model to Unsloth's inference mode --
# confirm against the Unsloth documentation.
FastModel.for_inference(model)
|
||||
|
||||
# Closed set of ENTITY#ATTRIBUTE labels the prompt allows for the "aspect"
# field. The order matters: it is the order in which the labels are listed in
# the instruction text.
ASPECT_CATEGORIES = [
    "AMBIENCE#GENERAL",
    "DRINKS#PRICES",
    "DRINKS#QUALITY",
    "DRINKS#STYLE_OPTIONS",
    "FOOD#PRICES",
    "FOOD#QUALITY",
    "FOOD#STYLE_OPTIONS",
    "LOCATION#GENERAL",
    "RESTAURANT#GENERAL",
    "RESTAURANT#MISCELLANEOUS",
    "RESTAURANT#PRICES",
    "SERVICE#GENERAL",
]
|
||||
|
||||
# The instruction is assembled from named sections so each part of the prompt
# is easy to locate. The concatenated text is what gets sent to the model, so
# every literal below must stay exactly as written.

# What the model is asked to do and in which order to read the review.
_TASK_TEXT = (
    "Given a restaurant review, extract all sentiment triplets.\n"
    "Read the ENTIRE review first to understand context, sarcasm, and irony.\n"
    "Then extract triplets SENTENCE BY SENTENCE in the ORDER they appear.\n\n"
)

# Shape of the expected JSON answer.
_SCHEMA_TEXT = (
    "Output a JSON array sorted by appearance order in the review:\n"
    '[{"target": <word/phrase or "NULL">, '
    '"aspect": <ASPECT#CATEGORY>, "polarity": <positive|negative|neutral>}]\n\n'
)

# Allowed aspect labels, rendered as a comma-separated list.
_ASPECT_TEXT = "aspect must be one of: " + ", ".join(ASPECT_CATEGORIES) + "\n\n"

# Extraction rules.
_RULES_TEXT = (
    "Rules:\n"
    "- Sentence order: extract from sentence 1 first, then sentence 2, etc.\n"
    "- Multiple triplets per sentence: one object per triplet, keep order\n"
    "- target: exact word/phrase from text, or NULL if implicit\n"
    "- Output ONLY the JSON array, no explanation\n\n"
)

# One worked example (no trailing newline; build_prompt adds the separator).
_EXAMPLE_TEXT = (
    'Example:\nReview: "Food was great. Service was slow."\n'
    'Output: [{"target": "food", "aspect": "FOOD#QUALITY", "polarity": "positive"}, '
    '{"target": "NULL", "aspect": "SERVICE#GENERAL", "polarity": "negative"}]'
)

INSTRUCTION = _TASK_TEXT + _SCHEMA_TEXT + _ASPECT_TEXT + _RULES_TEXT + _EXAMPLE_TEXT
|
||||
|
||||
def build_prompt(text: str) -> str:
    """Return the full single-turn prompt for one review.

    The user turn holds ``INSTRUCTION`` followed by the review text, wrapped in
    ``<start_of_turn>`` / ``<end_of_turn>`` markers. The prompt ends with an
    opened model turn so generation continues from there.
    """
    user_message = f"{INSTRUCTION}\n\nReview: {text}"
    pieces = [
        "<start_of_turn>user\n",
        user_message,
        "<end_of_turn>\n",
        "<start_of_turn>model\n",
    ]
    return "".join(pieces)
|
||||
|
||||
def parse_output(raw: str) -> list[dict]:
    """Extract the list of triplet objects from the model's raw text output.

    The text is scanned for each ``[`` and a JSON value is decoded starting at
    that position. The first value that is a list containing only objects is
    returned. Decoding with a real JSON parser (instead of a ``\\[.*?\\]``
    regex) means a ``]`` inside a string value does not cut the array short,
    and bracketed fragments that are not triplet arrays (e.g. ``[1]``) are
    skipped rather than returned.

    If no such array exists but the whole text is a single JSON object, that
    object is returned wrapped in a one-element list.

    Args:
        raw: Decoded model output, possibly with text around the JSON array.

    Returns:
        A list of dicts; an empty list when nothing usable can be parsed.
    """
    decoder = json.JSONDecoder()

    start = raw.find("[")
    while start != -1:
        try:
            candidate, _ = decoder.raw_decode(raw, start)
        except json.JSONDecodeError:
            # Not valid JSON from this bracket; try the next one.
            candidate = None
        if isinstance(candidate, list) and all(
            isinstance(item, dict) for item in candidate
        ):
            return candidate
        start = raw.find("[", start + 1)

    # No array of objects found: accept a lone JSON object as a single triplet
    # so callers always receive a list of dicts.
    try:
        whole = json.loads(raw)
    except json.JSONDecodeError:
        return []
    return [whole] if isinstance(whole, dict) else []
|
||||
|
||||
# Sample input: the opening praise is contradicted by the rest of the sentence.
review = "This place serves fast, it's been over 30 minutes and the dish still hasn't come out."

# Put the tokenized prompt on the same device as the model instead of
# assuming a device named "cuda".
inputs = tokenizer(build_prompt(review), return_tensors="pt").to(model.device)

with torch.no_grad():
    output = model.generate(
        **inputs,
        max_new_tokens=1024,
        # Greedy decoding. `temperature` is deliberately not passed: it only
        # applies when do_sample=True, so combining the two was contradictory.
        do_sample=False,
        pad_token_id=tokenizer.eos_token_id,
    )

# generate() returns prompt + continuation; keep only the newly generated part.
prompt_length = inputs["input_ids"].shape[1]
raw = tokenizer.decode(
    output[0][prompt_length:],
    skip_special_tokens=True,
).strip()

triplets = parse_output(raw)

print(f"Review : {review}\n")
print(f"Raw output : {raw}\n")
print("Extracted triplets:")
for i, t in enumerate(triplets, 1):
    print(f" {i}. target={t['target']!r:20s} aspect={t['aspect']:30s} polarity={t['polarity']}")
|
||||
|
||||
```
|
||||
Reference in New Issue
Block a user