初始化项目,由ModelHub XC社区提供模型

Model: iic/ERank-4B
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-06-08 18:41:13 +08:00
commit 95c74472e0
20 changed files with 910104 additions and 0 deletions

View File

@@ -0,0 +1,146 @@
from torch.nn import functional as F
from transformers import AutoModelForCausalLM, AutoTokenizer
from utils import prompt_template, truncate, hybrid_scores
class ERank_Transformer:
    """Pointwise LLM reranker backed by Hugging Face ``transformers``.

    For every (query, document) pair the model is prompted with
    ``prompt_template`` and asked to finish with ``<answer>N</answer>``.
    The integer ``N`` is multiplied by the probability the model assigned
    to its digit tokens to form the document's ``rank_score``.
    """

    def __init__(self, model_name_or_path: str):
        """
        Initializes the ERank_Transformer reranker.

        Args:
            model_name_or_path (str): The name or path of the model to be loaded.
                This can be a Hugging Face model ID or a local path.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
        # Batched generation with a decoder-only model needs left padding;
        # with right padding the model would continue from pad tokens.
        self.tokenizer.padding_side = "left"
        self.reranker = AutoModelForCausalLM.from_pretrained(model_name_or_path).eval()
        self.reranker.to("cuda")

    @staticmethod
    def _index_after_last(sequence: list, pattern: list) -> int:
        """Return the index just past the last occurrence of ``pattern`` in ``sequence``, or -1."""
        width = len(pattern)
        for i in range(len(sequence) - width, -1, -1):
            if sequence[i:i + width] == pattern:
                return i + width
        return -1

    @staticmethod
    def _parse_answer(answer: str) -> int:
        """Convert the collected digit string to a score; -1 if it is not an integer or exceeds 10."""
        try:
            value = int(answer)
        except ValueError:
            return -1
        return value if value <= 10 else -1

    def rerank(self, query: str, docs: list, instruction: str, truncate_length: int=None) -> list:
        """
        Reranks a list of documents based on a query and a specific instruction.

        Args:
            query (str): The search query provided by the user.
            docs (list): A list of dictionaries, where each dictionary represents a document
                and must contain a "content" key.
            instruction (str): The instruction for the model, guiding it on how to evaluate the documents.
            truncate_length (int, optional): The maximum number of tokens kept from the query and
                from each document's content. Defaults to None (no truncation).

        Returns:
            list: A new list of document dictionaries, each extended with a "rank_score" key and
                sorted by it in descending order. Documents whose answer could not be parsed
                use -1 as the answer value. An empty ``docs`` yields an empty list.
        """
        # Nothing to score; also avoids tokenizing an empty batch.
        if not docs:
            return []

        # prepare messages
        query_text = truncate(self.tokenizer, query, length=truncate_length) if truncate_length else query
        messages = [
            [{
                "role": "user",
                "content": prompt_template.format(
                    query=query_text,
                    doc=truncate(self.tokenizer, doc["content"], length=truncate_length) if truncate_length else doc["content"],
                    instruction=instruction
                )
            }] for doc in docs
        ]

        # encode tokens
        texts = [
            self.tokenizer.apply_chat_template(
                each,
                tokenize=False,
                add_generation_prompt=True,
            ) for each in messages
        ]
        inputs = self.tokenizer(texts, padding=True, return_tensors="pt").to(self.reranker.device)

        # LLM completion. Greedy decoding keeps the run deterministic and keeps
        # ``outputs.scores`` free of sampling warpers, so the softmax below is
        # the model's own token probability.
        outputs = self.reranker.generate(
            **inputs,
            max_new_tokens=8192,
            do_sample=False,
            output_scores=True,
            return_dict_in_generate=True
        )

        # extract and organize results
        scores = outputs.scores
        generated_ids = outputs.sequences
        prompt_length = inputs.input_ids.size(1)
        answer_token_ids = self.tokenizer.encode("<answer>", add_special_tokens=False)

        results = []
        for idx, doc in enumerate(docs):
            # Search only the completion: the prompt itself contains "<answer>"
            # (in the format description) and must not be matched.
            completion_ids = generated_ids[idx][prompt_length:].tolist()
            start_index = self._index_after_last(completion_ids, answer_token_ids)

            # collect the digit tokens that directly follow <answer>
            answer = ""
            prob = 1.0
            if start_index != -1:
                # scores[t] holds the logits for completion position t
                for t in range(start_index, min(len(completion_ids), len(scores))):
                    generated_token_id = completion_ids[t]
                    token = self.tokenizer.decode(generated_token_id)
                    if not token.isdigit():
                        break
                    probs = F.softmax(scores[t][idx], dim=-1)
                    prob *= probs[generated_token_id].item()
                    answer += token

            # append to the final results
            results.append({
                **doc,
                "rank_score": self._parse_answer(answer) * prob
            })

        # sort the reranking results for the query
        results.sort(key=lambda x: x["rank_score"], reverse=True)
        return results
if __name__ == "__main__":
    # Checkpoint to load; the larger variants are listed as alternatives.
    checkpoint = "Ucreate/ERank-4B"
    # checkpoint = "Ucreate/ERank-14B"
    # checkpoint = "Ucreate/ERank-32B"
    reranker = ERank_Transformer(checkpoint)

    # Toy request: one query and three candidates carrying first-stage scores.
    task_instruction = "Retrieve relevant documents for the query."
    user_query = "I am happy"
    candidates = [
        {"content": "excited", "first_stage_score": 46.7},
        {"content": "sad", "first_stage_score": 1.5},
        {"content": "peaceful", "first_stage_score": 2.3},
    ]

    # Score and sort the candidates with the LLM reranker.
    reranked = reranker.rerank(
        query=user_query,
        docs=candidates,
        instruction=task_instruction,
        truncate_length=2048,
    )
    print(reranked)
    # Sample output:
    # [
    #     {'content': 'excited', 'first_stage_score': 46.7, 'rank_score': 4.84},
    #     {'content': 'peaceful', 'first_stage_score': 2.3, 'rank_score': 2.98},
    #     {'content': 'sad', 'first_stage_score': 1.5, 'rank_score': 0.0},
    # ]

    # Optional: blend the reranker scores with the first-stage scores.
    blend_weight = 0.2
    blended = hybrid_scores(reranked, blend_weight)
    print(blended)
    # Sample output:
    # [
    #     {'content': 'excited', 'first_stage_score': 46.7, 'rank_score': 4.84, 'hybrid_score': 1.18},
    #     {'content': 'peaceful', 'first_stage_score': 2.3, 'rank_score': 2.98, 'hybrid_score': 0.01},
    #     {'content': 'sad', 'first_stage_score': 1.5, 'rank_score': 0.0, 'hybrid_score': -1.19},
    # ]

97
examples/ERank_vLLM.py Normal file
View File

@@ -0,0 +1,97 @@
import torch
import math
from vllm import LLM, SamplingParams
from utils import prompt_template, truncate
class ERank_vLLM:
    """Pointwise LLM reranker backed by vLLM.

    For every (query, document) pair the model is prompted with
    ``prompt_template`` and asked to finish with ``<answer>N</answer>``.
    The integer ``N`` is multiplied by the probability of its digit tokens
    to form the document's ``rank_score``.
    """

    # Number of trailing completion tokens inspected for "<answer>N".
    _ANSWER_TAIL_TOKENS = 10

    def __init__(self, model_name_or_path: str):
        """
        Initializes the ERank_vLLM reranker.

        Args:
            model_name_or_path (str): The name or path of the model to be loaded.
                This can be a Hugging Face model ID or a local path.
        """
        num_gpu = torch.cuda.device_count()
        self.ranker = LLM(
            model=model_name_or_path,
            tensor_parallel_size=num_gpu,
            gpu_memory_utilization=0.95,
            enable_prefix_caching=True
        )
        self.tokenizer = self.ranker.get_tokenizer()
        self.sampling_params = SamplingParams(
            temperature=0,
            max_tokens=4096,
            logprobs=20
        )

    @staticmethod
    def _extract_answer(token_logprobs) -> tuple:
        """Scan per-token logprob dicts and return (digit string after "<answer>", its probability)."""
        seen_text = ""
        answer = ""
        in_answer = False
        prob = 1.0
        for position in token_logprobs:
            # The first entry of each dict is read as the emitted token.
            _, detail = next(iter(position.items()))
            token = detail.decoded_token
            if in_answer and token.isdigit():
                answer += token
                prob *= math.exp(detail.logprob)
            else:
                seen_text += token
                if seen_text.endswith("<answer>"):
                    in_answer = True
        return answer, prob

    @staticmethod
    def _parse_answer(answer: str) -> int:
        """Convert the collected digit string to a score; -1 if it is not an integer or exceeds 10."""
        try:
            value = int(answer)
        except ValueError:
            return -1
        return value if value <= 10 else -1

    def rerank(self, query: str, docs: list, instruction: str, truncate_length: int=None) -> list:
        """
        Reranks a list of documents based on a query and a specific instruction.

        Args:
            query (str): The search query provided by the user.
            docs (list): A list of dictionaries, where each dictionary represents a document
                and must contain a "content" key.
            instruction (str): The instruction for the model, guiding it on how to evaluate the documents.
            truncate_length (int, optional): The maximum number of tokens kept from the query and
                from each document's content. Defaults to None (no truncation).

        Returns:
            list: A new list of document dictionaries, each extended with a "rank_score" key and
                sorted by it in descending order. Documents whose answer could not be parsed
                use -1 as the answer value. An empty ``docs`` yields an empty list.
        """
        # Nothing to score; an empty list must not be sent to the engine as a conversation.
        if not docs:
            return []

        # prepare messages
        query_text = truncate(self.tokenizer, query, length=truncate_length) if truncate_length else query
        messages = [
            [{
                "role": "user",
                "content": prompt_template.format(
                    query=query_text,
                    doc=truncate(self.tokenizer, doc["content"], length=truncate_length) if truncate_length else doc["content"],
                    instruction=instruction
                )
            }] for doc in docs
        ]

        # LLM generate
        outputs = self.ranker.chat(messages, self.sampling_params)

        # extract and organize results
        results = []
        for doc, output in zip(docs, outputs):
            # Only the tail of the completion is inspected for the answer.
            tail = output.outputs[0].logprobs[-self._ANSWER_TAIL_TOKENS:]
            answer, prob = self._extract_answer(tail)
            results.append({
                **doc,
                "rank_score": self._parse_answer(answer) * prob
            })

        # sort the reranking results for the query
        results.sort(key=lambda x: x["rank_score"], reverse=True)
        return results

View File

@@ -0,0 +1,10 @@
{
"BRIGHT (AoPS)": "We want to find different but similar math problems to the query. A document is relevant if it uses the same class of functions and shares any overlapping techniques.",
"BRIGHT (LeetCode)": "I am looking to find different problems that share similar data structures (of any kind) or algorithms (e.g. DFS, DP, sorting, traversals, etc.). I am looking for problems that share one or both of these similarities to the query. Does the passage below share any similarities? e.g. if there was a textbook on leetcode problems, this would be in the same book even though it could be in a different chapter.",
"BRIGHT (Pony)": "I will use the programming language pony. But to solve the problem above, I need to know things about pony. A passage is relevant if it contains docs that match any part (even basic parts) of the code I will have to write for the above program.",
"BRIGHT (TheoremQA-Q)": "We want to find a document which uses the same mathematical process as the query. A document is relevant if it uses the same mathematical process as the query.",
"BRIGHT (TheoremQA-T)": "We want to find a document which uses the same mathematical process as the query. A document is relevant if it uses the same mathematical process as the query.",
"BRIGHT (others)": "A document is relevant if it contains information that helps answer or address the query. A document is not relevant if it doesn't contain information that helps answer the query, even if it mentions similar topics.",
"BEIR / TREC DL": "Given a query, retrieval relevant passage.",
"FollowIR": "Retrieval the relevant passage for the given query. Be careful about the extra requirements about relevance in the query."
}

44
examples/utils.py Normal file
View File

@@ -0,0 +1,44 @@
import numpy as np
# Prompt sent as the user message for every (query, document) pair.
# Placeholders filled via str.format: {instruction}, {query}, {doc}.
# The rerankers locate the literal "<answer>" tag in the model output to read
# the score, so the output format described here must stay in sync with them.
prompt_template = """Given a query and a document, please give a relevance score of 0~10.
The goal or relevance definition is: {instruction}
Here is the query:
{query}
Here is the document:
{doc}
After thinking, directly choose a relevance score from [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10].
- 0 represents completely not related
- 10 means perfectly related.
Desired output format:
<think>put your thinking here</think><answer>Only allows an integer here</answer>
Your output:"""
def truncate(tokenizer, text, length):
    """Return ``text`` cut down to at most ``length`` tokens.

    Args:
        tokenizer: Object exposing ``tokenize(text)`` and
            ``convert_tokens_to_string(tokens)``.
        text: The string to shorten, or None.
        length: Maximum number of tokens to keep, or None to disable truncation.

    Returns:
        ``text`` unchanged when either ``text`` or ``length`` is None; otherwise
        the string rebuilt from the first ``length`` tokens.
    """
    # Identity comparison: ``== None`` would defer to a custom ``__eq__``.
    if length is None or text is None:
        return text
    return tokenizer.convert_tokens_to_string(tokenizer.tokenize(text)[:length])
def hybrid_scores(results, alpha):
    """Blend first-stage and reranker scores after z-score normalization.

    Each score family is standardized over ``results`` (mean 0, population
    standard deviation 1) and combined as
    ``alpha * first_stage + (1 - alpha) * rank``.

    Args:
        results: List of dicts, each with "first_stage_score" and "rank_score".
        alpha: Weight of the normalized first-stage score.

    Returns:
        A new list of dicts, each a copy of the input dict plus a float
        "hybrid_score", sorted by that score in descending order. An empty
        input yields an empty list.
    """
    if not results:
        return []

    def _standardize(key):
        # A zero spread (single result or identical scores) carries no ranking
        # signal; map it to zeros instead of dividing by zero and getting NaN.
        values = np.asarray([each[key] for each in results], dtype=float)
        spread = values.std()
        if spread == 0:
            return np.zeros_like(values)
        return (values - values.mean()) / spread

    first_stage_z = _standardize("first_stage_score")
    rank_z = _standardize("rank_score")

    hybrid_results = [
        {**result, "hybrid_score": float(alpha * fs + (1 - alpha) * rk)}
        for result, fs, rk in zip(results, first_stage_z, rank_z)
    ]
    hybrid_results.sort(key=lambda x: x["hybrid_score"], reverse=True)
    return hybrid_results