274 lines
46 KiB
Markdown
274 lines
46 KiB
Markdown
|
|
---
|
||
|
|
library_name: transformers
|
||
|
|
license: apache-2.0
|
||
|
|
language:
|
||
|
|
- af
|
||
|
|
- am
|
||
|
|
- ar
|
||
|
|
- as
|
||
|
|
- az
|
||
|
|
- be
|
||
|
|
- bg
|
||
|
|
- bn
|
||
|
|
- br
|
||
|
|
- bs
|
||
|
|
- ca
|
||
|
|
- cs
|
||
|
|
- cy
|
||
|
|
- da
|
||
|
|
- de
|
||
|
|
- el
|
||
|
|
- en
|
||
|
|
- eo
|
||
|
|
- es
|
||
|
|
- et
|
||
|
|
- eu
|
||
|
|
- fa
|
||
|
|
- ff
|
||
|
|
- fi
|
||
|
|
- fr
|
||
|
|
- fy
|
||
|
|
- ga
|
||
|
|
- gd
|
||
|
|
- gl
|
||
|
|
- gn
|
||
|
|
- gu
|
||
|
|
- ha
|
||
|
|
- he
|
||
|
|
- hi
|
||
|
|
- hr
|
||
|
|
- ht
|
||
|
|
- hu
|
||
|
|
- hy
|
||
|
|
- id
|
||
|
|
- ig
|
||
|
|
- is
|
||
|
|
- it
|
||
|
|
- ja
|
||
|
|
- jv
|
||
|
|
- ka
|
||
|
|
- kk
|
||
|
|
- km
|
||
|
|
- kn
|
||
|
|
- ko
|
||
|
|
- ku
|
||
|
|
- ky
|
||
|
|
- la
|
||
|
|
- lg
|
||
|
|
- li
|
||
|
|
- ln
|
||
|
|
- lo
|
||
|
|
- lt
|
||
|
|
- lv
|
||
|
|
- mg
|
||
|
|
- mk
|
||
|
|
- ml
|
||
|
|
- mn
|
||
|
|
- mr
|
||
|
|
- ms
|
||
|
|
- my
|
||
|
|
- ne
|
||
|
|
- nl
|
||
|
|
- 'no'
|
||
|
|
- ns
|
||
|
|
- om
|
||
|
|
- or
|
||
|
|
- pa
|
||
|
|
- pl
|
||
|
|
- ps
|
||
|
|
- pt
|
||
|
|
- qu
|
||
|
|
- rm
|
||
|
|
- ro
|
||
|
|
- ru
|
||
|
|
- sa
|
||
|
|
- sc
|
||
|
|
- sd
|
||
|
|
- si
|
||
|
|
- sk
|
||
|
|
- sl
|
||
|
|
- so
|
||
|
|
- sq
|
||
|
|
- sr
|
||
|
|
- ss
|
||
|
|
- su
|
||
|
|
- sv
|
||
|
|
- sw
|
||
|
|
- ta
|
||
|
|
- te
|
||
|
|
- th
|
||
|
|
- tl
|
||
|
|
- tn
|
||
|
|
- tr
|
||
|
|
- ug
|
||
|
|
- uk
|
||
|
|
- ur
|
||
|
|
- uz
|
||
|
|
- vi
|
||
|
|
- wo
|
||
|
|
- xh
|
||
|
|
- yi
|
||
|
|
- yo
|
||
|
|
- zh
|
||
|
|
- zu
|
||
|
|
language_bcp47:
|
||
|
|
- bn-Latn
|
||
|
|
- hi-Latn
|
||
|
|
- my-x-zawgyi
|
||
|
|
- ta-Latn
|
||
|
|
- te-Latn
|
||
|
|
- ur-Latn
|
||
|
|
- zh-Hans
|
||
|
|
- zh-Hant
|
||
|
|
pipeline_tag: text-ranking
|
||
|
|
tags:
|
||
|
|
- sentence-transformers
|
||
|
|
---
|
||
|
|
|
||
|
|
<br><br>
|
||
|
|
|
||
|
|
<p align="center">
|
||
|
|
<svg xmlns="http://www.w3.org/2000/svg" xml:space="preserve" viewBox="0 0 2020 1130" width="150" height="150" aria-hidden="true"><path fill="#e95a0f" d="M398.167 621.992c-1.387-20.362-4.092-40.739-3.851-61.081.355-30.085 6.873-59.139 21.253-85.976 10.487-19.573 24.09-36.822 40.662-51.515 16.394-14.535 34.338-27.046 54.336-36.182 15.224-6.955 31.006-12.609 47.829-14.168 11.809-1.094 23.753-2.514 35.524-1.836 23.033 1.327 45.131 7.255 66.255 16.75 16.24 7.3 31.497 16.165 45.651 26.969 12.997 9.921 24.412 21.37 34.158 34.509 11.733 15.817 20.849 33.037 25.987 52.018 3.468 12.81 6.438 25.928 7.779 39.097 1.722 16.908 1.642 34.003 2.235 51.021.427 12.253.224 24.547 1.117 36.762 1.677 22.93 4.062 45.764 11.8 67.7 5.376 15.239 12.499 29.55 20.846 43.681l-18.282 20.328c-1.536 1.71-2.795 3.665-4.254 5.448l-19.323 23.533c-13.859-5.449-27.446-11.803-41.657-16.086-13.622-4.106-27.793-6.765-41.905-8.775-15.256-2.173-30.701-3.475-46.105-4.049-23.571-.879-47.178-1.056-70.769-1.029-10.858.013-21.723 1.116-32.57 1.926-5.362.4-10.69 1.255-16.464 1.477-2.758-7.675-5.284-14.865-7.367-22.181-3.108-10.92-4.325-22.554-13.16-31.095-2.598-2.512-5.069-5.341-6.883-8.443-6.366-10.884-12.48-21.917-18.571-32.959-4.178-7.573-8.411-14.375-17.016-18.559-10.34-5.028-19.538-12.387-29.311-18.611-3.173-2.021-6.414-4.312-9.952-5.297-5.857-1.63-11.98-2.301-17.991-3.376z"></path><path fill="#ed6d7b" d="M1478.998 758.842c-12.025.042-24.05.085-36.537-.373-.14-8.536.231-16.569.453-24.607.033-1.179-.315-2.986-1.081-3.4-.805-.434-2.376.338-3.518.81-.856.354-1.562 1.069-3.589 2.521-.239-3.308-.664-5.586-.519-7.827.488-7.544 2.212-15.166 1.554-22.589-1.016-11.451 1.397-14.592-12.332-14.419-3.793.048-3.617-2.803-3.332-5.331.499-4.422 1.45-8.803 1.77-13.233.311-4.316.068-8.672.068-12.861-2.554-.464-4.326-.86-6.12-1.098-4.415-.586-6.051-2.251-5.065-7.31 1.224-6.279.848-12.862 1.276-19.306.19-2.86-.971-4.473-3.794-4.753-4.113-.407-8.242-1.057-12.352-.975-4.663.093-5.192-2.272-4.751-6.012.733-6.229 1.252-12.483 
1.875-18.726l1.102-10.495c-5.905-.309-11.146-.805-16.385-.778-3.32.017-5.174-1.4-5.566-4.4-1.172-8.968-2.479-17.944-3.001-26.96-.26-4.484-1.936-5.705-6.005-5.774-9.284-.158-18.563-.594-27.843-.953-7.241-.28-10.137-2.764-11.3-9.899-.746-4.576-2.715-7.801-7.777-8.207-7.739-.621-15.511-.992-23.207-1.961-7.327-.923-14.587-2.415-21.853-3.777-5.021-.941-10.003-2.086-15.003-3.14 4.515-22.952 13.122-44.382 26.284-63.587 18.054-26.344 41.439-47.239 69.102-63.294 15.847-9.197 32.541-16.277 50.376-20.599 16.655-4.036 33.617-5.715 50.622-4.385 33.334 2.606 63.836 13.955 92.415 31.15 15.864 9.545 30.241 20.86 42.269 34.758 8.113 9.374 15.201 19.78 21.718 30.359 10.772 17.484 16.846 36.922 20.611 56.991 1.783 9.503 2.815 19.214 3.318 28.876.758 14.578.755 29.196.65 44.311l-51.545 20.013c-7.779 3.059-15.847 5.376-21.753 12.365-4.73 5.598-10.658 10.316-16.547 14.774-9.9 7.496-18.437 15.988-25.083 26.631-3.333 5.337-7.901 10.381-12.999 14.038-11.355 8.144-17.397 18.973-19.615 32.423l-6.988 41.011z"></path><path fill="#ec663e" d="M318.11 923.047c-.702 17.693-.832 35.433-2.255 53.068-1.699 21.052-6.293 41.512-14.793 61.072-9.001 20.711-21.692 38.693-38.496 53.583-16.077 14.245-34.602 24.163-55.333 30.438-21.691 6.565-43.814 8.127-66.013 6.532-22.771-1.636-43.88-9.318-62.74-22.705-20.223-14.355-35.542-32.917-48.075-54.096-9.588-16.203-16.104-33.55-19.201-52.015-2.339-13.944-2.307-28.011-.403-42.182 2.627-19.545 9.021-37.699 17.963-55.067 11.617-22.564 27.317-41.817 48.382-56.118 15.819-10.74 33.452-17.679 52.444-20.455 8.77-1.282 17.696-1.646 26.568-2.055 11.755-.542 23.534-.562 35.289-1.11 8.545-.399 17.067-1.291 26.193-1.675 1.349 1.77 2.24 3.199 2.835 4.742 4.727 12.261 10.575 23.865 18.636 34.358 7.747 10.084 14.83 20.684 22.699 30.666 3.919 4.972 8.37 9.96 13.609 13.352 7.711 4.994 16.238 8.792 24.617 12.668 5.852 2.707 12.037 4.691 18.074 6.998z"></path><path fill="#ea580e" d="M1285.167 162.995c3.796-29.75 13.825-56.841 32.74-80.577 16.339-20.505 36.013-36.502 59.696-47.614 
14.666-6.881 29.971-11.669 46.208-12.749 10.068-.669 20.239-1.582 30.255-.863 16.6 1.191 32.646 5.412 47.
|
||
|
|
</p>
|
||
|
|
|
||
|
|
<p align="center">
|
||
|
|
<b>The crispy rerank family from <a href="https://mixedbread.com"><b>Mixedbread</b></a>.</b>
|
||
|
|
</p>
|
||
|
|
|
||
|
|
<p align="center">
|
||
|
|
<sup> 🍞 Looking for a simple end-to-end retrieval solution? Meet Omni, our multimodal and multilingual model. <a href="https://mixedbread.com"><b>Get in touch for access.</b></a> </sup>
|
||
|
|
</p>
|
||
|
|
|
||
|
|
# 🍞 mxbai-rerank-large-v2 (a.k.a. ProRank-1.5B)
|
||
|
|
|
||
|
|
This is the large model in our family of powerful reranker models. You can learn more about the models in our [blog post](https://mixedbread.com/blog/mxbai-rerank-v2).
|
||
|
|
|
||
|
|
We have two models:
|
||
|
|
|
||
|
|
- [mxbai-rerank-base-v2](https://huggingface.co/mixedbread-ai/mxbai-rerank-base-v2)
|
||
|
|
- [mxbai-rerank-large-v2](https://huggingface.co/mixedbread-ai/mxbai-rerank-large-v2) (🍞)
|
||
|
|
|
||
|
|
|
||
|
|
**The [technical report](https://www.arxiv.org/abs/2506.03487) is now available!**
|
||
|
|
|
||
|
|
|
||
|
|
## 🌟 Features
|
||
|
|
|
||
|
|
- state-of-the-art performance and strong efficiency
|
||
|
|
- multilingual support (100+ languages, outstanding English and Chinese performance)
|
||
|
|
- code support
|
||
|
|
- long-context support
|
||
|
|
|
||
|
|
|
||
|
|
## ⚙️ Usage
|
||
|
|
|
||
|
|
### Using Sentence Transformers
|
||
|
|
|
||
|
|
Install Sentence Transformers:
|
||
|
|
```bash
|
||
|
|
pip install sentence_transformers
|
||
|
|
```
|
||
|
|
|
||
|
|
```python
|
||
|
|
from sentence_transformers import CrossEncoder
|
||
|
|
|
||
|
|
model = CrossEncoder("mixedbread-ai/mxbai-rerank-large-v2")
|
||
|
|
|
||
|
|
query = "Who wrote 'To Kill a Mockingbird'?"
|
||
|
|
documents = [
|
||
|
|
"'To Kill a Mockingbird' is a novel by Harper Lee published in 1960. It was immediately successful, winning the Pulitzer Prize, and has become a classic of modern American literature.",
|
||
|
|
"The novel 'Moby-Dick' was written by Herman Melville and first published in 1851. It is considered a masterpiece of American literature and deals with complex themes of obsession, revenge, and the conflict between good and evil.",
|
||
|
|
"Harper Lee, an American novelist widely known for her novel 'To Kill a Mockingbird', was born in 1926 in Monroeville, Alabama. She received the Pulitzer Prize for Fiction in 1961.",
|
||
|
|
"Jane Austen was an English novelist known primarily for her six major novels, which interpret, critique and comment upon the British landed gentry at the end of the 18th century.",
|
||
|
|
"The 'Harry Potter' series, which consists of seven fantasy novels written by British author J.K. Rowling, is among the most popular and critically acclaimed books of the modern era.",
|
||
|
|
"'The Great Gatsby', a novel written by American author F. Scott Fitzgerald, was published in 1925. The story is set in the Jazz Age and follows the life of millionaire Jay Gatsby and his pursuit of Daisy Buchanan.",
|
||
|
|
]
|
||
|
|
|
||
|
|
pairs = [(query, doc) for doc in documents]
|
||
|
|
scores = model.predict(pairs)
|
||
|
|
print(scores)
|
||
|
|
# [12. 0.5 11.125 0.8125 2.8125 1.5 ]
|
||
|
|
|
||
|
|
rankings = model.rank(query, documents)
|
||
|
|
print(rankings)
|
||
|
|
# [{'corpus_id': 0, 'score': 12.0}, {'corpus_id': 2, 'score': 11.125}, {'corpus_id': 4, 'score': 2.8125}, {'corpus_id': 5, 'score': 1.5}, {'corpus_id': 3, 'score': 0.8125}, {'corpus_id': 1, 'score': 0.5}]
|
||
|
|
```
|
||
|
|
|
||
|
|
### Using mxbai-rerank
|
||
|
|
|
||
|
|
1. Install mxbai-rerank
|
||
|
|
|
||
|
|
```bash
|
||
|
|
pip install mxbai-rerank
|
||
|
|
```
|
||
|
|
|
||
|
|
|
||
|
|
2. Inference
|
||
|
|
|
||
|
|
```python
|
||
|
|
from mxbai_rerank import MxbaiRerankV2
|
||
|
|
|
||
|
|
model = MxbaiRerankV2("mixedbread-ai/mxbai-rerank-large-v2")
|
||
|
|
|
||
|
|
query = "Who wrote 'To Kill a Mockingbird'?"
|
||
|
|
documents = [
|
||
|
|
"'To Kill a Mockingbird' is a novel by Harper Lee published in 1960. It was immediately successful, winning the Pulitzer Prize, and has become a classic of modern American literature.",
|
||
|
|
"The novel 'Moby-Dick' was written by Herman Melville and first published in 1851. It is considered a masterpiece of American literature and deals with complex themes of obsession, revenge, and the conflict between good and evil.",
|
||
|
|
"Harper Lee, an American novelist widely known for her novel 'To Kill a Mockingbird', was born in 1926 in Monroeville, Alabama. She received the Pulitzer Prize for Fiction in 1961.",
|
||
|
|
"Jane Austen was an English novelist known primarily for her six major novels, which interpret, critique and comment upon the British landed gentry at the end of the 18th century.",
|
||
|
|
"The 'Harry Potter' series, which consists of seven fantasy novels written by British author J.K. Rowling, is among the most popular and critically acclaimed books of the modern era.",
|
||
|
|
"'The Great Gatsby', a novel written by American author F. Scott Fitzgerald, was published in 1925. The story is set in the Jazz Age and follows the life of millionaire Jay Gatsby and his pursuit of Daisy Buchanan."
|
||
|
|
]
|
||
|
|
|
||
|
|
# Let's get the scores
|
||
|
|
results = model.rank(query, documents, return_documents=True, top_k=3)
|
||
|
|
|
||
|
|
print(results)
|
||
|
|
```
|
||
|
|
|
||
|
|
## Performance
|
||
|
|
|
||
|
|
### Benchmark Results
|
||
|
|
|
||
|
|
| Model | BEIR Avg | Multilingual | Chinese | Code Search | Latency (s) |
|
||
|
|
|-------|----------|----------|----------|--------------|-------------|
|
||
|
|
| mxbai-rerank-large-v2 | 57.49 | 29.79 | 84.16 | 32.05 | 0.89 |
|
||
|
|
| mxbai-rerank-base-v2 | 55.57 | 28.56 | 83.70 | 31.73 | 0.67 |
|
||
|
|
| mxbai-rerank-large-v1 | 49.32 | 21.88 | 72.53 | 30.72 | 2.24 |
|
||
|
|
|
||
|
|
*Latency measured on an A100 GPU.*
|
||
|
|
|
||
|
|
## Training Details
|
||
|
|
|
||
|
|
The models were trained using a three-step process:
|
||
|
|
|
||
|
|
1. **GRPO (Guided Reinforcement Prompt Optimization)**
|
||
|
|
2. **Contrastive Learning**
|
||
|
|
3. **Preference Learning**
|
||
|
|
|
||
|
|
For more details, check our [technical report](https://www.arxiv.org/abs/2506.03487) and [technical blog post](https://mixedbread.com/blog/mxbai-rerank-v2).
|
||
|
|
|
||
|
|
## 🎓 Citation
|
||
|
|
|
||
|
|
If you find our models useful, please consider giving us a star and citing our work.
|
||
|
|
|
||
|
|
arXiv:
|
||
|
|
|
||
|
|
```bibtex
|
||
|
|
@article{li2025prorank,
|
||
|
|
title={ProRank: Prompt Warmup via Reinforcement Learning for Small Language Models Reranking},
|
||
|
|
author={Xianming Li and Aamir Shakir and Rui Huang and Julius Lipp and Benjamin Clavié and Jing Li},
|
||
|
|
journal={arXiv preprint arXiv:2506.03487},
|
||
|
|
year={2025}
|
||
|
|
}
|
||
|
|
```
|
||
|
|
|
||
|
|
blog post:
|
||
|
|
```bibtex
|
||
|
|
@online{v2rerank2025mxbai,
|
||
|
|
title={Baked-in Brilliance: Reranking Meets RL with mxbai-rerank-v2},
|
||
|
|
author={Sean Lee and Rui Huang and Aamir Shakir and Julius Lipp},
|
||
|
|
year={2025},
|
||
|
|
url={https://www.mixedbread.com/blog/mxbai-rerank-v2},
|
||
|
|
}
|
||
|
|
```
|