[Model] Support pooling models (#3122)
### What this PR does / why we need it? Support pooling models (like `bge-reranker-v2-m3`) in vllm-ascend. This PR covers the three embedding pooling types (cls_token, mean_token, last_token). After this [commit](17373dcd93), vLLM provides support for adapting pooling models on the v1 engine; this PR includes the corresponding adaptations on the vllm-ascend side. Fixes #1960 - vLLM version: v0.12.0 - vLLM main: ad32e3e19c --------- Signed-off-by: lianyibo <lianyibo1@kunlunit.com> Signed-off-by: MengqingCao <cmq0113@163.com> Co-authored-by: MengqingCao <cmq0113@163.com>
This commit is contained in:
@@ -793,17 +793,12 @@ class InputBatch:
|
||||
logitsprocs=self.logitsprocs,
|
||||
)
|
||||
|
||||
@property
|
||||
def pooling_metadata(self) -> PoolingMetadata:
|
||||
if len(self.pooling_params) == 0:
|
||||
pooling_params = []
|
||||
else:
|
||||
# Note, for now this assumes that all request in the batch
|
||||
# are either sampling or pooling requests
|
||||
assert len(self.req_ids) == len(self.pooling_params)
|
||||
pooling_params = [
|
||||
self.pooling_params[req_id] for req_id in self.req_ids
|
||||
]
|
||||
def get_pooling_params(self) -> list[PoolingParams]:
|
||||
assert len(self.req_ids) == len(self.pooling_params)
|
||||
return [self.pooling_params[req_id] for req_id in self.req_ids]
|
||||
|
||||
def get_pooling_metadata(self) -> PoolingMetadata:
|
||||
pooling_params = self.get_pooling_params()
|
||||
|
||||
return PoolingMetadata(
|
||||
prompt_lens=torch.from_numpy(
|
||||
|
||||
Reference in New Issue
Block a user