v0.10.1rc1
This commit is contained in:
58
examples/offline_embed.py
Normal file
58
examples/offline_embed.py
Normal file
@@ -0,0 +1,58 @@
|
||||
#
|
||||
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
||||
# Copyright 2023 The vLLM team.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# This file is a part of the vllm-ascend project.
|
||||
# Adapted from https://www.modelscope.cn/models/Qwen/Qwen3-Embedding-0.6B
|
||||
#
|
||||
|
||||
import os
|
||||
|
||||
import torch
|
||||
from vllm import LLM
|
||||
|
||||
os.environ["VLLM_USE_MODELSCOPE"] = "True"
|
||||
os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
|
||||
|
||||
def get_detailed_instruct(task_description: str, query: str) -> str:
|
||||
return f'Instruct: {task_description}\nQuery:{query}'
|
||||
|
||||
|
||||
def main():
|
||||
# Each query must come with a one-sentence instruction that describes the task
|
||||
task = 'Given a web search query, retrieve relevant passages that answer the query'
|
||||
|
||||
queries = [
|
||||
get_detailed_instruct(task, 'What is the capital of China?'),
|
||||
get_detailed_instruct(task, 'Explain gravity')
|
||||
]
|
||||
# No need to add instruction for retrieval documents
|
||||
documents = [
|
||||
"The capital of China is Beijing.",
|
||||
"Gravity is a force that attracts two bodies towards each other. It gives weight to physical objects and is responsible for the movement of planets around the sun."
|
||||
]
|
||||
input_texts = queries + documents
|
||||
|
||||
model = LLM(model="Qwen/Qwen3-Embedding-0.6B", task="embed")
|
||||
|
||||
outputs = model.embed(input_texts)
|
||||
embeddings = torch.tensor([o.outputs.embedding for o in outputs])
|
||||
# Calculate the similarity scores between the first two queries and the last two documents
|
||||
scores = (embeddings[:2] @ embeddings[2:].T)
|
||||
print(scores.tolist())
|
||||
# [[0.7620252966880798, 0.14078938961029053], [0.1358368694782257, 0.6013815999031067]]
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user