Files
xc-llm-ascend/examples/offline_embed.py
SILONG ZENG 78d5ce3e01 [Lint]Style: Convert example to ruff format (#5863)
### What this PR does / why we need it?
This PR fixes linting issues in the `example/` to align with the
project's Ruff configuration.

- vLLM version: v0.13.0
- vLLM main:
bde38c11df

Signed-off-by: root <root@LAPTOP-VQKDDVMG.localdomain>
Co-authored-by: root <root@LAPTOP-VQKDDVMG.localdomain>
2026-01-13 20:46:50 +08:00

61 lines
2.2 KiB
Python

#
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
# Copyright 2023 The vLLM team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This file is a part of the vllm-ascend project.
# Adapted from https://www.modelscope.cn/models/Qwen/Qwen3-Embedding-0.6B
#
import os
import torch
from vllm import LLM
os.environ["VLLM_USE_MODELSCOPE"] = "True"
os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
def get_detailed_instruct(task_description: str, query: str) -> str:
return f"Instruct: {task_description}\nQuery:{query}"
def main():
# Each query must come with a one-sentence instruction that describes the task
task = "Given a web search query, retrieve relevant passages that answer the query"
queries = [
get_detailed_instruct(task, "What is the capital of China?"),
get_detailed_instruct(task, "Explain gravity"),
]
# No need to add instruction for retrieval documents
documents = [
"The capital of China is Beijing.",
"Gravity is a force that attracts two bodies towards each other. "
"It gives weight to physical objects and is responsible for the movement of planets around the sun.",
]
input_texts = queries + documents
model = LLM(model="Qwen/Qwen3-Embedding-0.6B", runner="pooling")
outputs = model.embed(input_texts)
embeddings = torch.tensor([o.outputs.embedding for o in outputs])
# Calculate the similarity scores between the first two queries and the last two documents
scores = embeddings[:2] @ embeddings[2:].T
print(scores.tolist())
# [[0.7620252966880798, 0.14078938961029053], [0.1358368694782257, 0.6013815999031067]]
if __name__ == "__main__":
main()