### What this PR does / why we need it?
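`os.environ["VLLM_USE_MODELSCOPE"] = "True"` must be set before the vLLM
modules are imported. Otherwise the variable does not take effect, the model
is looked up on Hugging Face instead of ModelScope, and the example fails with: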
```
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/xleoken/projects/vllm-ascend/examples/offline_embed.py", line 48, in <module>
model = LLM(model="Qwen/Qwen3-Embedding-0.6B", task="embed")
File "/usr/local/python3.10.17/lib/python3.10/site-packages/vllm/entrypoints/llm.py", line 243, in __init__
self.llm_engine = LLMEngine.from_engine_args(
File "/usr/local/python3.10.17/lib/python3.10/site-packages/vllm/engine/llm_engine.py", line 494, in from_engine_args
vllm_config = engine_args.create_engine_config(usage_context)
File "/usr/local/python3.10.17/lib/python3.10/site-packages/vllm/engine/arg_utils.py", line 1018, in create_engine_config
model_config = self.create_model_config()
File "/usr/local/python3.10.17/lib/python3.10/site-packages/vllm/engine/arg_utils.py", line 910, in create_model_config
return ModelConfig(
File "/usr/local/python3.10.17/lib/python3.10/site-packages/pydantic/_internal/_dataclasses.py", line 120, in __init__
s.__pydantic_validator__.validate_python(ArgsKwargs(args, kwargs), self_instance=s)
File "/usr/local/python3.10.17/lib/python3.10/site-packages/vllm/config.py", line 528, in __post_init__
hf_config = get_config(self.hf_config_path or self.model,
File "/usr/local/python3.10.17/lib/python3.10/site-packages/vllm/transformers_utils/config.py", line 321, in get_config
config_dict, _ = PretrainedConfig.get_config_dict(
File "/usr/local/python3.10.17/lib/python3.10/site-packages/transformers/configuration_utils.py", line 590, in get_config_dict
config_dict, kwargs = cls._get_config_dict(pretrained_model_name_or_path, **kwargs)
File "/usr/local/python3.10.17/lib/python3.10/site-packages/transformers/configuration_utils.py", line 649, in _get_config_dict
resolved_config_file = cached_file(
File "/usr/local/python3.10.17/lib/python3.10/site-packages/transformers/utils/hub.py", line 266, in cached_file
file = cached_files(path_or_repo_id=path_or_repo_id, filenames=[filename], **kwargs)
File "/usr/local/python3.10.17/lib/python3.10/site-packages/transformers/utils/hub.py", line 491, in cached_files
raise OSError(
OSError: We couldn't connect to 'https://huggingface.co' to load the files, and couldn't find them in the cached files.
Checkout your internet connection or see how to run the library in offline mode at 'https://huggingface.co/docs/transformers/installation#offline-mode'.
[ERROR] 2025-07-03-15:27:10 (PID:333665, Device:-1, RankID:-1) ERR99999 UNKNOWN applicaiton exception
```
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Tested locally.
Signed-off-by: xleoken <xleoken@163.com>
52 lines
2.0 KiB
Python
52 lines
2.0 KiB
Python
#
|
|
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
|
# Copyright 2023 The vLLM team.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
# This file is a part of the vllm-ascend project.
|
|
# Adapted from https://www.modelscope.cn/models/Qwen/Qwen3-Embedding-0.6B
|
|
#
|
|
|
|
import os
|
|
os.environ["VLLM_USE_MODELSCOPE"] = "True"
|
|
|
|
import torch
|
|
from vllm import LLM
|
|
|
|
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the instruction-prefixed prompt for a single query.

    Args:
        task_description: One-sentence description of the retrieval task.
        query: The raw query text.

    Returns:
        The string ``"Instruct: <task_description>\\nQuery:<query>"``.
    """
    # NOTE(review): there is deliberately no space after "Query:" here; the
    # layout presumably mirrors the prompt format this example was adapted
    # from -- confirm before reformatting.
    return f'Instruct: {task_description}\nQuery:{query}'
|
|
|
|
|
|
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a web search query, retrieve relevant passages that answer the query'

queries = [
    get_detailed_instruct(task, 'What is the capital of China?'),
    get_detailed_instruct(task, 'Explain gravity'),
]
# No need to add instruction for retrieval documents
documents = [
    "The capital of China is Beijing.",
    "Gravity is a force that attracts two bodies towards each other. It gives weight to physical objects and is responsible for the movement of planets around the sun.",
]
input_texts = queries + documents


def main():
    """Embed the queries and documents, then print the query/document scores.

    The printed value is a nested list with one row per query and one column
    per document, obtained by matrix-multiplying the query embeddings with
    the transposed document embeddings.
    """
    model = LLM(model="Qwen/Qwen3-Embedding-0.6B", task="embed")

    outputs = model.embed(input_texts)
    embeddings = torch.tensor([o.outputs.embedding for o in outputs])

    # input_texts is queries followed by documents, so the first
    # len(queries) rows are query embeddings and the rest are documents.
    n_queries = len(queries)
    scores = embeddings[:n_queries] @ embeddings[n_queries:].T
    print(scores.tolist())
    # [[0.7620252966880798, 0.14078938961029053], [0.1358368694782257, 0.6013815999031067]]


# Guard the entry point: engine construction may start worker processes, and
# with the "spawn" start method the main module is re-imported in each child,
# which would otherwise re-run the model construction.
if __name__ == "__main__":
    main()
|