diff --git a/examples/offline_distributed_inference_npu.py b/examples/offline_distributed_inference_npu.py deleted file mode 100644 index 4e2e7ed..0000000 --- a/examples/offline_distributed_inference_npu.py +++ /dev/null @@ -1,53 +0,0 @@ -# -# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved. -# Copyright 2023 The vLLM team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# This file is a part of the vllm-ascend project. -# Adapted from vllm-project/vllm/examples/offline_inference/basic.py -# - -import os -from vllm import LLM, SamplingParams - -os.environ["VLLM_USE_MODELSCOPE"] = "True" -os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn" - -def main(): - prompts = [ - "Hello, my name is", - "The president of the United States is", - "The capital of France is", - "The future of AI is", - ] - - # Create a sampling params object. - sampling_params = SamplingParams(max_tokens=100, temperature=0.0) - # Create an LLM. - llm = LLM( - model="Qwen/Qwen2.5-0.5B-Instruct", - tensor_parallel_size=2, - distributed_executor_backend="mp", - trust_remote_code=True, - ) - - # Generate texts from the prompts. - outputs = llm.generate(prompts, sampling_params) - for output in outputs: - prompt = output.prompt - generated_text = output.outputs[0].text - print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}") - - -if __name__ == "__main__": - main()