From a66ef39bb6824f6406e3296c06d35c17ec173109 Mon Sep 17 00:00:00 2001 From: Shanshan Shen <467638484@qq.com> Date: Mon, 21 Jul 2025 12:01:45 +0800 Subject: [PATCH] [Misc][V0 Deprecation] Remove Redundant Offline Distributed Inference Example (#1899) ### What this PR does / why we need it? The file `offline_distributed_inference_npu.py` is identical to `offline_inference_npu_tp2.py`, so this PR deletes the redundant `offline_distributed_inference_npu.py`. This PR is a part of https://github.com/vllm-project/vllm-ascend/issues/1620. ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? - vLLM version: v0.9.2 - vLLM main: https://github.com/vllm-project/vllm/commit/8188196a1c8af26134d8e366ebe564c18fb95379 Signed-off-by: shen-shanshan <467638484@qq.com> --- examples/offline_distributed_inference_npu.py | 53 ------------------- 1 file changed, 53 deletions(-) delete mode 100644 examples/offline_distributed_inference_npu.py diff --git a/examples/offline_distributed_inference_npu.py b/examples/offline_distributed_inference_npu.py deleted file mode 100644 index 4e2e7ed..0000000 --- a/examples/offline_distributed_inference_npu.py +++ /dev/null @@ -1,53 +0,0 @@ -# -# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved. -# Copyright 2023 The vLLM team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# This file is a part of the vllm-ascend project. 
-# Adapted from vllm-project/vllm/examples/offline_inference/basic.py -# - -import os -from vllm import LLM, SamplingParams - -os.environ["VLLM_USE_MODELSCOPE"] = "True" -os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn" - -def main(): - prompts = [ - "Hello, my name is", - "The president of the United States is", - "The capital of France is", - "The future of AI is", - ] - - # Create a sampling params object. - sampling_params = SamplingParams(max_tokens=100, temperature=0.0) - # Create an LLM. - llm = LLM( - model="Qwen/Qwen2.5-0.5B-Instruct", - tensor_parallel_size=2, - distributed_executor_backend="mp", - trust_remote_code=True, - ) - - # Generate texts from the prompts. - outputs = llm.generate(prompts, sampling_params) - for output in outputs: - prompt = output.prompt - generated_text = output.outputs[0].text - print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}") - - -if __name__ == "__main__": - main()