From 3ef45d0cc2ccf722c60b81dadf10d55913f0a215 Mon Sep 17 00:00:00 2001 From: xleoken Date: Wed, 9 Jul 2025 17:03:53 +0800 Subject: [PATCH] feat: Improve the offline_inference npu v0/v1 scripts (#1669) ### What this PR does / why we need it? Improve - Keep the same file name format as v1, `offline_inference_npu_v0.py`, `offline_inference_npu_v1.py` - Use `VLLM_USE_V1` = 0/1 clearly in py scripts - Fix some run errors in `offline_inference_npu_v1.py`, e.g. `deepseekv3-lite-base-latest` does not exist in ModelScope or HF. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? - vLLM version: v0.9.2 - vLLM main: https://github.com/vllm-project/vllm/commit/baed180aa00314897b37b4b0af65adeba06f3d77 Signed-off-by: xleoken --- ...ffline_inference_npu.py => offline_inference_npu_v0.py} | 5 +++++ examples/offline_inference_npu_v1.py | 7 ++++--- 2 files changed, 9 insertions(+), 3 deletions(-) rename examples/{offline_inference_npu.py => offline_inference_npu_v0.py} (94%) diff --git a/examples/offline_inference_npu.py b/examples/offline_inference_npu_v0.py similarity index 94% rename from examples/offline_inference_npu.py rename to examples/offline_inference_npu_v0.py index 76d00d2..b6a1156 100644 --- a/examples/offline_inference_npu.py +++ b/examples/offline_inference_npu_v0.py @@ -17,6 +17,11 @@ # Adapted from vllm-project/vllm/examples/offline_inference/basic.py # +import os + +os.environ["VLLM_USE_V1"] = "0" +os.environ["VLLM_USE_MODELSCOPE"] = "True" + from vllm import LLM, SamplingParams prompts = [ diff --git a/examples/offline_inference_npu_v1.py b/examples/offline_inference_npu_v1.py index 939d84b..72486f0 100644 --- a/examples/offline_inference_npu_v1.py +++ b/examples/offline_inference_npu_v1.py @@ -19,10 +19,11 @@ import os +os.environ["VLLM_USE_MODELSCOPE"] = "True" +os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn" + from vllm import LLM, SamplingParams -os.environ["VLLM_USE_V1"] = "1" -os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = 
"spawn" if __name__ == "__main__": prompts = [ @@ -35,7 +36,7 @@ if __name__ == "__main__": # Create a sampling params object. sampling_params = SamplingParams(max_tokens=100, temperature=0.0) # Create an LLM. - llm = LLM(model="/data/weights/deepseek-ai/deepseekv3-lite-base-latest", + llm = LLM(model="deepseek-ai/DeepSeek-V2-Lite", tensor_parallel_size=2, enforce_eager=True, trust_remote_code=True,