feat: Improve the offline_inference npu v0/v1 scripts (#1669)

### What this PR does / why we need it?

Improve
- Keep the same file name format as v1, `offline_inference_npu_v0.py`,
`offline_inference_npu_v1.py`
- Use `VLLM_USE_V1` = 0/1 clearly in py scripts
- Fix some run errors in `offline_inference_npu_v1.py`, e.g.
`deepseekv3-lite-base-latest` does not exist on ModelScope or HF.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

- vLLM version: v0.9.2
- vLLM main:
baed180aa0

Signed-off-by: xleoken <xleoken@163.com>
This commit is contained in:
xleoken
2025-07-09 17:03:53 +08:00
committed by GitHub
parent 6af35f60cc
commit 3ef45d0cc2
2 changed files with 9 additions and 3 deletions

View File

@@ -17,6 +17,11 @@
 # Adapted from vllm-project/vllm/examples/offline_inference/basic.py
 #
+import os
+os.environ["VLLM_USE_V1"] = "0"
+os.environ["VLLM_USE_MODELSCOPE"] = "True"
 from vllm import LLM, SamplingParams
 prompts = [

View File

@@ -19,10 +19,11 @@
 import os
+os.environ["VLLM_USE_MODELSCOPE"] = "True"
+os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
 from vllm import LLM, SamplingParams
-os.environ["VLLM_USE_V1"] = "1"
-os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
 if __name__ == "__main__":
     prompts = [
@@ -35,7 +36,7 @@ if __name__ == "__main__":
     # Create a sampling params object.
     sampling_params = SamplingParams(max_tokens=100, temperature=0.0)
     # Create an LLM.
-    llm = LLM(model="/data/weights/deepseek-ai/deepseekv3-lite-base-latest",
+    llm = LLM(model="deepseek-ai/DeepSeek-V2-Lite",
               tensor_parallel_size=2,
               enforce_eager=True,
               trust_remote_code=True,