feat: Improve the offline_inference npu v0/v1 scripts (#1669)
### What this PR does / why we need it?
Improve
- Keep the same file name format as v1, `offline_inference_npu_v0.py`,
`offline_inference_npu_v1.py`
- Use `VLLM_USE_V1` = 0/1 clearly in py scripts
- Fix some runtime errors in `offline_inference_npu_v1.py`, e.g. the model
`deepseekv3-lite-base-latest` does not exist on ModelScope or Hugging Face.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
- vLLM version: v0.9.2
- vLLM main:
baed180aa0
Signed-off-by: xleoken <xleoken@163.com>
This commit is contained in:
@@ -17,6 +17,11 @@
|
|||||||
# Adapted from vllm-project/vllm/examples/offline_inference/basic.py
|
# Adapted from vllm-project/vllm/examples/offline_inference/basic.py
|
||||||
#
|
#
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
os.environ["VLLM_USE_V1"] = "0"
|
||||||
|
os.environ["VLLM_USE_MODELSCOPE"] = "True"
|
||||||
|
|
||||||
from vllm import LLM, SamplingParams
|
from vllm import LLM, SamplingParams
|
||||||
|
|
||||||
prompts = [
|
prompts = [
|
||||||
@@ -19,10 +19,11 @@
|
|||||||
|
|
||||||
import os
|
import os
|
||||||
|
|
||||||
|
os.environ["VLLM_USE_MODELSCOPE"] = "True"
|
||||||
|
os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
|
||||||
|
|
||||||
from vllm import LLM, SamplingParams
|
from vllm import LLM, SamplingParams
|
||||||
|
|
||||||
os.environ["VLLM_USE_V1"] = "1"
|
|
||||||
os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
prompts = [
|
prompts = [
|
||||||
@@ -35,7 +36,7 @@ if __name__ == "__main__":
|
|||||||
# Create a sampling params object.
|
# Create a sampling params object.
|
||||||
sampling_params = SamplingParams(max_tokens=100, temperature=0.0)
|
sampling_params = SamplingParams(max_tokens=100, temperature=0.0)
|
||||||
# Create an LLM.
|
# Create an LLM.
|
||||||
llm = LLM(model="/data/weights/deepseek-ai/deepseekv3-lite-base-latest",
|
llm = LLM(model="deepseek-ai/DeepSeek-V2-Lite",
|
||||||
tensor_parallel_size=2,
|
tensor_parallel_size=2,
|
||||||
enforce_eager=True,
|
enforce_eager=True,
|
||||||
trust_remote_code=True,
|
trust_remote_code=True,
|
||||||
|
|||||||
Reference in New Issue
Block a user