diff --git a/examples/offline_inference_npu.py b/examples/offline_inference_npu_v0.py similarity index 94% rename from examples/offline_inference_npu.py rename to examples/offline_inference_npu_v0.py index 76d00d2..b6a1156 100644 --- a/examples/offline_inference_npu.py +++ b/examples/offline_inference_npu_v0.py @@ -17,6 +17,11 @@ # Adapted from vllm-project/vllm/examples/offline_inference/basic.py # +import os + +os.environ["VLLM_USE_V1"] = "0" +os.environ["VLLM_USE_MODELSCOPE"] = "True" + from vllm import LLM, SamplingParams prompts = [ diff --git a/examples/offline_inference_npu_v1.py b/examples/offline_inference_npu_v1.py index 939d84b..72486f0 100644 --- a/examples/offline_inference_npu_v1.py +++ b/examples/offline_inference_npu_v1.py @@ -19,10 +19,11 @@ import os +os.environ["VLLM_USE_MODELSCOPE"] = "True" +os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn" + from vllm import LLM, SamplingParams -os.environ["VLLM_USE_V1"] = "1" -os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn" if __name__ == "__main__": prompts = [ @@ -35,7 +36,7 @@ if __name__ == "__main__": # Create a sampling params object. sampling_params = SamplingParams(max_tokens=100, temperature=0.0) # Create an LLM. - llm = LLM(model="/data/weights/deepseek-ai/deepseekv3-lite-base-latest", + llm = LLM(model="deepseek-ai/DeepSeek-V2-Lite", tensor_parallel_size=2, enforce_eager=True, trust_remote_code=True,