[Misc][V0 Deprecation] Add __main__ guard to all offline examples (#1837)

### What this PR does / why we need it?
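Add an `if __name__ == "__main__":` guard to all offline examples, moving each script's top-level inference logic into a `main()` function where needed. The examples set `VLLM_WORKER_MULTIPROC_METHOD` to `spawn`; with the spawn start method, child processes re-import the main module, so unguarded top-level code would be executed again in every worker process.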

- vLLM version: v0.9.2
- vLLM main: 76b494444f

---------

Signed-off-by: shen-shanshan <467638484@qq.com>
This commit is contained in:
Shanshan Shen
2025-07-17 14:13:30 +08:00
committed by GitHub
parent 19e37cd379
commit aeb5aa8b88
15 changed files with 157 additions and 87 deletions

View File

@@ -12,6 +12,9 @@ import os
import time import time
from multiprocessing import Event, Process from multiprocessing import Event, Process
os.environ["VLLM_USE_MODELSCOPE"] = "True"
os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
kv_connector_extra_config = { kv_connector_extra_config = {
"prefill_device_ips": ["1.2.3.1", "1.2.3.2"], "prefill_device_ips": ["1.2.3.1", "1.2.3.2"],
"decode_device_ips": ["1.2.3.9", "1.2.3.10"], "decode_device_ips": ["1.2.3.9", "1.2.3.10"],

View File

@@ -13,6 +13,9 @@ import msgpack # type: ignore
import zmq import zmq
from quart import Quart, make_response, request from quart import Quart, make_response, request
os.environ["VLLM_USE_MODELSCOPE"] = "True"
os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
DP_PROXY_HTTP_PORT = 10004 DP_PROXY_HTTP_PORT = 10004
DP_PROXY_ZMQ_REG_PORT = 30006 DP_PROXY_ZMQ_REG_PORT = 30006
DP_PROXY_ZMQ_NOTIFY_PORT = 30005 DP_PROXY_ZMQ_NOTIFY_PORT = 30005

View File

@@ -8,6 +8,9 @@ import msgpack # type: ignore
import zmq import zmq
from quart import Quart, make_response, request from quart import Quart, make_response, request
os.environ["VLLM_USE_MODELSCOPE"] = "True"
os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
prefill_instances: dict[str, str] = {} # http_address: zmq_address prefill_instances: dict[str, str] = {} # http_address: zmq_address
decode_instances: dict[str, str] = {} # http_address: zmq_address decode_instances: dict[str, str] = {} # http_address: zmq_address

View File

@@ -8,6 +8,9 @@ import matplotlib.pyplot as plt # type: ignore
import numpy as np import numpy as np
import torch import torch
os.environ["VLLM_USE_MODELSCOPE"] = "True"
os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
logger = logging.getLogger("msit_logger") logger = logging.getLogger("msit_logger")

View File

@@ -60,6 +60,9 @@ from time import sleep
from vllm import LLM, SamplingParams from vllm import LLM, SamplingParams
from vllm.utils import get_open_port from vllm.utils import get_open_port
os.environ["VLLM_USE_MODELSCOPE"] = "True"
os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
def parse_args(): def parse_args():
import argparse import argparse

View File

@@ -21,6 +21,8 @@ import os
import time import time
from multiprocessing import Event, Process from multiprocessing import Event, Process
os.environ["VLLM_USE_MODELSCOPE"] = "True"
os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
def clean_up(): def clean_up():
import gc import gc

View File

@@ -17,8 +17,13 @@
# Adapted from vllm-project/vllm/examples/offline_inference/basic.py # Adapted from vllm-project/vllm/examples/offline_inference/basic.py
# #
import os
from vllm import LLM, SamplingParams from vllm import LLM, SamplingParams
os.environ["VLLM_USE_MODELSCOPE"] = "True"
os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
def main():
prompts = [ prompts = [
"Hello, my name is", "Hello, my name is",
"The president of the United States is", "The president of the United States is",
@@ -42,3 +47,7 @@ for output in outputs:
prompt = output.prompt prompt = output.prompt
generated_text = output.outputs[0].text generated_text = output.outputs[0].text
print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}") print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
if __name__ == "__main__":
main()

View File

@@ -3,6 +3,8 @@ import time
from vllm import LLM, SamplingParams from vllm import LLM, SamplingParams
os.environ["VLLM_USE_MODELSCOPE"] = "True"
os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
# enable dual-batch overlap for vllm ascend # enable dual-batch overlap for vllm ascend
os.environ["VLLM_ASCEND_ENABLE_DBO"] = "1" os.environ["VLLM_ASCEND_ENABLE_DBO"] = "1"

View File

@@ -19,16 +19,17 @@
import os import os
os.environ["VLLM_USE_MODELSCOPE"] = "True"
import torch import torch
from vllm import LLM from vllm import LLM
os.environ["VLLM_USE_MODELSCOPE"] = "True"
os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
def get_detailed_instruct(task_description: str, query: str) -> str: def get_detailed_instruct(task_description: str, query: str) -> str:
return f'Instruct: {task_description}\nQuery:{query}' return f'Instruct: {task_description}\nQuery:{query}'
def main():
# Each query must come with a one-sentence instruction that describes the task # Each query must come with a one-sentence instruction that describes the task
task = 'Given a web search query, retrieve relevant passages that answer the query' task = 'Given a web search query, retrieve relevant passages that answer the query'
@@ -51,3 +52,7 @@ embeddings = torch.tensor([o.outputs.embedding for o in outputs])
scores = (embeddings[:2] @ embeddings[2:].T) scores = (embeddings[:2] @ embeddings[2:].T)
print(scores.tolist()) print(scores.tolist())
# [[0.7620252966880798, 0.14078938961029053], [0.1358368694782257, 0.6013815999031067]] # [[0.7620252966880798, 0.14078938961029053], [0.1358368694782257, 0.6013815999031067]]
if __name__ == "__main__":
main()

View File

@@ -24,9 +24,14 @@ For most models, the prompt format should follow corresponding examples
on HuggingFace model repository. on HuggingFace model repository.
""" """
import os
from vllm import LLM, SamplingParams from vllm import LLM, SamplingParams
from vllm.assets.audio import AudioAsset from vllm.assets.audio import AudioAsset
os.environ["VLLM_USE_MODELSCOPE"] = "True"
os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
audio_assets = [AudioAsset("mary_had_lamb"), AudioAsset("winning_call")] audio_assets = [AudioAsset("mary_had_lamb"), AudioAsset("winning_call")]
question_per_audio_count = { question_per_audio_count = {
1: "What is recited in the audio?", 1: "What is recited in the audio?",

View File

@@ -21,9 +21,12 @@
import os import os
os.environ["VLLM_USE_MODELSCOPE"] = "True" os.environ["VLLM_USE_MODELSCOPE"] = "True"
os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
from vllm import LLM, SamplingParams from vllm import LLM, SamplingParams
def main():
prompts = [ prompts = [
"Hello, my name is", "Hello, my name is",
"The president of the United States is", "The president of the United States is",
@@ -42,3 +45,7 @@ for output in outputs:
prompt = output.prompt prompt = output.prompt
generated_text = output.outputs[0].text generated_text = output.outputs[0].text
print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}") print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
if __name__ == "__main__":
main()

View File

@@ -25,7 +25,8 @@ os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
from vllm import LLM, SamplingParams from vllm import LLM, SamplingParams
if __name__ == "__main__":
def main():
prompts = [ prompts = [
"Hello, my name is", "Hello, my name is",
"The president of the United States is", "The president of the United States is",
@@ -48,3 +49,7 @@ if __name__ == "__main__":
prompt = output.prompt prompt = output.prompt
generated_text = output.outputs[0].text generated_text = output.outputs[0].text
print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}") print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
if __name__ == "__main__":
main()

View File

@@ -25,7 +25,7 @@ from vllm.utils import GiB_bytes
os.environ["VLLM_USE_MODELSCOPE"] = "True" os.environ["VLLM_USE_MODELSCOPE"] = "True"
os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn" os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
if __name__ == "__main__": def main():
prompt = "How are you?" prompt = "How are you?"
free, total = torch.npu.mem_get_info() free, total = torch.npu.mem_get_info()
@@ -51,3 +51,7 @@ if __name__ == "__main__":
output2 = llm.generate(prompt, sampling_params) output2 = llm.generate(prompt, sampling_params)
# cmp output # cmp output
assert output[0].outputs[0].text == output2[0].outputs[0].text assert output[0].outputs[0].text == output2[0].outputs[0].text
if __name__ == "__main__":
main()

View File

@@ -17,8 +17,15 @@
# limitations under the License. # limitations under the License.
# #
import os
from vllm import LLM, SamplingParams from vllm import LLM, SamplingParams
os.environ["VLLM_USE_MODELSCOPE"] = "True"
os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
def main():
prompts = [ prompts = [
"Hello, my name is", "Hello, my name is",
"The president of the United States is", "The president of the United States is",
@@ -48,3 +55,7 @@ for output in outputs:
prompt = output.prompt prompt = output.prompt
generated_text = output.outputs[0].text generated_text = output.outputs[0].text
print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}") print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
if __name__ == "__main__":
main()

View File

@@ -1,8 +1,13 @@
import os
import torch import torch
from transformers import (AutoModelForCausalLM, AutoTokenizer, from transformers import (AutoModelForCausalLM, AutoTokenizer,
PreTrainedTokenizer) PreTrainedTokenizer)
from vllm import LLM from vllm import LLM
os.environ["VLLM_USE_MODELSCOPE"] = "True"
os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
def init_tokenizer_and_llm(model_name: str): def init_tokenizer_and_llm(model_name: str):
tokenizer = AutoTokenizer.from_pretrained(model_name) tokenizer = AutoTokenizer.from_pretrained(model_name)