[Lint]Style: Convert example to ruff format (#5863)

### What this PR does / why we need it? This PR fixes linting issues in the `example/` to align with the project's Ruff configuration. - vLLM version: v0.13.0 - vLLM main: bde38c11df Signed-off-by: root <root@LAPTOP-VQKDDVMG.localdomain> Co-authored-by: root <root@LAPTOP-VQKDDVMG.localdomain>
2026-01-13 20:46:50 +08:00
parent f7b904641e
commit 78d5ce3e01
23 changed files with 678 additions and 1037 deletions
--- a/examples/offline_inference_sleep_mode_npu.py
+++ b/examples/offline_inference_sleep_mode_npu.py
@@ -25,11 +25,12 @@ from vllm.utils.mem_constants import GiB_bytes
 os.environ["VLLM_USE_MODELSCOPE"] = "True"
 os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"

+
 def main():
    prompt = "How are you?"

    free, total = torch.npu.mem_get_info()
-    print(f"Free memory before sleep: {free / 1024 ** 3:.2f} GiB")
+    print(f"Free memory before sleep: {free / 1024**3:.2f} GiB")
    # record npu memory use baseline in case other process is running
    used_bytes_baseline = total - free
    llm = LLM("Qwen/Qwen2.5-0.5B-Instruct", enable_sleep_mode=True)
@@ -39,9 +40,7 @@ def main():
    llm.sleep(level=1)

    free_npu_bytes_after_sleep, total = torch.npu.mem_get_info()
-    print(
-        f"Free memory after sleep: {free_npu_bytes_after_sleep / 1024 ** 3:.2f} GiB"
-    )
+    print(f"Free memory after sleep: {free_npu_bytes_after_sleep / 1024**3:.2f} GiB")
    used_bytes = total - free_npu_bytes_after_sleep - used_bytes_baseline
    # now the memory usage should be less than the model weights
    # (0.5B model, 1GiB weights)