[Lint]Style: Convert example to ruff format (#5863)
### What this PR does / why we need it?
This PR fixes linting issues in the `example/` directory to align with the
project's Ruff configuration.
- vLLM version: v0.13.0
- vLLM main: bde38c11df
Signed-off-by: root <root@LAPTOP-VQKDDVMG.localdomain>
Co-authored-by: root <root@LAPTOP-VQKDDVMG.localdomain>
This commit is contained in:
@@ -25,11 +25,12 @@ from vllm.utils.mem_constants import GiB_bytes
|
||||
os.environ["VLLM_USE_MODELSCOPE"] = "True"
|
||||
os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
|
||||
|
||||
|
||||
def main():
|
||||
prompt = "How are you?"
|
||||
|
||||
free, total = torch.npu.mem_get_info()
|
||||
print(f"Free memory before sleep: {free / 1024 ** 3:.2f} GiB")
|
||||
print(f"Free memory before sleep: {free / 1024**3:.2f} GiB")
|
||||
# record npu memory use baseline in case other process is running
|
||||
used_bytes_baseline = total - free
|
||||
llm = LLM("Qwen/Qwen2.5-0.5B-Instruct", enable_sleep_mode=True)
|
||||
@@ -39,9 +40,7 @@ def main():
|
||||
llm.sleep(level=1)
|
||||
|
||||
free_npu_bytes_after_sleep, total = torch.npu.mem_get_info()
|
||||
print(
|
||||
f"Free memory after sleep: {free_npu_bytes_after_sleep / 1024 ** 3:.2f} GiB"
|
||||
)
|
||||
print(f"Free memory after sleep: {free_npu_bytes_after_sleep / 1024**3:.2f} GiB")
|
||||
used_bytes = total - free_npu_bytes_after_sleep - used_bytes_baseline
|
||||
# now the memory usage should be less than the model weights
|
||||
# (0.5B model, 1GiB weights)
|
||||
|
||||
Reference in New Issue
Block a user