From b149b39353ab5295cca80aa13ec1903e9c7e60d9 Mon Sep 17 00:00:00 2001
From: Brayden Zhong <b8zhong@uwaterloo.ca>
Date: Thu, 27 Mar 2025 22:45:02 -0400
Subject: [PATCH] [CI] Remove unused imports and add a Ruff F401 check to the
 pre-commit config, only for the benchmark/docs/examples folders (#3969)

---
 .pre-commit-config.yaml                        |  7 +++++++
 benchmark/hicache/bench_serving.py             |  2 +-
 .../triton_flashinfer_cudnn.py                 |  2 --
 .../benchmark_lightning_attention_decode.py    |  1 -
 benchmark/kernels/rmsnorm/benchmark_rmsnorm.py |  1 -
 ...benchmark_write_req_to_token_pool_triton.py |  3 ---
 benchmark/lora/lora_bench.py                   | 18 ++----------------
 benchmark/mmlu/bench_sglang.py                 |  1 -
 benchmark/mmmu/bench_hf.py                     |  1 -
 benchmark/mmmu/data_utils.py                   |  1 -
 .../multi_turn_chat/long_prompt_multi_turn.py  |  5 -----
 docs/backend/patch.py                          |  1 -
 docs/frontend/patch.py                         |  1 -
 .../quick_start/openai_example_n.py            |  2 --
 .../frontend_language/usage/json_logprobs.py   |  1 -
 .../engine/offline_batch_inference_vlm.py      |  5 -----
 .../hidden_states/hidden_states_server.py      |  2 +-
 examples/runtime/openai_batch_chat.py          |  1 -
 .../openai_chat_with_response_prefill.py       |  1 -
 .../token_in_token_out_llm_server.py           |  2 +-
 .../token_in_token_out_vlm_server.py           |  3 +--
 21 files changed, 13 insertions(+), 48 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index f193ab8fa..eb3073577 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -22,6 +22,13 @@ repos:
     rev: 5.13.2
     hooks:
       - id: isort
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.11.2
+    hooks:
+      - id: ruff
+        args: [--select=F401, --fixable=F401]
+        files: ^(benchmark/|docs/|examples/)
+        exclude: \.ipynb$
   - repo: https://github.com/psf/black
     rev: 24.10.0
     hooks:
diff --git a/benchmark/hicache/bench_serving.py b/benchmark/hicache/bench_serving.py
index b2cfd2573..90f4c6c5f 100644
--- a/benchmark/hicache/bench_serving.py
+++ b/benchmark/hicache/bench_serving.py
@@ -23,7 +23,7 @@ import warnings
 from argparse import ArgumentParser
 from dataclasses import dataclass, field
 from datetime import datetime
-from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple, Union
+from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple
 
 import aiohttp
 import numpy as np
diff --git a/benchmark/kernels/decoding_attention_triton/triton_flashinfer_cudnn.py b/benchmark/kernels/decoding_attention_triton/triton_flashinfer_cudnn.py
index f8c87d48d..b61240b05 100644
--- a/benchmark/kernels/decoding_attention_triton/triton_flashinfer_cudnn.py
+++ b/benchmark/kernels/decoding_attention_triton/triton_flashinfer_cudnn.py
@@ -4,8 +4,6 @@ import math
 import cudnn
 import torch
 import torch.utils.benchmark as benchmark
-import triton
-import triton.language as tl
 from flashinfer import BatchDecodeWithPagedKVCacheWrapper
 
 from sglang.srt.layers.attention.triton_ops.decode_attention import decode_attention_fwd
diff --git a/benchmark/kernels/minmax-text-01-lightning_attention/benchmark_lightning_attention_decode.py b/benchmark/kernels/minmax-text-01-lightning_attention/benchmark_lightning_attention_decode.py
index ff6280dc7..78d81499e 100644
--- a/benchmark/kernels/minmax-text-01-lightning_attention/benchmark_lightning_attention_decode.py
+++ b/benchmark/kernels/minmax-text-01-lightning_attention/benchmark_lightning_attention_decode.py
@@ -1,6 +1,5 @@
 import itertools
 import math
-import os
 from typing import Optional, Tuple
 
 import torch
diff --git a/benchmark/kernels/rmsnorm/benchmark_rmsnorm.py b/benchmark/kernels/rmsnorm/benchmark_rmsnorm.py
index ad7b180ce..aeeea62c0 100644
--- a/benchmark/kernels/rmsnorm/benchmark_rmsnorm.py
+++ b/benchmark/kernels/rmsnorm/benchmark_rmsnorm.py
@@ -3,7 +3,6 @@ from typing import Optional, Tuple, Union
 
 import torch
 import triton
-import triton.language as tl
 from flashinfer.norm import fused_add_rmsnorm, rmsnorm
 from torch import nn
 from vllm import _custom_ops as vllm_ops
diff --git a/benchmark/kernels/scheduler_batch/benchmark_write_req_to_token_pool_triton.py b/benchmark/kernels/scheduler_batch/benchmark_write_req_to_token_pool_triton.py
index a9ad7bc5f..1ce43c8ba 100644
--- a/benchmark/kernels/scheduler_batch/benchmark_write_req_to_token_pool_triton.py
+++ b/benchmark/kernels/scheduler_batch/benchmark_write_req_to_token_pool_triton.py
@@ -1,9 +1,6 @@
 import itertools
 import os
-from typing import List
 
-import numpy as np
-import pytest
 import torch
 import triton
 import triton.language as tl
diff --git a/benchmark/lora/lora_bench.py b/benchmark/lora/lora_bench.py
index b5af65a7d..1d9394261 100644
--- a/benchmark/lora/lora_bench.py
+++ b/benchmark/lora/lora_bench.py
@@ -15,42 +15,28 @@
 import argparse
 import asyncio
 import json
-import os
 import random
 import resource
 import sys
 import time
 import traceback
-import warnings
 from argparse import ArgumentParser
-from dataclasses import dataclass, field
 from datetime import datetime
-from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple, Union
+from typing import Any, Dict, List, Optional, Tuple
 
 import aiohttp
 import numpy as np
-import requests
 from launch_server import LORA_PATH, NUM_LORAS
 from tqdm.asyncio import tqdm
-from transformers import (
-    AutoTokenizer,
-    PreTrainedTokenizer,
-    PreTrainedTokenizerBase,
-    PreTrainedTokenizerFast,
-)
+from transformers import PreTrainedTokenizerBase
 
 from sglang.bench_serving import (
     AIOHTTP_TIMEOUT,
-    SHAREGPT_URL,
-    BenchmarkMetrics,
     RequestFuncInput,
     RequestFuncOutput,
     calculate_metrics,
-    check_chat_template,
-    get_model,
     get_request,
     get_tokenizer,
-    parse_request_rate_range,
     remove_prefix,
     sample_random_requests,
 )
diff --git a/benchmark/mmlu/bench_sglang.py b/benchmark/mmlu/bench_sglang.py
index 00176343c..210b6111e 100644
--- a/benchmark/mmlu/bench_sglang.py
+++ b/benchmark/mmlu/bench_sglang.py
@@ -6,7 +6,6 @@ import time
 import numpy as np
 import pandas as pd
 import tiktoken
-from tqdm import tqdm
 
 from sglang.test.test_utils import (
     add_common_sglang_args_and_parse,
diff --git a/benchmark/mmmu/bench_hf.py b/benchmark/mmmu/bench_hf.py
index 2a5078a37..c6588c7b9 100644
--- a/benchmark/mmmu/bench_hf.py
+++ b/benchmark/mmmu/bench_hf.py
@@ -1,6 +1,5 @@
 import argparse
 
-import PIL.Image
 import torch
 from data_utils import save_json
 from eval_utils import (
diff --git a/benchmark/mmmu/data_utils.py b/benchmark/mmmu/data_utils.py
index 197e90638..cf8916934 100644
--- a/benchmark/mmmu/data_utils.py
+++ b/benchmark/mmmu/data_utils.py
@@ -5,7 +5,6 @@ import os
 import re
 
 import yaml
-from datasets import concatenate_datasets, load_dataset
 
 DOMAIN_CAT2SUB_CAT = {
     "Art and Design": ["Art", "Art_Theory", "Design", "Music"],
diff --git a/benchmark/multi_turn_chat/long_prompt_multi_turn.py b/benchmark/multi_turn_chat/long_prompt_multi_turn.py
index decd8a72f..20f6dd5e3 100644
--- a/benchmark/multi_turn_chat/long_prompt_multi_turn.py
+++ b/benchmark/multi_turn_chat/long_prompt_multi_turn.py
@@ -1,13 +1,8 @@
-import itertools
 import json
-import os
 import random
-import string
-import threading
 import time
 from argparse import ArgumentParser
 from pathlib import Path
-from typing import Union
 
 from tqdm import tqdm
 
diff --git a/docs/backend/patch.py b/docs/backend/patch.py
index 83f52fd3f..ce7ba3734 100644
--- a/docs/backend/patch.py
+++ b/docs/backend/patch.py
@@ -1,4 +1,3 @@
-import os
 import weakref
 
 import nest_asyncio
diff --git a/docs/frontend/patch.py b/docs/frontend/patch.py
index d16422d08..b39951d7b 100644
--- a/docs/frontend/patch.py
+++ b/docs/frontend/patch.py
@@ -1,4 +1,3 @@
-import os
 import weakref
 
 from sglang.utils import execute_shell_command, reserve_port
diff --git a/examples/frontend_language/quick_start/openai_example_n.py b/examples/frontend_language/quick_start/openai_example_n.py
index 06f533003..25372b9f4 100644
--- a/examples/frontend_language/quick_start/openai_example_n.py
+++ b/examples/frontend_language/quick_start/openai_example_n.py
@@ -4,8 +4,6 @@ export OPENAI_API_KEY=sk-******
 python3 openai_example_chat.py
 """
 
-import json
-
 import sglang as sgl
 
 
diff --git a/examples/frontend_language/usage/json_logprobs.py b/examples/frontend_language/usage/json_logprobs.py
index fa0e1b81f..15206a619 100644
--- a/examples/frontend_language/usage/json_logprobs.py
+++ b/examples/frontend_language/usage/json_logprobs.py
@@ -1,5 +1,4 @@
 # NOTE: Currently this can only be run through HTTP requests.
-import json
 from concurrent.futures import ThreadPoolExecutor
 
 from json_decode import character_regex
diff --git a/examples/runtime/engine/offline_batch_inference_vlm.py b/examples/runtime/engine/offline_batch_inference_vlm.py
index 28ab7a2ef..4063136f9 100644
--- a/examples/runtime/engine/offline_batch_inference_vlm.py
+++ b/examples/runtime/engine/offline_batch_inference_vlm.py
@@ -5,11 +5,6 @@ python offline_batch_inference_vlm.py --model-path Qwen/Qwen2-VL-7B-Instruct --c
 
 import argparse
 import dataclasses
-import io
-import os
-
-import requests
-from PIL import Image
 
 import sglang as sgl
 from sglang.srt.conversation import chat_templates
diff --git a/examples/runtime/hidden_states/hidden_states_server.py b/examples/runtime/hidden_states/hidden_states_server.py
index 39b4e464e..96045fad9 100644
--- a/examples/runtime/hidden_states/hidden_states_server.py
+++ b/examples/runtime/hidden_states/hidden_states_server.py
@@ -12,7 +12,7 @@ import requests
 import torch
 
 from sglang.test.test_utils import is_in_ci
-from sglang.utils import print_highlight, terminate_process, wait_for_server
+from sglang.utils import terminate_process, wait_for_server
 
 if is_in_ci():
     from docs.backend.patch import launch_server_cmd
diff --git a/examples/runtime/openai_batch_chat.py b/examples/runtime/openai_batch_chat.py
index 1081f0a69..d251ca0ff 100644
--- a/examples/runtime/openai_batch_chat.py
+++ b/examples/runtime/openai_batch_chat.py
@@ -11,7 +11,6 @@ you should create the input.jsonl file with the following content:
 """
 
 import json
-import os
 import time
 
 import openai
diff --git a/examples/runtime/openai_chat_with_response_prefill.py b/examples/runtime/openai_chat_with_response_prefill.py
index 1b1604b30..c80f97d11 100644
--- a/examples/runtime/openai_chat_with_response_prefill.py
+++ b/examples/runtime/openai_chat_with_response_prefill.py
@@ -5,7 +5,6 @@ python openai_chat.py
 """
 
 import openai
-from openai import OpenAI
 
 client = openai.Client(base_url="http://127.0.0.1:30000/v1", api_key="EMPTY")
 
diff --git a/examples/runtime/token_in_token_out/token_in_token_out_llm_server.py b/examples/runtime/token_in_token_out/token_in_token_out_llm_server.py
index d3d76a784..00c0988b2 100644
--- a/examples/runtime/token_in_token_out/token_in_token_out_llm_server.py
+++ b/examples/runtime/token_in_token_out/token_in_token_out_llm_server.py
@@ -9,7 +9,7 @@ import requests
 
 from sglang.srt.hf_transformers_utils import get_tokenizer
 from sglang.test.test_utils import is_in_ci
-from sglang.utils import print_highlight, terminate_process, wait_for_server
+from sglang.utils import terminate_process, wait_for_server
 
 if is_in_ci():
     from docs.backend.patch import launch_server_cmd
diff --git a/examples/runtime/token_in_token_out/token_in_token_out_vlm_server.py b/examples/runtime/token_in_token_out/token_in_token_out_vlm_server.py
index b7d748df1..01c485863 100644
--- a/examples/runtime/token_in_token_out/token_in_token_out_vlm_server.py
+++ b/examples/runtime/token_in_token_out/token_in_token_out_vlm_server.py
@@ -13,9 +13,8 @@ from PIL import Image
 from transformers import AutoProcessor
 
 from sglang.lang.chat_template import get_chat_template_by_model_path
-from sglang.srt.hf_transformers_utils import get_tokenizer
 from sglang.test.test_utils import DEFAULT_IMAGE_URL, is_in_ci
-from sglang.utils import print_highlight, terminate_process, wait_for_server
+from sglang.utils import terminate_process, wait_for_server
 
 if is_in_ci():
     from docs.backend.patch import launch_server_cmd