[CI] Add Ruff to pre-commit config to remove unused imports, only for the benchmark/docs/examples folders (#3969)

2025-03-27 22:45:02 -04:00
parent 31dfff7da7
commit b149b39353
21 changed files with 13 additions and 48 deletions
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -22,6 +22,13 @@ repos:
    rev: 5.13.2
    hooks:
      - id: isort
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.11.2
+    hooks:
+      - id: ruff
+        args: [--select=F401, --fixable=F401]
+        files: ^(benchmark/|docs/|examples/)
+        exclude: \.ipynb$
  - repo: https://github.com/psf/black
    rev: 24.10.0
    hooks:
--- a/benchmark/hicache/bench_serving.py
+++ b/benchmark/hicache/bench_serving.py
@@ -23,7 +23,7 @@ import warnings
 from argparse import ArgumentParser
 from dataclasses import dataclass, field
 from datetime import datetime
-from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple, Union
+from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple

 import aiohttp
 import numpy as np
--- a/benchmark/kernels/decoding_attention_triton/triton_flashinfer_cudnn.py
+++ b/benchmark/kernels/decoding_attention_triton/triton_flashinfer_cudnn.py
@@ -4,8 +4,6 @@ import math
 import cudnn
 import torch
 import torch.utils.benchmark as benchmark
-import triton
-import triton.language as tl
 from flashinfer import BatchDecodeWithPagedKVCacheWrapper

 from sglang.srt.layers.attention.triton_ops.decode_attention import decode_attention_fwd
--- a/benchmark/kernels/minmax-text-01-lightning_attention/benchmark_lightning_attention_decode.py
+++ b/benchmark/kernels/minmax-text-01-lightning_attention/benchmark_lightning_attention_decode.py
@@ -1,6 +1,5 @@
 import itertools
 import math
-import os
 from typing import Optional, Tuple

 import torch
--- a/benchmark/kernels/rmsnorm/benchmark_rmsnorm.py
+++ b/benchmark/kernels/rmsnorm/benchmark_rmsnorm.py
@@ -3,7 +3,6 @@ from typing import Optional, Tuple, Union

 import torch
 import triton
-import triton.language as tl
 from flashinfer.norm import fused_add_rmsnorm, rmsnorm
 from torch import nn
 from vllm import _custom_ops as vllm_ops
--- a/benchmark/kernels/scheduler_batch/benchmark_write_req_to_token_pool_triton.py
+++ b/benchmark/kernels/scheduler_batch/benchmark_write_req_to_token_pool_triton.py
@@ -1,9 +1,6 @@
 import itertools
 import os
-from typing import List

-import numpy as np
-import pytest
 import torch
 import triton
 import triton.language as tl
--- a/benchmark/lora/lora_bench.py
+++ b/benchmark/lora/lora_bench.py
@@ -15,42 +15,28 @@
 import argparse
 import asyncio
 import json
-import os
 import random
 import resource
 import sys
 import time
 import traceback
-import warnings
 from argparse import ArgumentParser
-from dataclasses import dataclass, field
 from datetime import datetime
-from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple, Union
+from typing import Any, Dict, List, Optional, Tuple

 import aiohttp
 import numpy as np
-import requests
 from launch_server import LORA_PATH, NUM_LORAS
 from tqdm.asyncio import tqdm
-from transformers import (
-    AutoTokenizer,
-    PreTrainedTokenizer,
-    PreTrainedTokenizerBase,
-    PreTrainedTokenizerFast,
-)
+from transformers import PreTrainedTokenizerBase

 from sglang.bench_serving import (
    AIOHTTP_TIMEOUT,
-    SHAREGPT_URL,
-    BenchmarkMetrics,
    RequestFuncInput,
    RequestFuncOutput,
    calculate_metrics,
-    check_chat_template,
-    get_model,
    get_request,
    get_tokenizer,
-    parse_request_rate_range,
    remove_prefix,
    sample_random_requests,
 )
--- a/benchmark/mmlu/bench_sglang.py
+++ b/benchmark/mmlu/bench_sglang.py
@@ -6,7 +6,6 @@ import time
 import numpy as np
 import pandas as pd
 import tiktoken
-from tqdm import tqdm

 from sglang.test.test_utils import (
    add_common_sglang_args_and_parse,
--- a/benchmark/mmmu/bench_hf.py
+++ b/benchmark/mmmu/bench_hf.py
@@ -1,6 +1,5 @@
 import argparse

-import PIL.Image
 import torch
 from data_utils import save_json
 from eval_utils import (
--- a/benchmark/mmmu/data_utils.py
+++ b/benchmark/mmmu/data_utils.py
@@ -5,7 +5,6 @@ import os
 import re

 import yaml
-from datasets import concatenate_datasets, load_dataset

 DOMAIN_CAT2SUB_CAT = {
    "Art and Design": ["Art", "Art_Theory", "Design", "Music"],
--- a/benchmark/multi_turn_chat/long_prompt_multi_turn.py
+++ b/benchmark/multi_turn_chat/long_prompt_multi_turn.py
@@ -1,13 +1,8 @@
-import itertools
 import json
-import os
 import random
-import string
-import threading
 import time
 from argparse import ArgumentParser
 from pathlib import Path
-from typing import Union

 from tqdm import tqdm

--- a/docs/backend/patch.py
+++ b/docs/backend/patch.py
@@ -1,4 +1,3 @@
-import os
 import weakref

 import nest_asyncio
--- a/docs/frontend/patch.py
+++ b/docs/frontend/patch.py
@@ -1,4 +1,3 @@
-import os
 import weakref

 from sglang.utils import execute_shell_command, reserve_port
--- a/examples/frontend_language/quick_start/openai_example_n.py
+++ b/examples/frontend_language/quick_start/openai_example_n.py
@@ -4,8 +4,6 @@ export OPENAI_API_KEY=sk-******
 python3 openai_example_chat.py
 """

-import json
-
 import sglang as sgl


--- a/examples/frontend_language/usage/json_logprobs.py
+++ b/examples/frontend_language/usage/json_logprobs.py
@@ -1,5 +1,4 @@
 # NOTE: Currently this can only be run through HTTP requests.
-import json
 from concurrent.futures import ThreadPoolExecutor

 from json_decode import character_regex
--- a/examples/runtime/engine/offline_batch_inference_vlm.py
+++ b/examples/runtime/engine/offline_batch_inference_vlm.py
@@ -5,11 +5,6 @@ python offline_batch_inference_vlm.py --model-path Qwen/Qwen2-VL-7B-Instruct --c

 import argparse
 import dataclasses
-import io
-import os
-
-import requests
-from PIL import Image

 import sglang as sgl
 from sglang.srt.conversation import chat_templates
--- a/examples/runtime/hidden_states/hidden_states_server.py
+++ b/examples/runtime/hidden_states/hidden_states_server.py
@@ -12,7 +12,7 @@ import requests
 import torch

 from sglang.test.test_utils import is_in_ci
-from sglang.utils import print_highlight, terminate_process, wait_for_server
+from sglang.utils import terminate_process, wait_for_server

 if is_in_ci():
    from docs.backend.patch import launch_server_cmd
--- a/examples/runtime/openai_batch_chat.py
+++ b/examples/runtime/openai_batch_chat.py
@@ -11,7 +11,6 @@ you should create the input.jsonl file with the following content:
 """

 import json
-import os
 import time

 import openai
--- a/examples/runtime/openai_chat_with_response_prefill.py
+++ b/examples/runtime/openai_chat_with_response_prefill.py
@@ -5,7 +5,6 @@ python openai_chat.py
 """

 import openai
-from openai import OpenAI

 client = openai.Client(base_url="http://127.0.0.1:30000/v1", api_key="EMPTY")

--- a/examples/runtime/token_in_token_out/token_in_token_out_llm_server.py
+++ b/examples/runtime/token_in_token_out/token_in_token_out_llm_server.py
@@ -9,7 +9,7 @@ import requests

 from sglang.srt.hf_transformers_utils import get_tokenizer
 from sglang.test.test_utils import is_in_ci
-from sglang.utils import print_highlight, terminate_process, wait_for_server
+from sglang.utils import terminate_process, wait_for_server

 if is_in_ci():
    from docs.backend.patch import launch_server_cmd
--- a/examples/runtime/token_in_token_out/token_in_token_out_vlm_server.py
+++ b/examples/runtime/token_in_token_out/token_in_token_out_vlm_server.py
@@ -13,9 +13,8 @@ from PIL import Image
 from transformers import AutoProcessor

 from sglang.lang.chat_template import get_chat_template_by_model_path
-from sglang.srt.hf_transformers_utils import get_tokenizer
 from sglang.test.test_utils import DEFAULT_IMAGE_URL, is_in_ci
-from sglang.utils import print_highlight, terminate_process, wait_for_server
+from sglang.utils import terminate_process, wait_for_server

 if is_in_ci():
    from docs.backend.patch import launch_server_cmd