From 150d7020ed8fcba4f3fdef52b770850aff8ae048 Mon Sep 17 00:00:00 2001
From: Liangsheng Yin <hnyls2002@gmail.com>
Date: Tue, 23 Apr 2024 22:36:33 +0800
Subject: [PATCH] Revert removing the unused imports (#385)

---
 python/sglang/api.py                              | 4 ++++
 python/sglang/backend/anthropic.py                | 4 ++++
 python/sglang/backend/base_backend.py             | 2 +-
 python/sglang/backend/openai.py                   | 2 +-
 python/sglang/backend/runtime_endpoint.py         | 7 ++++---
 python/sglang/backend/vertexai.py                 | 3 +++
 python/sglang/lang/chat_template.py               | 4 ++--
 python/sglang/lang/compiler.py                    | 8 +++++++-
 python/sglang/lang/interpreter.py                 | 3 ++-
 python/sglang/lang/ir.py                          | 2 +-
 python/sglang/lang/tracer.py                      | 6 +++++-
 python/sglang/srt/hf_transformers_utils.py        | 2 +-
 python/sglang/srt/managers/detokenizer_manager.py | 2 +-
 python/sglang/srt/managers/router/radix_cache.py  | 2 ++
 python/sglang/srt/models/commandr.py              | 2 +-
 python/sglang/srt/models/llama2.py                | 2 +-
 python/sglang/srt/models/llava.py                 | 2 +-
 python/sglang/srt/models/mixtral.py               | 2 +-
 python/sglang/srt/models/qwen.py                  | 2 +-
 python/sglang/srt/models/qwen2.py                 | 2 +-
 python/sglang/srt/models/yivl.py                  | 4 +++-
 python/sglang/srt/server.py                       | 8 ++++----
 python/sglang/srt/utils.py                        | 1 +
 test/lang/run_all.py                              | 1 +
 test/lang/test_anthropic_backend.py               | 1 +
 test/lang/test_srt_backend.py                     | 3 +++
 test/lang/test_tracing.py                         | 2 +-
 test/srt/model/reference_hf.py                    | 1 +
 test/srt/model/test_llama_extend.py               | 4 ++++
 test/srt/model/test_llava_low_api.py              | 5 ++++-
 test/srt/test_httpserver_concurrent.py            | 3 +++
 test/srt/test_httpserver_llava.py                 | 1 +
 test/srt/test_httpserver_reuse.py                 | 1 +
 33 files changed, 72 insertions(+), 26 deletions(-)

diff --git a/python/sglang/api.py b/python/sglang/api.py
index 21a9a13fa..9470b1425 100644
--- a/python/sglang/api.py
+++ b/python/sglang/api.py
@@ -3,7 +3,11 @@
 import re
 from typing import Callable, List, Optional, Union
 
+from sglang.backend.anthropic import Anthropic
 from sglang.backend.base_backend import BaseBackend
+from sglang.backend.openai import OpenAI
+from sglang.backend.runtime_endpoint import RuntimeEndpoint
+from sglang.backend.vertexai import VertexAI
 from sglang.global_config import global_config
 from sglang.lang.ir import (
     SglExpr,
diff --git a/python/sglang/backend/anthropic.py b/python/sglang/backend/anthropic.py
index 82b3ab7b0..851bc176a 100644
--- a/python/sglang/backend/anthropic.py
+++ b/python/sglang/backend/anthropic.py
@@ -1,3 +1,7 @@
+from typing import List, Optional, Union
+
+import numpy as np
+
 from sglang.backend.base_backend import BaseBackend
 from sglang.lang.chat_template import get_chat_template
 from sglang.lang.interpreter import StreamExecutor
diff --git a/python/sglang/backend/base_backend.py b/python/sglang/backend/base_backend.py
index 606b821a8..cb504f51b 100644
--- a/python/sglang/backend/base_backend.py
+++ b/python/sglang/backend/base_backend.py
@@ -1,4 +1,4 @@
-from typing import List, Optional, Union
+from typing import Callable, List, Optional, Union
 
 from sglang.lang.chat_template import get_chat_template
 from sglang.lang.interpreter import StreamExecutor
diff --git a/python/sglang/backend/openai.py b/python/sglang/backend/openai.py
index 06f80c341..3c0210975 100644
--- a/python/sglang/backend/openai.py
+++ b/python/sglang/backend/openai.py
@@ -1,6 +1,6 @@
 import logging
 import time
-from typing import List, Optional
+from typing import Callable, List, Optional, Union
 
 import numpy as np
 
diff --git a/python/sglang/backend/runtime_endpoint.py b/python/sglang/backend/runtime_endpoint.py
index 13e905e3e..77b9a3277 100644
--- a/python/sglang/backend/runtime_endpoint.py
+++ b/python/sglang/backend/runtime_endpoint.py
@@ -1,14 +1,15 @@
 import json
-from typing import List, Optional
+from typing import Callable, List, Optional, Union
 
 import numpy as np
+import requests
 
 from sglang.backend.base_backend import BaseBackend
 from sglang.global_config import global_config
 from sglang.lang.chat_template import get_chat_template_by_model_path
 from sglang.lang.interpreter import StreamExecutor
-from sglang.lang.ir import SglSamplingParams
-from sglang.utils import find_printable_text, http_request
+from sglang.lang.ir import SglArgument, SglSamplingParams
+from sglang.utils import encode_image_base64, find_printable_text, http_request
 
 
 class RuntimeEndpoint(BaseBackend):
diff --git a/python/sglang/backend/vertexai.py b/python/sglang/backend/vertexai.py
index 30829ebf9..f32fca2f4 100644
--- a/python/sglang/backend/vertexai.py
+++ b/python/sglang/backend/vertexai.py
@@ -1,5 +1,8 @@
 import os
 import warnings
+from typing import List, Optional, Union
+
+import numpy as np
 
 from sglang.backend.base_backend import BaseBackend
 from sglang.lang.chat_template import get_chat_template
diff --git a/python/sglang/lang/chat_template.py b/python/sglang/lang/chat_template.py
index 187e0b885..d91dee365 100644
--- a/python/sglang/lang/chat_template.py
+++ b/python/sglang/lang/chat_template.py
@@ -1,6 +1,6 @@
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from enum import Enum, auto
-from typing import Callable, Dict, List, Tuple
+from typing import Callable, Dict, List, Optional, Tuple
 
 
 class ChatTemplateStyle(Enum):
diff --git a/python/sglang/lang/compiler.py b/python/sglang/lang/compiler.py
index b2a83ea3c..2c071e407 100644
--- a/python/sglang/lang/compiler.py
+++ b/python/sglang/lang/compiler.py
@@ -5,7 +5,13 @@ from typing import List, Union
 
 from sglang.global_config import global_config
 from sglang.lang.interpreter import ProgramState, StreamExecutor, pin_program
-from sglang.lang.ir import SglArgument, SglExpr, SglSamplingParams, SglVariable
+from sglang.lang.ir import (
+    SglArgument,
+    SglConstantText,
+    SglExpr,
+    SglSamplingParams,
+    SglVariable,
+)
 
 
 def compile_func(function, backend):
diff --git a/python/sglang/lang/interpreter.py b/python/sglang/lang/interpreter.py
index d9cf9f839..fc943e91d 100644
--- a/python/sglang/lang/interpreter.py
+++ b/python/sglang/lang/interpreter.py
@@ -7,7 +7,7 @@ import threading
 import uuid
 from concurrent.futures import ThreadPoolExecutor
 from contextlib import contextmanager
-from typing import Any, Callable, Dict, List, Optional
+from typing import Any, Callable, Dict, List, Optional, Union
 
 import tqdm
 
@@ -18,6 +18,7 @@ from sglang.lang.ir import (
     SglConstantText,
     SglExpr,
     SglExprList,
+    SglFunction,
     SglGen,
     SglImage,
     SglRoleBegin,
diff --git a/python/sglang/lang/ir.py b/python/sglang/lang/ir.py
index 66f515686..9895786dc 100644
--- a/python/sglang/lang/ir.py
+++ b/python/sglang/lang/ir.py
@@ -472,4 +472,4 @@ class SglCommitLazy(SglExpr):
         super().__init__()
 
     def __repr__(self):
-        return "CommitLazy()"
+        return f"CommitLazy()"
diff --git a/python/sglang/lang/tracer.py b/python/sglang/lang/tracer.py
index adfe1af0a..74ac9b998 100644
--- a/python/sglang/lang/tracer.py
+++ b/python/sglang/lang/tracer.py
@@ -1,16 +1,20 @@
 """Tracing a program."""
 
 import uuid
-from typing import Any, Dict, List, Optional
+from typing import Any, Callable, Dict, List, Optional, Union
 
 from sglang.backend.base_backend import BaseBackend
+from sglang.global_config import global_config
 from sglang.lang.interpreter import ProgramState, ProgramStateGroup
 from sglang.lang.ir import (
     SglArgument,
+    SglCommitLazy,
+    SglConcateAndAppend,
     SglConstantText,
     SglExpr,
     SglExprList,
     SglFork,
+    SglFunction,
     SglGen,
     SglGetForkItem,
     SglRoleBegin,
diff --git a/python/sglang/srt/hf_transformers_utils.py b/python/sglang/srt/hf_transformers_utils.py
index d88e13616..114ae5e1e 100644
--- a/python/sglang/srt/hf_transformers_utils.py
+++ b/python/sglang/srt/hf_transformers_utils.py
@@ -3,7 +3,7 @@
 import json
 import os
 import warnings
-from typing import Optional, Union
+from typing import List, Optional, Tuple, Union
 
 from huggingface_hub import snapshot_download
 from transformers import (
diff --git a/python/sglang/srt/managers/detokenizer_manager.py b/python/sglang/srt/managers/detokenizer_manager.py
index 5076a57f8..32454ead4 100644
--- a/python/sglang/srt/managers/detokenizer_manager.py
+++ b/python/sglang/srt/managers/detokenizer_manager.py
@@ -84,7 +84,7 @@ def start_detokenizer_process(
 ):
     try:
         manager = DetokenizerManager(server_args, port_args)
-    except Exception:
+    except Exception as e:
         pipe_writer.send(get_exception_traceback())
         raise
     pipe_writer.send("init ok")
diff --git a/python/sglang/srt/managers/router/radix_cache.py b/python/sglang/srt/managers/router/radix_cache.py
index ccf7f4af4..c7bd9cb6b 100644
--- a/python/sglang/srt/managers/router/radix_cache.py
+++ b/python/sglang/srt/managers/router/radix_cache.py
@@ -1,6 +1,8 @@
 import heapq
 import time
 from collections import defaultdict
+from dataclasses import dataclass
+from typing import Tuple
 
 import torch
 
diff --git a/python/sglang/srt/models/commandr.py b/python/sglang/srt/models/commandr.py
index 60aa095d1..74bf9dcdf 100644
--- a/python/sglang/srt/models/commandr.py
+++ b/python/sglang/srt/models/commandr.py
@@ -20,7 +20,7 @@
 
 # This file is based on the LLama model definition file in transformers
 """PyTorch Cohere model."""
-from typing import Optional, Tuple
+from typing import List, Optional, Tuple
 
 import torch
 import torch.utils.checkpoint
diff --git a/python/sglang/srt/models/llama2.py b/python/sglang/srt/models/llama2.py
index 212c4cf87..2f366d158 100644
--- a/python/sglang/srt/models/llama2.py
+++ b/python/sglang/srt/models/llama2.py
@@ -1,7 +1,7 @@
 # Adapted from
 # https://github.com/vllm-project/vllm/blob/671af2b1c0b3ed6d856d37c21a561cc429a10701/vllm/model_executor/models/llama.py#L1
 """Inference-only LLaMA model compatible with HuggingFace weights."""
-from typing import Any, Dict, Optional, Tuple
+from typing import Any, Dict, List, Optional, Tuple
 
 import torch
 from torch import nn
diff --git a/python/sglang/srt/models/llava.py b/python/sglang/srt/models/llava.py
index e7db6a543..aca97d3b4 100644
--- a/python/sglang/srt/models/llava.py
+++ b/python/sglang/srt/models/llava.py
@@ -5,7 +5,7 @@ from typing import List, Optional
 import numpy as np
 import torch
 from torch import nn
-from transformers import CLIPVisionModel, LlavaConfig
+from transformers import CLIPVisionModel, LlamaConfig, LlavaConfig
 from transformers.models.llava.modeling_llava import LlavaMultiModalProjector
 from vllm.model_executor.layers.linear import LinearMethodBase
 from vllm.model_executor.weight_utils import (
diff --git a/python/sglang/srt/models/mixtral.py b/python/sglang/srt/models/mixtral.py
index 9d3742535..ed7ef24d0 100644
--- a/python/sglang/srt/models/mixtral.py
+++ b/python/sglang/srt/models/mixtral.py
@@ -1,7 +1,7 @@
 # Adapted from
 # https://github.com/vllm-project/vllm/blob/d0215a58e78572d91dadafe9d832a2db89b09a13/vllm/model_executor/models/mixtral.py#L1
 """Inference-only Mixtral model."""
-from typing import Optional
+from typing import List, Optional, Tuple
 
 import numpy as np
 import torch
diff --git a/python/sglang/srt/models/qwen.py b/python/sglang/srt/models/qwen.py
index 12480016d..e7fee4a92 100644
--- a/python/sglang/srt/models/qwen.py
+++ b/python/sglang/srt/models/qwen.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Optional
+from typing import Any, Dict, List, Optional, Tuple
 
 import torch
 from torch import nn
diff --git a/python/sglang/srt/models/qwen2.py b/python/sglang/srt/models/qwen2.py
index 2314e5a33..e38941990 100644
--- a/python/sglang/srt/models/qwen2.py
+++ b/python/sglang/srt/models/qwen2.py
@@ -1,7 +1,7 @@
 # Adapted from llama2.py
 # Modify details for the adaptation of Qwen2 model.
 """Inference-only Qwen2 model compatible with HuggingFace weights."""
-from typing import Any, Dict, Optional, Tuple
+from typing import Any, Dict, List, Optional, Tuple
 
 import torch
 from torch import nn
diff --git a/python/sglang/srt/models/yivl.py b/python/sglang/srt/models/yivl.py
index f2d7b1948..0e6c87811 100644
--- a/python/sglang/srt/models/yivl.py
+++ b/python/sglang/srt/models/yivl.py
@@ -1,6 +1,7 @@
 """Inference-only Yi-VL model."""
 
-from typing import Optional
+import os
+from typing import List, Optional
 
 import torch
 import torch.nn as nn
@@ -12,6 +13,7 @@ from vllm.model_executor.weight_utils import (
 
 from sglang.srt.models.llava import (
     LlavaLlamaForCausalLM,
+    clip_vision_embed_forward,
     monkey_path_clip_vision_embed_forward,
 )
 
diff --git a/python/sglang/srt/server.py b/python/sglang/srt/server.py
index 5643f0ad4..b3395f162 100644
--- a/python/sglang/srt/server.py
+++ b/python/sglang/srt/server.py
@@ -10,6 +10,9 @@ import threading
 import time
 from typing import List, Optional, Union
 
+# Fix a Python bug
+setattr(threading, "_register_atexit", lambda *args, **kwargs: None)
+
 import aiohttp
 import psutil
 import pydantic
@@ -55,9 +58,6 @@ from sglang.srt.managers.tokenizer_manager import TokenizerManager
 from sglang.srt.server_args import PortArgs, ServerArgs
 from sglang.srt.utils import enable_show_time_cost, handle_port_init
 
-# Fix a Python bug
-setattr(threading, "_register_atexit", lambda *args, **kwargs: None)
-
 asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
 
 API_KEY_HEADER_NAME = "X-API-Key"
@@ -619,7 +619,7 @@ def launch_server(server_args, pipe_finish_writer):
             try:
                 requests.get(url + "/get_model_info", timeout=5, headers=headers)
                 break
-            except requests.exceptions.RequestException:
+            except requests.exceptions.RequestException as e:
                 pass
         else:
             if pipe_finish_writer is not None:
diff --git a/python/sglang/srt/utils.py b/python/sglang/srt/utils.py
index 479bdda09..0f7322bb6 100644
--- a/python/sglang/srt/utils.py
+++ b/python/sglang/srt/utils.py
@@ -157,6 +157,7 @@ def get_exception_traceback():
 
 
 def get_int_token_logit_bias(tokenizer, vocab_size):
+    from transformers import LlamaTokenizer, LlamaTokenizerFast
 
     # a bug when model's vocab size > tokenizer.vocab_size
     vocab_size = tokenizer.vocab_size
diff --git a/test/lang/run_all.py b/test/lang/run_all.py
index 75d5d5c2b..cb5da1585 100644
--- a/test/lang/run_all.py
+++ b/test/lang/run_all.py
@@ -1,6 +1,7 @@
 import argparse
 import glob
 import multiprocessing
+import os
 import time
 import unittest
 
diff --git a/test/lang/test_anthropic_backend.py b/test/lang/test_anthropic_backend.py
index 83f6c76f4..3eb4051d7 100644
--- a/test/lang/test_anthropic_backend.py
+++ b/test/lang/test_anthropic_backend.py
@@ -1,3 +1,4 @@
+import json
 import unittest
 
 from sglang import Anthropic, set_default_backend
diff --git a/test/lang/test_srt_backend.py b/test/lang/test_srt_backend.py
index 007d96257..c92568c0b 100644
--- a/test/lang/test_srt_backend.py
+++ b/test/lang/test_srt_backend.py
@@ -2,6 +2,7 @@
 python3 -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port 30000
 """
 
+import json
 import unittest
 
 import sglang as sgl
@@ -12,6 +13,8 @@ from sglang.test.test_programs import (
     test_few_shot_qa,
     test_mt_bench,
     test_parallel_decoding,
+    test_parallel_encoding,
+    test_react,
     test_regex,
     test_select,
     test_stream,
diff --git a/test/lang/test_tracing.py b/test/lang/test_tracing.py
index f77b50752..266ce65fe 100644
--- a/test/lang/test_tracing.py
+++ b/test/lang/test_tracing.py
@@ -110,7 +110,7 @@ class TestTracing(unittest.TestCase):
             forks = s.fork(3)
             for i in range(3):
                 forks[i] += f"Now, expand tip {i+1} into a paragraph:\n"
-                forks[i] += sgl.gen("detailed_tip")
+                forks[i] += sgl.gen(f"detailed_tip")
 
             s += "Tip 1:" + forks[0]["detailed_tip"] + "\n"
             s += "Tip 2:" + forks[1]["detailed_tip"] + "\n"
diff --git a/test/srt/model/reference_hf.py b/test/srt/model/reference_hf.py
index 4060f9212..e63866f02 100644
--- a/test/srt/model/reference_hf.py
+++ b/test/srt/model/reference_hf.py
@@ -1,4 +1,5 @@
 import argparse
+import os
 
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
diff --git a/test/srt/model/test_llama_extend.py b/test/srt/model/test_llama_extend.py
index cf589be7b..cdb40f887 100644
--- a/test/srt/model/test_llama_extend.py
+++ b/test/srt/model/test_llama_extend.py
@@ -1,6 +1,10 @@
 import multiprocessing
 import os
+import time
 
+import numpy as np
+import torch
+import torch.distributed as dist
 import transformers
 
 from sglang.srt.managers.router.infer_batch import Batch, ForwardMode, Req
diff --git a/test/srt/model/test_llava_low_api.py b/test/srt/model/test_llava_low_api.py
index 38b030d07..186a46df0 100644
--- a/test/srt/model/test_llava_low_api.py
+++ b/test/srt/model/test_llava_low_api.py
@@ -1,10 +1,13 @@
 import multiprocessing
+import time
 
 import numpy as np
 import torch
+import torch.distributed as dist
 
 from sglang.srt.hf_transformers_utils import get_processor
-from sglang.srt.managers.router.model_runner import ModelRunner
+from sglang.srt.managers.router.infer_batch import ForwardMode
+from sglang.srt.managers.router.model_runner import InputMetadata, ModelRunner
 from sglang.srt.model_config import ModelConfig
 from sglang.srt.utils import load_image
 
diff --git a/test/srt/test_httpserver_concurrent.py b/test/srt/test_httpserver_concurrent.py
index 6cdd5332d..855e51f33 100644
--- a/test/srt/test_httpserver_concurrent.py
+++ b/test/srt/test_httpserver_concurrent.py
@@ -9,8 +9,11 @@ The capital of the United Kindom is London.\nThe capital of the United Kingdom i
 
 import argparse
 import asyncio
+import json
+import time
 
 import aiohttp
+import requests
 
 
 async def send_request(url, data, delay=0):
diff --git a/test/srt/test_httpserver_llava.py b/test/srt/test_httpserver_llava.py
index 6db4ab930..0f6571b45 100644
--- a/test/srt/test_httpserver_llava.py
+++ b/test/srt/test_httpserver_llava.py
@@ -10,6 +10,7 @@ The image features a man standing on the back of a yellow taxi cab, holding
 import argparse
 import asyncio
 import json
+import time
 
 import aiohttp
 import requests
diff --git a/test/srt/test_httpserver_reuse.py b/test/srt/test_httpserver_reuse.py
index ef866afc6..c3f786589 100644
--- a/test/srt/test_httpserver_reuse.py
+++ b/test/srt/test_httpserver_reuse.py
@@ -6,6 +6,7 @@ The capital of France is Paris.\nThe capital of the United States is Washington,
 """
 
 import argparse
+import time
 
 import requests