Revert removing the unused imports (#385)

2024-04-23 22:36:33 +08:00
parent 9acc6e3504
commit 150d7020ed
33 changed files with 72 additions and 26 deletions
--- a/python/sglang/api.py
+++ b/python/sglang/api.py
@@ -3,7 +3,11 @@
 import re
 from typing import Callable, List, Optional, Union

+from sglang.backend.anthropic import Anthropic
 from sglang.backend.base_backend import BaseBackend
+from sglang.backend.openai import OpenAI
+from sglang.backend.runtime_endpoint import RuntimeEndpoint
+from sglang.backend.vertexai import VertexAI
 from sglang.global_config import global_config
 from sglang.lang.ir import (
    SglExpr,
--- a/python/sglang/backend/anthropic.py
+++ b/python/sglang/backend/anthropic.py
@@ -1,3 +1,7 @@
+from typing import List, Optional, Union
+
+import numpy as np
+
 from sglang.backend.base_backend import BaseBackend
 from sglang.lang.chat_template import get_chat_template
 from sglang.lang.interpreter import StreamExecutor
--- a/python/sglang/backend/base_backend.py
+++ b/python/sglang/backend/base_backend.py
@@ -1,4 +1,4 @@
-from typing import List, Optional, Union
+from typing import Callable, List, Optional, Union

 from sglang.lang.chat_template import get_chat_template
 from sglang.lang.interpreter import StreamExecutor
--- a/python/sglang/backend/openai.py
+++ b/python/sglang/backend/openai.py
@@ -1,6 +1,6 @@
 import logging
 import time
-from typing import List, Optional
+from typing import Callable, List, Optional, Union

 import numpy as np

--- a/python/sglang/backend/runtime_endpoint.py
+++ b/python/sglang/backend/runtime_endpoint.py
@@ -1,14 +1,15 @@
 import json
-from typing import List, Optional
+from typing import Callable, List, Optional, Union

 import numpy as np
+import requests

 from sglang.backend.base_backend import BaseBackend
 from sglang.global_config import global_config
 from sglang.lang.chat_template import get_chat_template_by_model_path
 from sglang.lang.interpreter import StreamExecutor
-from sglang.lang.ir import SglSamplingParams
-from sglang.utils import find_printable_text, http_request
+from sglang.lang.ir import SglArgument, SglSamplingParams
+from sglang.utils import encode_image_base64, find_printable_text, http_request


 class RuntimeEndpoint(BaseBackend):
--- a/python/sglang/backend/vertexai.py
+++ b/python/sglang/backend/vertexai.py
@@ -1,5 +1,8 @@
 import os
 import warnings
+from typing import List, Optional, Union
+
+import numpy as np

 from sglang.backend.base_backend import BaseBackend
 from sglang.lang.chat_template import get_chat_template
--- a/python/sglang/lang/chat_template.py
+++ b/python/sglang/lang/chat_template.py
@@ -1,6 +1,6 @@
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from enum import Enum, auto
-from typing import Callable, Dict, List, Tuple
+from typing import Callable, Dict, List, Optional, Tuple


 class ChatTemplateStyle(Enum):
--- a/python/sglang/lang/compiler.py
+++ b/python/sglang/lang/compiler.py
@@ -5,7 +5,13 @@ from typing import List, Union

 from sglang.global_config import global_config
 from sglang.lang.interpreter import ProgramState, StreamExecutor, pin_program
-from sglang.lang.ir import SglArgument, SglExpr, SglSamplingParams, SglVariable
+from sglang.lang.ir import (
+    SglArgument,
+    SglConstantText,
+    SglExpr,
+    SglSamplingParams,
+    SglVariable,
+)


 def compile_func(function, backend):
--- a/python/sglang/lang/interpreter.py
+++ b/python/sglang/lang/interpreter.py
@@ -7,7 +7,7 @@ import threading
 import uuid
 from concurrent.futures import ThreadPoolExecutor
 from contextlib import contextmanager
-from typing import Any, Callable, Dict, List, Optional
+from typing import Any, Callable, Dict, List, Optional, Union

 import tqdm

@@ -18,6 +18,7 @@ from sglang.lang.ir import (
    SglConstantText,
    SglExpr,
    SglExprList,
+    SglFunction,
    SglGen,
    SglImage,
    SglRoleBegin,
--- a/python/sglang/lang/ir.py
+++ b/python/sglang/lang/ir.py
@@ -472,4 +472,4 @@ class SglCommitLazy(SglExpr):
        super().__init__()

    def __repr__(self):
-        return "CommitLazy()"
+        return f"CommitLazy()"
--- a/python/sglang/lang/tracer.py
+++ b/python/sglang/lang/tracer.py
@@ -1,16 +1,20 @@
 """Tracing a program."""

 import uuid
-from typing import Any, Dict, List, Optional
+from typing import Any, Callable, Dict, List, Optional, Union

 from sglang.backend.base_backend import BaseBackend
+from sglang.global_config import global_config
 from sglang.lang.interpreter import ProgramState, ProgramStateGroup
 from sglang.lang.ir import (
    SglArgument,
+    SglCommitLazy,
+    SglConcateAndAppend,
    SglConstantText,
    SglExpr,
    SglExprList,
    SglFork,
+    SglFunction,
    SglGen,
    SglGetForkItem,
    SglRoleBegin,
--- a/python/sglang/srt/hf_transformers_utils.py
+++ b/python/sglang/srt/hf_transformers_utils.py
@@ -3,7 +3,7 @@
 import json
 import os
 import warnings
-from typing import Optional, Union
+from typing import List, Optional, Tuple, Union

 from huggingface_hub import snapshot_download
 from transformers import (
--- a/python/sglang/srt/managers/detokenizer_manager.py
+++ b/python/sglang/srt/managers/detokenizer_manager.py
@@ -84,7 +84,7 @@ def start_detokenizer_process(
 ):
    try:
        manager = DetokenizerManager(server_args, port_args)
-    except Exception:
+    except Exception as e:
        pipe_writer.send(get_exception_traceback())
        raise
    pipe_writer.send("init ok")
--- a/python/sglang/srt/managers/router/radix_cache.py
+++ b/python/sglang/srt/managers/router/radix_cache.py
@@ -1,6 +1,8 @@
 import heapq
 import time
 from collections import defaultdict
+from dataclasses import dataclass
+from typing import Tuple

 import torch

--- a/python/sglang/srt/models/commandr.py
+++ b/python/sglang/srt/models/commandr.py
@@ -20,7 +20,7 @@

 # This file is based on the LLama model definition file in transformers
 """PyTorch Cohere model."""
-from typing import Optional, Tuple
+from typing import List, Optional, Tuple

 import torch
 import torch.utils.checkpoint
--- a/python/sglang/srt/models/llama2.py
+++ b/python/sglang/srt/models/llama2.py
@@ -1,7 +1,7 @@
 # Adapted from
 # https://github.com/vllm-project/vllm/blob/671af2b1c0b3ed6d856d37c21a561cc429a10701/vllm/model_executor/models/llama.py#L1
 """Inference-only LLaMA model compatible with HuggingFace weights."""
-from typing import Any, Dict, Optional, Tuple
+from typing import Any, Dict, List, Optional, Tuple

 import torch
 from torch import nn
--- a/python/sglang/srt/models/llava.py
+++ b/python/sglang/srt/models/llava.py
@@ -5,7 +5,7 @@ from typing import List, Optional
 import numpy as np
 import torch
 from torch import nn
-from transformers import CLIPVisionModel, LlavaConfig
+from transformers import CLIPVisionModel, LlamaConfig, LlavaConfig
 from transformers.models.llava.modeling_llava import LlavaMultiModalProjector
 from vllm.model_executor.layers.linear import LinearMethodBase
 from vllm.model_executor.weight_utils import (
--- a/python/sglang/srt/models/mixtral.py
+++ b/python/sglang/srt/models/mixtral.py
@@ -1,7 +1,7 @@
 # Adapted from
 # https://github.com/vllm-project/vllm/blob/d0215a58e78572d91dadafe9d832a2db89b09a13/vllm/model_executor/models/mixtral.py#L1
 """Inference-only Mixtral model."""
-from typing import Optional
+from typing import List, Optional, Tuple

 import numpy as np
 import torch
--- a/python/sglang/srt/models/qwen.py
+++ b/python/sglang/srt/models/qwen.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Optional
+from typing import Any, Dict, List, Optional, Tuple

 import torch
 from torch import nn
--- a/python/sglang/srt/models/qwen2.py
+++ b/python/sglang/srt/models/qwen2.py
@@ -1,7 +1,7 @@
 # Adapted from llama2.py
 # Modify details for the adaptation of Qwen2 model.
 """Inference-only Qwen2 model compatible with HuggingFace weights."""
-from typing import Any, Dict, Optional, Tuple
+from typing import Any, Dict, List, Optional, Tuple

 import torch
 from torch import nn
--- a/python/sglang/srt/models/yivl.py
+++ b/python/sglang/srt/models/yivl.py
@@ -1,6 +1,7 @@
 """Inference-only Yi-VL model."""

-from typing import Optional
+import os
+from typing import List, Optional

 import torch
 import torch.nn as nn
@@ -12,6 +13,7 @@ from vllm.model_executor.weight_utils import (

 from sglang.srt.models.llava import (
    LlavaLlamaForCausalLM,
+    clip_vision_embed_forward,
    monkey_path_clip_vision_embed_forward,
 )

--- a/python/sglang/srt/server.py
+++ b/python/sglang/srt/server.py
@@ -10,6 +10,9 @@ import threading
 import time
 from typing import List, Optional, Union

+# Work around a Python bug: replace threading._register_atexit with a no-op
+setattr(threading, "_register_atexit", lambda *args, **kwargs: None)
+
 import aiohttp
 import psutil
 import pydantic
@@ -55,9 +58,6 @@ from sglang.srt.managers.tokenizer_manager import TokenizerManager
 from sglang.srt.server_args import PortArgs, ServerArgs
 from sglang.srt.utils import enable_show_time_cost, handle_port_init

-# Fix a Python bug
-setattr(threading, "_register_atexit", lambda *args, **kwargs: None)
-
 asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())

 API_KEY_HEADER_NAME = "X-API-Key"
@@ -619,7 +619,7 @@ def launch_server(server_args, pipe_finish_writer):
            try:
                requests.get(url + "/get_model_info", timeout=5, headers=headers)
                break
-            except requests.exceptions.RequestException:
+            except requests.exceptions.RequestException as e:
                pass
        else:
            if pipe_finish_writer is not None:
--- a/python/sglang/srt/utils.py
+++ b/python/sglang/srt/utils.py
@@ -157,6 +157,7 @@ def get_exception_traceback():


 def get_int_token_logit_bias(tokenizer, vocab_size):
+    from transformers import LlamaTokenizer, LlamaTokenizerFast

    # a bug when model's vocab size > tokenizer.vocab_size
    vocab_size = tokenizer.vocab_size