From 150d7020ed8fcba4f3fdef52b770850aff8ae048 Mon Sep 17 00:00:00 2001 From: Liangsheng Yin Date: Tue, 23 Apr 2024 22:36:33 +0800 Subject: [PATCH] Revert removing the unused imports (#385) --- python/sglang/api.py | 4 ++++ python/sglang/backend/anthropic.py | 4 ++++ python/sglang/backend/base_backend.py | 2 +- python/sglang/backend/openai.py | 2 +- python/sglang/backend/runtime_endpoint.py | 7 ++++--- python/sglang/backend/vertexai.py | 3 +++ python/sglang/lang/chat_template.py | 4 ++-- python/sglang/lang/compiler.py | 8 +++++++- python/sglang/lang/interpreter.py | 3 ++- python/sglang/lang/ir.py | 2 +- python/sglang/lang/tracer.py | 6 +++++- python/sglang/srt/hf_transformers_utils.py | 2 +- python/sglang/srt/managers/detokenizer_manager.py | 2 +- python/sglang/srt/managers/router/radix_cache.py | 2 ++ python/sglang/srt/models/commandr.py | 2 +- python/sglang/srt/models/llama2.py | 2 +- python/sglang/srt/models/llava.py | 2 +- python/sglang/srt/models/mixtral.py | 2 +- python/sglang/srt/models/qwen.py | 2 +- python/sglang/srt/models/qwen2.py | 2 +- python/sglang/srt/models/yivl.py | 4 +++- python/sglang/srt/server.py | 8 ++++---- python/sglang/srt/utils.py | 1 + test/lang/run_all.py | 1 + test/lang/test_anthropic_backend.py | 1 + test/lang/test_srt_backend.py | 3 +++ test/lang/test_tracing.py | 2 +- test/srt/model/reference_hf.py | 1 + test/srt/model/test_llama_extend.py | 4 ++++ test/srt/model/test_llava_low_api.py | 5 ++++- test/srt/test_httpserver_concurrent.py | 3 +++ test/srt/test_httpserver_llava.py | 1 + test/srt/test_httpserver_reuse.py | 1 + 33 files changed, 72 insertions(+), 26 deletions(-) diff --git a/python/sglang/api.py b/python/sglang/api.py index 21a9a13fa..9470b1425 100644 --- a/python/sglang/api.py +++ b/python/sglang/api.py @@ -3,7 +3,11 @@ import re from typing import Callable, List, Optional, Union +from sglang.backend.anthropic import Anthropic from sglang.backend.base_backend import BaseBackend +from sglang.backend.openai import OpenAI +from sglang.backend.runtime_endpoint import RuntimeEndpoint +from sglang.backend.vertexai import VertexAI from sglang.global_config import global_config from sglang.lang.ir import ( SglExpr, diff --git a/python/sglang/backend/anthropic.py b/python/sglang/backend/anthropic.py index 82b3ab7b0..851bc176a 100644 --- a/python/sglang/backend/anthropic.py +++ b/python/sglang/backend/anthropic.py @@ -1,3 +1,7 @@ +from typing import List, Optional, Union + +import numpy as np + from sglang.backend.base_backend import BaseBackend from sglang.lang.chat_template import get_chat_template from sglang.lang.interpreter import StreamExecutor diff --git a/python/sglang/backend/base_backend.py b/python/sglang/backend/base_backend.py index 606b821a8..cb504f51b 100644 --- a/python/sglang/backend/base_backend.py +++ b/python/sglang/backend/base_backend.py @@ -1,4 +1,4 @@ -from typing import List, Optional, Union +from typing import Callable, List, Optional, Union from sglang.lang.chat_template import get_chat_template from sglang.lang.interpreter import StreamExecutor diff --git a/python/sglang/backend/openai.py b/python/sglang/backend/openai.py index 06f80c341..3c0210975 100644 --- a/python/sglang/backend/openai.py +++ b/python/sglang/backend/openai.py @@ -1,6 +1,6 @@ import logging import time -from typing import List, Optional +from typing import Callable, List, Optional, Union import numpy as np diff --git a/python/sglang/backend/runtime_endpoint.py b/python/sglang/backend/runtime_endpoint.py index 13e905e3e..77b9a3277 100644 --- a/python/sglang/backend/runtime_endpoint.py +++ b/python/sglang/backend/runtime_endpoint.py @@ -1,14 +1,15 @@ import json -from typing import List, Optional +from typing import Callable, List, Optional, Union import numpy as np +import requests from sglang.backend.base_backend import BaseBackend from sglang.global_config import global_config from sglang.lang.chat_template import get_chat_template_by_model_path from sglang.lang.interpreter import StreamExecutor -from sglang.lang.ir import SglSamplingParams -from sglang.utils import find_printable_text, http_request +from sglang.lang.ir import SglArgument, SglSamplingParams +from sglang.utils import encode_image_base64, find_printable_text, http_request class RuntimeEndpoint(BaseBackend): diff --git a/python/sglang/backend/vertexai.py b/python/sglang/backend/vertexai.py index 30829ebf9..f32fca2f4 100644 --- a/python/sglang/backend/vertexai.py +++ b/python/sglang/backend/vertexai.py @@ -1,5 +1,8 @@ import os import warnings +from typing import List, Optional, Union + +import numpy as np from sglang.backend.base_backend import BaseBackend from sglang.lang.chat_template import get_chat_template diff --git a/python/sglang/lang/chat_template.py b/python/sglang/lang/chat_template.py index 187e0b885..d91dee365 100644 --- a/python/sglang/lang/chat_template.py +++ b/python/sglang/lang/chat_template.py @@ -1,6 +1,6 @@ -from dataclasses import dataclass +from dataclasses import dataclass, field from enum import Enum, auto -from typing import Callable, Dict, List, Tuple +from typing import Callable, Dict, List, Optional, Tuple class ChatTemplateStyle(Enum): diff --git a/python/sglang/lang/compiler.py b/python/sglang/lang/compiler.py index b2a83ea3c..2c071e407 100644 --- a/python/sglang/lang/compiler.py +++ b/python/sglang/lang/compiler.py @@ -5,7 +5,13 @@ from typing import List, Union from sglang.global_config import global_config from sglang.lang.interpreter import ProgramState, StreamExecutor, pin_program -from sglang.lang.ir import SglArgument, SglExpr, SglSamplingParams, SglVariable +from sglang.lang.ir import ( + SglArgument, + SglConstantText, + SglExpr, + SglSamplingParams, + SglVariable, +) def compile_func(function, backend): diff --git a/python/sglang/lang/interpreter.py b/python/sglang/lang/interpreter.py index d9cf9f839..fc943e91d 100644 --- a/python/sglang/lang/interpreter.py +++ b/python/sglang/lang/interpreter.py @@ -7,7 +7,7 @@ import threading import uuid from concurrent.futures import ThreadPoolExecutor from contextlib import contextmanager -from typing import Any, Callable, Dict, List, Optional +from typing import Any, Callable, Dict, List, Optional, Union import tqdm @@ -18,6 +18,7 @@ from sglang.lang.ir import ( SglConstantText, SglExpr, SglExprList, + SglFunction, SglGen, SglImage, SglRoleBegin, diff --git a/python/sglang/lang/ir.py b/python/sglang/lang/ir.py index 66f515686..9895786dc 100644 --- a/python/sglang/lang/ir.py +++ b/python/sglang/lang/ir.py @@ -472,4 +472,4 @@ class SglCommitLazy(SglExpr): super().__init__() def __repr__(self): - return "CommitLazy()" + return f"CommitLazy()" diff --git a/python/sglang/lang/tracer.py b/python/sglang/lang/tracer.py index adfe1af0a..74ac9b998 100644 --- a/python/sglang/lang/tracer.py +++ b/python/sglang/lang/tracer.py @@ -1,16 +1,20 @@ """Tracing a program.""" import uuid -from typing import Any, Dict, List, Optional +from typing import Any, Callable, Dict, List, Optional, Union from sglang.backend.base_backend import BaseBackend +from sglang.global_config import global_config from sglang.lang.interpreter import ProgramState, ProgramStateGroup from sglang.lang.ir import ( SglArgument, + SglCommitLazy, + SglConcateAndAppend, SglConstantText, SglExpr, SglExprList, SglFork, + SglFunction, SglGen, SglGetForkItem, SglRoleBegin, diff --git a/python/sglang/srt/hf_transformers_utils.py b/python/sglang/srt/hf_transformers_utils.py index d88e13616..114ae5e1e 100644 --- a/python/sglang/srt/hf_transformers_utils.py +++ b/python/sglang/srt/hf_transformers_utils.py @@ -3,7 +3,7 @@ import json import os import warnings -from typing import Optional, Union +from typing import List, Optional, Tuple, Union from huggingface_hub import snapshot_download from transformers import ( diff --git a/python/sglang/srt/managers/detokenizer_manager.py b/python/sglang/srt/managers/detokenizer_manager.py index 5076a57f8..32454ead4 100644 --- a/python/sglang/srt/managers/detokenizer_manager.py +++ b/python/sglang/srt/managers/detokenizer_manager.py @@ -84,7 +84,7 @@ def start_detokenizer_process( ): try: manager = DetokenizerManager(server_args, port_args) - except Exception: + except Exception as e: pipe_writer.send(get_exception_traceback()) raise pipe_writer.send("init ok") diff --git a/python/sglang/srt/managers/router/radix_cache.py b/python/sglang/srt/managers/router/radix_cache.py index ccf7f4af4..c7bd9cb6b 100644 --- a/python/sglang/srt/managers/router/radix_cache.py +++ b/python/sglang/srt/managers/router/radix_cache.py @@ -1,6 +1,8 @@ import heapq import time from collections import defaultdict +from dataclasses import dataclass +from typing import Tuple import torch diff --git a/python/sglang/srt/models/commandr.py b/python/sglang/srt/models/commandr.py index 60aa095d1..74bf9dcdf 100644 --- a/python/sglang/srt/models/commandr.py +++ b/python/sglang/srt/models/commandr.py @@ -20,7 +20,7 @@ # This file is based on the LLama model definition file in transformers """PyTorch Cohere model.""" -from typing import Optional, Tuple +from typing import List, Optional, Tuple import torch import torch.utils.checkpoint diff --git a/python/sglang/srt/models/llama2.py b/python/sglang/srt/models/llama2.py index 212c4cf87..2f366d158 100644 --- a/python/sglang/srt/models/llama2.py +++ b/python/sglang/srt/models/llama2.py @@ -1,7 +1,7 @@ # Adapted from # https://github.com/vllm-project/vllm/blob/671af2b1c0b3ed6d856d37c21a561cc429a10701/vllm/model_executor/models/llama.py#L1 """Inference-only LLaMA model compatible with HuggingFace weights.""" -from typing import Any, Dict, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple import torch from torch import nn diff --git a/python/sglang/srt/models/llava.py b/python/sglang/srt/models/llava.py index e7db6a543..aca97d3b4 100644 --- a/python/sglang/srt/models/llava.py +++ b/python/sglang/srt/models/llava.py @@ -5,7 +5,7 @@ from typing import List, Optional import numpy as np import torch from torch import nn -from transformers import CLIPVisionModel, LlavaConfig +from transformers import CLIPVisionModel, LlamaConfig, LlavaConfig from transformers.models.llava.modeling_llava import LlavaMultiModalProjector from vllm.model_executor.layers.linear import LinearMethodBase from vllm.model_executor.weight_utils import ( diff --git a/python/sglang/srt/models/mixtral.py b/python/sglang/srt/models/mixtral.py index 9d3742535..ed7ef24d0 100644 --- a/python/sglang/srt/models/mixtral.py +++ b/python/sglang/srt/models/mixtral.py @@ -1,7 +1,7 @@ # Adapted from # https://github.com/vllm-project/vllm/blob/d0215a58e78572d91dadafe9d832a2db89b09a13/vllm/model_executor/models/mixtral.py#L1 """Inference-only Mixtral model.""" -from typing import Optional +from typing import List, Optional, Tuple import numpy as np import torch diff --git a/python/sglang/srt/models/qwen.py b/python/sglang/srt/models/qwen.py index 12480016d..e7fee4a92 100644 --- a/python/sglang/srt/models/qwen.py +++ b/python/sglang/srt/models/qwen.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, Optional +from typing import Any, Dict, List, Optional, Tuple import torch from torch import nn diff --git a/python/sglang/srt/models/qwen2.py b/python/sglang/srt/models/qwen2.py index 2314e5a33..e38941990 100644 --- a/python/sglang/srt/models/qwen2.py +++ b/python/sglang/srt/models/qwen2.py @@ -1,7 +1,7 @@ # Adapted from llama2.py # Modify details for the adaptation of Qwen2 model. """Inference-only Qwen2 model compatible with HuggingFace weights.""" -from typing import Any, Dict, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple import torch from torch import nn diff --git a/python/sglang/srt/models/yivl.py b/python/sglang/srt/models/yivl.py index f2d7b1948..0e6c87811 100644 --- a/python/sglang/srt/models/yivl.py +++ b/python/sglang/srt/models/yivl.py @@ -1,6 +1,7 @@ """Inference-only Yi-VL model.""" -from typing import Optional +import os +from typing import List, Optional import torch import torch.nn as nn @@ -12,6 +13,7 @@ from vllm.model_executor.weight_utils import ( from sglang.srt.models.llava import ( LlavaLlamaForCausalLM, + clip_vision_embed_forward, monkey_path_clip_vision_embed_forward, ) diff --git a/python/sglang/srt/server.py b/python/sglang/srt/server.py index 5643f0ad4..b3395f162 100644 --- a/python/sglang/srt/server.py +++ b/python/sglang/srt/server.py @@ -10,6 +10,9 @@ import threading import time from typing import List, Optional, Union +# Fix a Python bug +setattr(threading, "_register_atexit", lambda *args, **kwargs: None) + import aiohttp import psutil import pydantic @@ -55,9 +58,6 @@ from sglang.srt.managers.tokenizer_manager import TokenizerManager from sglang.srt.server_args import PortArgs, ServerArgs from sglang.srt.utils import enable_show_time_cost, handle_port_init -# Fix a Python bug -setattr(threading, "_register_atexit", lambda *args, **kwargs: None) - asyncio.set_event_loop_policy(uvloop.EventLoopPolicy()) API_KEY_HEADER_NAME = "X-API-Key" @@ -619,7 +619,7 @@ def launch_server(server_args, pipe_finish_writer): try: requests.get(url + "/get_model_info", timeout=5, headers=headers) break - except requests.exceptions.RequestException: + except requests.exceptions.RequestException as e: pass else: if pipe_finish_writer is not None: diff --git a/python/sglang/srt/utils.py b/python/sglang/srt/utils.py index 479bdda09..0f7322bb6 100644 --- a/python/sglang/srt/utils.py +++ b/python/sglang/srt/utils.py @@ -157,6 +157,7 @@ def get_exception_traceback(): def get_int_token_logit_bias(tokenizer, vocab_size): + from transformers import LlamaTokenizer, LlamaTokenizerFast # a bug when model's vocab size > tokenizer.vocab_size vocab_size = tokenizer.vocab_size diff --git a/test/lang/run_all.py b/test/lang/run_all.py index 75d5d5c2b..cb5da1585 100644 --- a/test/lang/run_all.py +++ b/test/lang/run_all.py @@ -1,6 +1,7 @@ import argparse import glob import multiprocessing +import os import time import unittest diff --git a/test/lang/test_anthropic_backend.py b/test/lang/test_anthropic_backend.py index 83f6c76f4..3eb4051d7 100644 --- a/test/lang/test_anthropic_backend.py +++ b/test/lang/test_anthropic_backend.py @@ -1,3 +1,4 @@ +import json import unittest from sglang import Anthropic, set_default_backend diff --git a/test/lang/test_srt_backend.py b/test/lang/test_srt_backend.py index 007d96257..c92568c0b 100644 --- a/test/lang/test_srt_backend.py +++ b/test/lang/test_srt_backend.py @@ -2,6 +2,7 @@ python3 -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port 30000 """ +import json import unittest import sglang as sgl @@ -12,6 +13,8 @@ from sglang.test.test_programs import ( test_few_shot_qa, test_mt_bench, test_parallel_decoding, + test_parallel_encoding, + test_react, test_regex, test_select, test_stream, diff --git a/test/lang/test_tracing.py b/test/lang/test_tracing.py index f77b50752..266ce65fe 100644 --- a/test/lang/test_tracing.py +++ b/test/lang/test_tracing.py @@ -110,7 +110,7 @@ class TestTracing(unittest.TestCase): forks = s.fork(3) for i in range(3): forks[i] += f"Now, expand tip {i+1} into a paragraph:\n" - forks[i] += sgl.gen("detailed_tip") + forks[i] += sgl.gen(f"detailed_tip") s += "Tip 1:" + forks[0]["detailed_tip"] + "\n" s += "Tip 2:" + forks[1]["detailed_tip"] + "\n" diff --git a/test/srt/model/reference_hf.py b/test/srt/model/reference_hf.py index 4060f9212..e63866f02 100644 --- a/test/srt/model/reference_hf.py +++ b/test/srt/model/reference_hf.py @@ -1,4 +1,5 @@ import argparse +import os import torch from transformers import AutoModelForCausalLM, AutoTokenizer diff --git a/test/srt/model/test_llama_extend.py b/test/srt/model/test_llama_extend.py index cf589be7b..cdb40f887 100644 --- a/test/srt/model/test_llama_extend.py +++ b/test/srt/model/test_llama_extend.py @@ -1,6 +1,10 @@ import multiprocessing import os +import time +import numpy as np +import torch +import torch.distributed as dist import transformers from sglang.srt.managers.router.infer_batch import Batch, ForwardMode, Req diff --git a/test/srt/model/test_llava_low_api.py b/test/srt/model/test_llava_low_api.py index 38b030d07..186a46df0 100644 --- a/test/srt/model/test_llava_low_api.py +++ b/test/srt/model/test_llava_low_api.py @@ -1,10 +1,13 @@ import multiprocessing +import time import numpy as np import torch +import torch.distributed as dist from sglang.srt.hf_transformers_utils import get_processor -from sglang.srt.managers.router.model_runner import ModelRunner +from sglang.srt.managers.router.infer_batch import ForwardMode +from sglang.srt.managers.router.model_runner import InputMetadata, ModelRunner from sglang.srt.model_config import ModelConfig from sglang.srt.utils import load_image diff --git a/test/srt/test_httpserver_concurrent.py b/test/srt/test_httpserver_concurrent.py index 6cdd5332d..855e51f33 100644 --- a/test/srt/test_httpserver_concurrent.py +++ b/test/srt/test_httpserver_concurrent.py @@ -9,8 +9,11 @@ The capital of the United Kindom is London.\nThe capital of the United Kingdom i import argparse import asyncio +import json +import time import aiohttp +import requests async def send_request(url, data, delay=0): diff --git a/test/srt/test_httpserver_llava.py b/test/srt/test_httpserver_llava.py index 6db4ab930..0f6571b45 100644 --- a/test/srt/test_httpserver_llava.py +++ b/test/srt/test_httpserver_llava.py @@ -10,6 +10,7 @@ The image features a man standing on the back of a yellow taxi cab, holding import argparse import asyncio import json +import time import aiohttp import requests diff --git a/test/srt/test_httpserver_reuse.py b/test/srt/test_httpserver_reuse.py index ef866afc6..c3f786589 100644 --- a/test/srt/test_httpserver_reuse.py +++ b/test/srt/test_httpserver_reuse.py @@ -6,6 +6,7 @@ The capital of France is Paris.\nThe capital of the United States is Washington, """ import argparse +import time import requests