Revert removing the unused imports (#385)

This commit is contained in:
Liangsheng Yin
2024-04-23 22:36:33 +08:00
committed by GitHub
parent 9acc6e3504
commit 150d7020ed
33 changed files with 72 additions and 26 deletions

View File

@@ -3,7 +3,11 @@
import re import re
from typing import Callable, List, Optional, Union from typing import Callable, List, Optional, Union
from sglang.backend.anthropic import Anthropic
from sglang.backend.base_backend import BaseBackend from sglang.backend.base_backend import BaseBackend
from sglang.backend.openai import OpenAI
from sglang.backend.runtime_endpoint import RuntimeEndpoint
from sglang.backend.vertexai import VertexAI
from sglang.global_config import global_config from sglang.global_config import global_config
from sglang.lang.ir import ( from sglang.lang.ir import (
SglExpr, SglExpr,

View File

@@ -1,3 +1,7 @@
from typing import List, Optional, Union
import numpy as np
from sglang.backend.base_backend import BaseBackend from sglang.backend.base_backend import BaseBackend
from sglang.lang.chat_template import get_chat_template from sglang.lang.chat_template import get_chat_template
from sglang.lang.interpreter import StreamExecutor from sglang.lang.interpreter import StreamExecutor

View File

@@ -1,4 +1,4 @@
from typing import List, Optional, Union from typing import Callable, List, Optional, Union
from sglang.lang.chat_template import get_chat_template from sglang.lang.chat_template import get_chat_template
from sglang.lang.interpreter import StreamExecutor from sglang.lang.interpreter import StreamExecutor

View File

@@ -1,6 +1,6 @@
import logging import logging
import time import time
from typing import List, Optional from typing import Callable, List, Optional, Union
import numpy as np import numpy as np

View File

@@ -1,14 +1,15 @@
import json import json
from typing import List, Optional from typing import Callable, List, Optional, Union
import numpy as np import numpy as np
import requests
from sglang.backend.base_backend import BaseBackend from sglang.backend.base_backend import BaseBackend
from sglang.global_config import global_config from sglang.global_config import global_config
from sglang.lang.chat_template import get_chat_template_by_model_path from sglang.lang.chat_template import get_chat_template_by_model_path
from sglang.lang.interpreter import StreamExecutor from sglang.lang.interpreter import StreamExecutor
from sglang.lang.ir import SglSamplingParams from sglang.lang.ir import SglArgument, SglSamplingParams
from sglang.utils import find_printable_text, http_request from sglang.utils import encode_image_base64, find_printable_text, http_request
class RuntimeEndpoint(BaseBackend): class RuntimeEndpoint(BaseBackend):

View File

@@ -1,5 +1,8 @@
import os import os
import warnings import warnings
from typing import List, Optional, Union
import numpy as np
from sglang.backend.base_backend import BaseBackend from sglang.backend.base_backend import BaseBackend
from sglang.lang.chat_template import get_chat_template from sglang.lang.chat_template import get_chat_template

View File

@@ -1,6 +1,6 @@
from dataclasses import dataclass from dataclasses import dataclass, field
from enum import Enum, auto from enum import Enum, auto
from typing import Callable, Dict, List, Tuple from typing import Callable, Dict, List, Optional, Tuple
class ChatTemplateStyle(Enum): class ChatTemplateStyle(Enum):

View File

@@ -5,7 +5,13 @@ from typing import List, Union
from sglang.global_config import global_config from sglang.global_config import global_config
from sglang.lang.interpreter import ProgramState, StreamExecutor, pin_program from sglang.lang.interpreter import ProgramState, StreamExecutor, pin_program
from sglang.lang.ir import SglArgument, SglExpr, SglSamplingParams, SglVariable from sglang.lang.ir import (
SglArgument,
SglConstantText,
SglExpr,
SglSamplingParams,
SglVariable,
)
def compile_func(function, backend): def compile_func(function, backend):

View File

@@ -7,7 +7,7 @@ import threading
import uuid import uuid
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
from contextlib import contextmanager from contextlib import contextmanager
from typing import Any, Callable, Dict, List, Optional from typing import Any, Callable, Dict, List, Optional, Union
import tqdm import tqdm
@@ -18,6 +18,7 @@ from sglang.lang.ir import (
SglConstantText, SglConstantText,
SglExpr, SglExpr,
SglExprList, SglExprList,
SglFunction,
SglGen, SglGen,
SglImage, SglImage,
SglRoleBegin, SglRoleBegin,

View File

@@ -472,4 +472,4 @@ class SglCommitLazy(SglExpr):
super().__init__() super().__init__()
def __repr__(self): def __repr__(self):
return "CommitLazy()" return f"CommitLazy()"

View File

@@ -1,16 +1,20 @@
"""Tracing a program.""" """Tracing a program."""
import uuid import uuid
from typing import Any, Dict, List, Optional from typing import Any, Callable, Dict, List, Optional, Union
from sglang.backend.base_backend import BaseBackend from sglang.backend.base_backend import BaseBackend
from sglang.global_config import global_config
from sglang.lang.interpreter import ProgramState, ProgramStateGroup from sglang.lang.interpreter import ProgramState, ProgramStateGroup
from sglang.lang.ir import ( from sglang.lang.ir import (
SglArgument, SglArgument,
SglCommitLazy,
SglConcateAndAppend,
SglConstantText, SglConstantText,
SglExpr, SglExpr,
SglExprList, SglExprList,
SglFork, SglFork,
SglFunction,
SglGen, SglGen,
SglGetForkItem, SglGetForkItem,
SglRoleBegin, SglRoleBegin,

View File

@@ -3,7 +3,7 @@
import json import json
import os import os
import warnings import warnings
from typing import Optional, Union from typing import List, Optional, Tuple, Union
from huggingface_hub import snapshot_download from huggingface_hub import snapshot_download
from transformers import ( from transformers import (

View File

@@ -84,7 +84,7 @@ def start_detokenizer_process(
): ):
try: try:
manager = DetokenizerManager(server_args, port_args) manager = DetokenizerManager(server_args, port_args)
except Exception: except Exception as e:
pipe_writer.send(get_exception_traceback()) pipe_writer.send(get_exception_traceback())
raise raise
pipe_writer.send("init ok") pipe_writer.send("init ok")

View File

@@ -1,6 +1,8 @@
import heapq import heapq
import time import time
from collections import defaultdict from collections import defaultdict
from dataclasses import dataclass
from typing import Tuple
import torch import torch

View File

@@ -20,7 +20,7 @@
# This file is based on the LLama model definition file in transformers # This file is based on the LLama model definition file in transformers
"""PyTorch Cohere model.""" """PyTorch Cohere model."""
from typing import Optional, Tuple from typing import List, Optional, Tuple
import torch import torch
import torch.utils.checkpoint import torch.utils.checkpoint

View File

@@ -1,7 +1,7 @@
# Adapted from # Adapted from
# https://github.com/vllm-project/vllm/blob/671af2b1c0b3ed6d856d37c21a561cc429a10701/vllm/model_executor/models/llama.py#L1 # https://github.com/vllm-project/vllm/blob/671af2b1c0b3ed6d856d37c21a561cc429a10701/vllm/model_executor/models/llama.py#L1
"""Inference-only LLaMA model compatible with HuggingFace weights.""" """Inference-only LLaMA model compatible with HuggingFace weights."""
from typing import Any, Dict, Optional, Tuple from typing import Any, Dict, List, Optional, Tuple
import torch import torch
from torch import nn from torch import nn

View File

@@ -5,7 +5,7 @@ from typing import List, Optional
import numpy as np import numpy as np
import torch import torch
from torch import nn from torch import nn
from transformers import CLIPVisionModel, LlavaConfig from transformers import CLIPVisionModel, LlamaConfig, LlavaConfig
from transformers.models.llava.modeling_llava import LlavaMultiModalProjector from transformers.models.llava.modeling_llava import LlavaMultiModalProjector
from vllm.model_executor.layers.linear import LinearMethodBase from vllm.model_executor.layers.linear import LinearMethodBase
from vllm.model_executor.weight_utils import ( from vllm.model_executor.weight_utils import (

View File

@@ -1,7 +1,7 @@
# Adapted from # Adapted from
# https://github.com/vllm-project/vllm/blob/d0215a58e78572d91dadafe9d832a2db89b09a13/vllm/model_executor/models/mixtral.py#L1 # https://github.com/vllm-project/vllm/blob/d0215a58e78572d91dadafe9d832a2db89b09a13/vllm/model_executor/models/mixtral.py#L1
"""Inference-only Mixtral model.""" """Inference-only Mixtral model."""
from typing import Optional from typing import List, Optional, Tuple
import numpy as np import numpy as np
import torch import torch

View File

@@ -1,4 +1,4 @@
from typing import Any, Dict, Optional from typing import Any, Dict, List, Optional, Tuple
import torch import torch
from torch import nn from torch import nn

View File

@@ -1,7 +1,7 @@
# Adapted from llama2.py # Adapted from llama2.py
# Modify details for the adaptation of Qwen2 model. # Modify details for the adaptation of Qwen2 model.
"""Inference-only Qwen2 model compatible with HuggingFace weights.""" """Inference-only Qwen2 model compatible with HuggingFace weights."""
from typing import Any, Dict, Optional, Tuple from typing import Any, Dict, List, Optional, Tuple
import torch import torch
from torch import nn from torch import nn

View File

@@ -1,6 +1,7 @@
"""Inference-only Yi-VL model.""" """Inference-only Yi-VL model."""
from typing import Optional import os
from typing import List, Optional
import torch import torch
import torch.nn as nn import torch.nn as nn
@@ -12,6 +13,7 @@ from vllm.model_executor.weight_utils import (
from sglang.srt.models.llava import ( from sglang.srt.models.llava import (
LlavaLlamaForCausalLM, LlavaLlamaForCausalLM,
clip_vision_embed_forward,
monkey_path_clip_vision_embed_forward, monkey_path_clip_vision_embed_forward,
) )

View File

@@ -10,6 +10,9 @@ import threading
import time import time
from typing import List, Optional, Union from typing import List, Optional, Union
# Fix a Python bug
setattr(threading, "_register_atexit", lambda *args, **kwargs: None)
import aiohttp import aiohttp
import psutil import psutil
import pydantic import pydantic
@@ -55,9 +58,6 @@ from sglang.srt.managers.tokenizer_manager import TokenizerManager
from sglang.srt.server_args import PortArgs, ServerArgs from sglang.srt.server_args import PortArgs, ServerArgs
from sglang.srt.utils import enable_show_time_cost, handle_port_init from sglang.srt.utils import enable_show_time_cost, handle_port_init
# Fix a Python bug
setattr(threading, "_register_atexit", lambda *args, **kwargs: None)
asyncio.set_event_loop_policy(uvloop.EventLoopPolicy()) asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
API_KEY_HEADER_NAME = "X-API-Key" API_KEY_HEADER_NAME = "X-API-Key"
@@ -619,7 +619,7 @@ def launch_server(server_args, pipe_finish_writer):
try: try:
requests.get(url + "/get_model_info", timeout=5, headers=headers) requests.get(url + "/get_model_info", timeout=5, headers=headers)
break break
except requests.exceptions.RequestException: except requests.exceptions.RequestException as e:
pass pass
else: else:
if pipe_finish_writer is not None: if pipe_finish_writer is not None:

View File

@@ -157,6 +157,7 @@ def get_exception_traceback():
def get_int_token_logit_bias(tokenizer, vocab_size): def get_int_token_logit_bias(tokenizer, vocab_size):
from transformers import LlamaTokenizer, LlamaTokenizerFast
# a bug when model's vocab size > tokenizer.vocab_size # a bug when model's vocab size > tokenizer.vocab_size
vocab_size = tokenizer.vocab_size vocab_size = tokenizer.vocab_size

View File

@@ -1,6 +1,7 @@
import argparse import argparse
import glob import glob
import multiprocessing import multiprocessing
import os
import time import time
import unittest import unittest

View File

@@ -1,3 +1,4 @@
import json
import unittest import unittest
from sglang import Anthropic, set_default_backend from sglang import Anthropic, set_default_backend

View File

@@ -2,6 +2,7 @@
python3 -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port 30000 python3 -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port 30000
""" """
import json
import unittest import unittest
import sglang as sgl import sglang as sgl
@@ -12,6 +13,8 @@ from sglang.test.test_programs import (
test_few_shot_qa, test_few_shot_qa,
test_mt_bench, test_mt_bench,
test_parallel_decoding, test_parallel_decoding,
test_parallel_encoding,
test_react,
test_regex, test_regex,
test_select, test_select,
test_stream, test_stream,

View File

@@ -110,7 +110,7 @@ class TestTracing(unittest.TestCase):
forks = s.fork(3) forks = s.fork(3)
for i in range(3): for i in range(3):
forks[i] += f"Now, expand tip {i+1} into a paragraph:\n" forks[i] += f"Now, expand tip {i+1} into a paragraph:\n"
forks[i] += sgl.gen("detailed_tip") forks[i] += sgl.gen(f"detailed_tip")
s += "Tip 1:" + forks[0]["detailed_tip"] + "\n" s += "Tip 1:" + forks[0]["detailed_tip"] + "\n"
s += "Tip 2:" + forks[1]["detailed_tip"] + "\n" s += "Tip 2:" + forks[1]["detailed_tip"] + "\n"

View File

@@ -1,4 +1,5 @@
import argparse import argparse
import os
import torch import torch
from transformers import AutoModelForCausalLM, AutoTokenizer from transformers import AutoModelForCausalLM, AutoTokenizer

View File

@@ -1,6 +1,10 @@
import multiprocessing import multiprocessing
import os import os
import time
import numpy as np
import torch
import torch.distributed as dist
import transformers import transformers
from sglang.srt.managers.router.infer_batch import Batch, ForwardMode, Req from sglang.srt.managers.router.infer_batch import Batch, ForwardMode, Req

View File

@@ -1,10 +1,13 @@
import multiprocessing import multiprocessing
import time
import numpy as np import numpy as np
import torch import torch
import torch.distributed as dist
from sglang.srt.hf_transformers_utils import get_processor from sglang.srt.hf_transformers_utils import get_processor
from sglang.srt.managers.router.model_runner import ModelRunner from sglang.srt.managers.router.infer_batch import ForwardMode
from sglang.srt.managers.router.model_runner import InputMetadata, ModelRunner
from sglang.srt.model_config import ModelConfig from sglang.srt.model_config import ModelConfig
from sglang.srt.utils import load_image from sglang.srt.utils import load_image

View File

@@ -9,8 +9,11 @@ The capital of the United Kindom is London.\nThe capital of the United Kingdom i
import argparse import argparse
import asyncio import asyncio
import json
import time
import aiohttp import aiohttp
import requests
async def send_request(url, data, delay=0): async def send_request(url, data, delay=0):

View File

@@ -10,6 +10,7 @@ The image features a man standing on the back of a yellow taxi cab, holding
import argparse import argparse
import asyncio import asyncio
import json import json
import time
import aiohttp import aiohttp
import requests import requests

View File

@@ -6,6 +6,7 @@ The capital of France is Paris.\nThe capital of the United States is Washington,
""" """
import argparse import argparse
import time
import requests import requests