Revert removing the unused imports (#385)
This commit is contained in:
@@ -3,7 +3,11 @@
|
||||
import re
|
||||
from typing import Callable, List, Optional, Union
|
||||
|
||||
from sglang.backend.anthropic import Anthropic
|
||||
from sglang.backend.base_backend import BaseBackend
|
||||
from sglang.backend.openai import OpenAI
|
||||
from sglang.backend.runtime_endpoint import RuntimeEndpoint
|
||||
from sglang.backend.vertexai import VertexAI
|
||||
from sglang.global_config import global_config
|
||||
from sglang.lang.ir import (
|
||||
SglExpr,
|
||||
|
||||
@@ -1,3 +1,7 @@
|
||||
from typing import List, Optional, Union
|
||||
|
||||
import numpy as np
|
||||
|
||||
from sglang.backend.base_backend import BaseBackend
|
||||
from sglang.lang.chat_template import get_chat_template
|
||||
from sglang.lang.interpreter import StreamExecutor
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from typing import List, Optional, Union
|
||||
from typing import Callable, List, Optional, Union
|
||||
|
||||
from sglang.lang.chat_template import get_chat_template
|
||||
from sglang.lang.interpreter import StreamExecutor
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import logging
|
||||
import time
|
||||
from typing import List, Optional
|
||||
from typing import Callable, List, Optional, Union
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
@@ -1,14 +1,15 @@
|
||||
import json
|
||||
from typing import List, Optional
|
||||
from typing import Callable, List, Optional, Union
|
||||
|
||||
import numpy as np
|
||||
import requests
|
||||
|
||||
from sglang.backend.base_backend import BaseBackend
|
||||
from sglang.global_config import global_config
|
||||
from sglang.lang.chat_template import get_chat_template_by_model_path
|
||||
from sglang.lang.interpreter import StreamExecutor
|
||||
from sglang.lang.ir import SglSamplingParams
|
||||
from sglang.utils import find_printable_text, http_request
|
||||
from sglang.lang.ir import SglArgument, SglSamplingParams
|
||||
from sglang.utils import encode_image_base64, find_printable_text, http_request
|
||||
|
||||
|
||||
class RuntimeEndpoint(BaseBackend):
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
import os
|
||||
import warnings
|
||||
from typing import List, Optional, Union
|
||||
|
||||
import numpy as np
|
||||
|
||||
from sglang.backend.base_backend import BaseBackend
|
||||
from sglang.lang.chat_template import get_chat_template
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from dataclasses import dataclass
|
||||
from dataclasses import dataclass, field
|
||||
from enum import Enum, auto
|
||||
from typing import Callable, Dict, List, Tuple
|
||||
from typing import Callable, Dict, List, Optional, Tuple
|
||||
|
||||
|
||||
class ChatTemplateStyle(Enum):
|
||||
|
||||
@@ -5,7 +5,13 @@ from typing import List, Union
|
||||
|
||||
from sglang.global_config import global_config
|
||||
from sglang.lang.interpreter import ProgramState, StreamExecutor, pin_program
|
||||
from sglang.lang.ir import SglArgument, SglExpr, SglSamplingParams, SglVariable
|
||||
from sglang.lang.ir import (
|
||||
SglArgument,
|
||||
SglConstantText,
|
||||
SglExpr,
|
||||
SglSamplingParams,
|
||||
SglVariable,
|
||||
)
|
||||
|
||||
|
||||
def compile_func(function, backend):
|
||||
|
||||
@@ -7,7 +7,7 @@ import threading
|
||||
import uuid
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from contextlib import contextmanager
|
||||
from typing import Any, Callable, Dict, List, Optional
|
||||
from typing import Any, Callable, Dict, List, Optional, Union
|
||||
|
||||
import tqdm
|
||||
|
||||
@@ -18,6 +18,7 @@ from sglang.lang.ir import (
|
||||
SglConstantText,
|
||||
SglExpr,
|
||||
SglExprList,
|
||||
SglFunction,
|
||||
SglGen,
|
||||
SglImage,
|
||||
SglRoleBegin,
|
||||
|
||||
@@ -472,4 +472,4 @@ class SglCommitLazy(SglExpr):
|
||||
super().__init__()
|
||||
|
||||
def __repr__(self):
|
||||
return "CommitLazy()"
|
||||
return f"CommitLazy()"
|
||||
|
||||
@@ -1,16 +1,20 @@
|
||||
"""Tracing a program."""
|
||||
|
||||
import uuid
|
||||
from typing import Any, Dict, List, Optional
|
||||
from typing import Any, Callable, Dict, List, Optional, Union
|
||||
|
||||
from sglang.backend.base_backend import BaseBackend
|
||||
from sglang.global_config import global_config
|
||||
from sglang.lang.interpreter import ProgramState, ProgramStateGroup
|
||||
from sglang.lang.ir import (
|
||||
SglArgument,
|
||||
SglCommitLazy,
|
||||
SglConcateAndAppend,
|
||||
SglConstantText,
|
||||
SglExpr,
|
||||
SglExprList,
|
||||
SglFork,
|
||||
SglFunction,
|
||||
SglGen,
|
||||
SglGetForkItem,
|
||||
SglRoleBegin,
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
import json
|
||||
import os
|
||||
import warnings
|
||||
from typing import Optional, Union
|
||||
from typing import List, Optional, Tuple, Union
|
||||
|
||||
from huggingface_hub import snapshot_download
|
||||
from transformers import (
|
||||
|
||||
@@ -84,7 +84,7 @@ def start_detokenizer_process(
|
||||
):
|
||||
try:
|
||||
manager = DetokenizerManager(server_args, port_args)
|
||||
except Exception:
|
||||
except Exception as e:
|
||||
pipe_writer.send(get_exception_traceback())
|
||||
raise
|
||||
pipe_writer.send("init ok")
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
import heapq
|
||||
import time
|
||||
from collections import defaultdict
|
||||
from dataclasses import dataclass
|
||||
from typing import Tuple
|
||||
|
||||
import torch
|
||||
|
||||
|
||||
@@ -20,7 +20,7 @@
|
||||
|
||||
# This file is based on the LLama model definition file in transformers
|
||||
"""PyTorch Cohere model."""
|
||||
from typing import Optional, Tuple
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
import torch
|
||||
import torch.utils.checkpoint
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
# Adapted from
|
||||
# https://github.com/vllm-project/vllm/blob/671af2b1c0b3ed6d856d37c21a561cc429a10701/vllm/model_executor/models/llama.py#L1
|
||||
"""Inference-only LLaMA model compatible with HuggingFace weights."""
|
||||
from typing import Any, Dict, Optional, Tuple
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
import torch
|
||||
from torch import nn
|
||||
|
||||
@@ -5,7 +5,7 @@ from typing import List, Optional
|
||||
import numpy as np
|
||||
import torch
|
||||
from torch import nn
|
||||
from transformers import CLIPVisionModel, LlavaConfig
|
||||
from transformers import CLIPVisionModel, LlamaConfig, LlavaConfig
|
||||
from transformers.models.llava.modeling_llava import LlavaMultiModalProjector
|
||||
from vllm.model_executor.layers.linear import LinearMethodBase
|
||||
from vllm.model_executor.weight_utils import (
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
# Adapted from
|
||||
# https://github.com/vllm-project/vllm/blob/d0215a58e78572d91dadafe9d832a2db89b09a13/vllm/model_executor/models/mixtral.py#L1
|
||||
"""Inference-only Mixtral model."""
|
||||
from typing import Optional
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from typing import Any, Dict, Optional
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
import torch
|
||||
from torch import nn
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
# Adapted from llama2.py
|
||||
# Modify details for the adaptation of Qwen2 model.
|
||||
"""Inference-only Qwen2 model compatible with HuggingFace weights."""
|
||||
from typing import Any, Dict, Optional, Tuple
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
import torch
|
||||
from torch import nn
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
"""Inference-only Yi-VL model."""
|
||||
|
||||
from typing import Optional
|
||||
import os
|
||||
from typing import List, Optional
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
@@ -12,6 +13,7 @@ from vllm.model_executor.weight_utils import (
|
||||
|
||||
from sglang.srt.models.llava import (
|
||||
LlavaLlamaForCausalLM,
|
||||
clip_vision_embed_forward,
|
||||
monkey_path_clip_vision_embed_forward,
|
||||
)
|
||||
|
||||
|
||||
@@ -10,6 +10,9 @@ import threading
|
||||
import time
|
||||
from typing import List, Optional, Union
|
||||
|
||||
# Fix a Python bug
|
||||
setattr(threading, "_register_atexit", lambda *args, **kwargs: None)
|
||||
|
||||
import aiohttp
|
||||
import psutil
|
||||
import pydantic
|
||||
@@ -55,9 +58,6 @@ from sglang.srt.managers.tokenizer_manager import TokenizerManager
|
||||
from sglang.srt.server_args import PortArgs, ServerArgs
|
||||
from sglang.srt.utils import enable_show_time_cost, handle_port_init
|
||||
|
||||
# Fix a Python bug
|
||||
setattr(threading, "_register_atexit", lambda *args, **kwargs: None)
|
||||
|
||||
asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
|
||||
|
||||
API_KEY_HEADER_NAME = "X-API-Key"
|
||||
@@ -619,7 +619,7 @@ def launch_server(server_args, pipe_finish_writer):
|
||||
try:
|
||||
requests.get(url + "/get_model_info", timeout=5, headers=headers)
|
||||
break
|
||||
except requests.exceptions.RequestException:
|
||||
except requests.exceptions.RequestException as e:
|
||||
pass
|
||||
else:
|
||||
if pipe_finish_writer is not None:
|
||||
|
||||
@@ -157,6 +157,7 @@ def get_exception_traceback():
|
||||
|
||||
|
||||
def get_int_token_logit_bias(tokenizer, vocab_size):
|
||||
from transformers import LlamaTokenizer, LlamaTokenizerFast
|
||||
|
||||
# a bug when model's vocab size > tokenizer.vocab_size
|
||||
vocab_size = tokenizer.vocab_size
|
||||
|
||||
Reference in New Issue
Block a user