Revert removing the unused imports (#385)
This commit is contained in:
@@ -3,7 +3,11 @@
|
|||||||
import re
|
import re
|
||||||
from typing import Callable, List, Optional, Union
|
from typing import Callable, List, Optional, Union
|
||||||
|
|
||||||
|
from sglang.backend.anthropic import Anthropic
|
||||||
from sglang.backend.base_backend import BaseBackend
|
from sglang.backend.base_backend import BaseBackend
|
||||||
|
from sglang.backend.openai import OpenAI
|
||||||
|
from sglang.backend.runtime_endpoint import RuntimeEndpoint
|
||||||
|
from sglang.backend.vertexai import VertexAI
|
||||||
from sglang.global_config import global_config
|
from sglang.global_config import global_config
|
||||||
from sglang.lang.ir import (
|
from sglang.lang.ir import (
|
||||||
SglExpr,
|
SglExpr,
|
||||||
|
|||||||
@@ -1,3 +1,7 @@
|
|||||||
|
from typing import List, Optional, Union
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
from sglang.backend.base_backend import BaseBackend
|
from sglang.backend.base_backend import BaseBackend
|
||||||
from sglang.lang.chat_template import get_chat_template
|
from sglang.lang.chat_template import get_chat_template
|
||||||
from sglang.lang.interpreter import StreamExecutor
|
from sglang.lang.interpreter import StreamExecutor
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
from typing import List, Optional, Union
|
from typing import Callable, List, Optional, Union
|
||||||
|
|
||||||
from sglang.lang.chat_template import get_chat_template
|
from sglang.lang.chat_template import get_chat_template
|
||||||
from sglang.lang.interpreter import StreamExecutor
|
from sglang.lang.interpreter import StreamExecutor
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
import logging
|
import logging
|
||||||
import time
|
import time
|
||||||
from typing import List, Optional
|
from typing import Callable, List, Optional, Union
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
|
|||||||
@@ -1,14 +1,15 @@
|
|||||||
import json
|
import json
|
||||||
from typing import List, Optional
|
from typing import Callable, List, Optional, Union
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import requests
|
||||||
|
|
||||||
from sglang.backend.base_backend import BaseBackend
|
from sglang.backend.base_backend import BaseBackend
|
||||||
from sglang.global_config import global_config
|
from sglang.global_config import global_config
|
||||||
from sglang.lang.chat_template import get_chat_template_by_model_path
|
from sglang.lang.chat_template import get_chat_template_by_model_path
|
||||||
from sglang.lang.interpreter import StreamExecutor
|
from sglang.lang.interpreter import StreamExecutor
|
||||||
from sglang.lang.ir import SglSamplingParams
|
from sglang.lang.ir import SglArgument, SglSamplingParams
|
||||||
from sglang.utils import find_printable_text, http_request
|
from sglang.utils import encode_image_base64, find_printable_text, http_request
|
||||||
|
|
||||||
|
|
||||||
class RuntimeEndpoint(BaseBackend):
|
class RuntimeEndpoint(BaseBackend):
|
||||||
|
|||||||
@@ -1,5 +1,8 @@
|
|||||||
import os
|
import os
|
||||||
import warnings
|
import warnings
|
||||||
|
from typing import List, Optional, Union
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
from sglang.backend.base_backend import BaseBackend
|
from sglang.backend.base_backend import BaseBackend
|
||||||
from sglang.lang.chat_template import get_chat_template
|
from sglang.lang.chat_template import get_chat_template
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
from dataclasses import dataclass
|
from dataclasses import dataclass, field
|
||||||
from enum import Enum, auto
|
from enum import Enum, auto
|
||||||
from typing import Callable, Dict, List, Tuple
|
from typing import Callable, Dict, List, Optional, Tuple
|
||||||
|
|
||||||
|
|
||||||
class ChatTemplateStyle(Enum):
|
class ChatTemplateStyle(Enum):
|
||||||
|
|||||||
@@ -5,7 +5,13 @@ from typing import List, Union
|
|||||||
|
|
||||||
from sglang.global_config import global_config
|
from sglang.global_config import global_config
|
||||||
from sglang.lang.interpreter import ProgramState, StreamExecutor, pin_program
|
from sglang.lang.interpreter import ProgramState, StreamExecutor, pin_program
|
||||||
from sglang.lang.ir import SglArgument, SglExpr, SglSamplingParams, SglVariable
|
from sglang.lang.ir import (
|
||||||
|
SglArgument,
|
||||||
|
SglConstantText,
|
||||||
|
SglExpr,
|
||||||
|
SglSamplingParams,
|
||||||
|
SglVariable,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def compile_func(function, backend):
|
def compile_func(function, backend):
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ import threading
|
|||||||
import uuid
|
import uuid
|
||||||
from concurrent.futures import ThreadPoolExecutor
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
from contextlib import contextmanager
|
from contextlib import contextmanager
|
||||||
from typing import Any, Callable, Dict, List, Optional
|
from typing import Any, Callable, Dict, List, Optional, Union
|
||||||
|
|
||||||
import tqdm
|
import tqdm
|
||||||
|
|
||||||
@@ -18,6 +18,7 @@ from sglang.lang.ir import (
|
|||||||
SglConstantText,
|
SglConstantText,
|
||||||
SglExpr,
|
SglExpr,
|
||||||
SglExprList,
|
SglExprList,
|
||||||
|
SglFunction,
|
||||||
SglGen,
|
SglGen,
|
||||||
SglImage,
|
SglImage,
|
||||||
SglRoleBegin,
|
SglRoleBegin,
|
||||||
|
|||||||
@@ -472,4 +472,4 @@ class SglCommitLazy(SglExpr):
|
|||||||
super().__init__()
|
super().__init__()
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return "CommitLazy()"
|
return f"CommitLazy()"
|
||||||
|
|||||||
@@ -1,16 +1,20 @@
|
|||||||
"""Tracing a program."""
|
"""Tracing a program."""
|
||||||
|
|
||||||
import uuid
|
import uuid
|
||||||
from typing import Any, Dict, List, Optional
|
from typing import Any, Callable, Dict, List, Optional, Union
|
||||||
|
|
||||||
from sglang.backend.base_backend import BaseBackend
|
from sglang.backend.base_backend import BaseBackend
|
||||||
|
from sglang.global_config import global_config
|
||||||
from sglang.lang.interpreter import ProgramState, ProgramStateGroup
|
from sglang.lang.interpreter import ProgramState, ProgramStateGroup
|
||||||
from sglang.lang.ir import (
|
from sglang.lang.ir import (
|
||||||
SglArgument,
|
SglArgument,
|
||||||
|
SglCommitLazy,
|
||||||
|
SglConcateAndAppend,
|
||||||
SglConstantText,
|
SglConstantText,
|
||||||
SglExpr,
|
SglExpr,
|
||||||
SglExprList,
|
SglExprList,
|
||||||
SglFork,
|
SglFork,
|
||||||
|
SglFunction,
|
||||||
SglGen,
|
SglGen,
|
||||||
SglGetForkItem,
|
SglGetForkItem,
|
||||||
SglRoleBegin,
|
SglRoleBegin,
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import warnings
|
import warnings
|
||||||
from typing import Optional, Union
|
from typing import List, Optional, Tuple, Union
|
||||||
|
|
||||||
from huggingface_hub import snapshot_download
|
from huggingface_hub import snapshot_download
|
||||||
from transformers import (
|
from transformers import (
|
||||||
|
|||||||
@@ -84,7 +84,7 @@ def start_detokenizer_process(
|
|||||||
):
|
):
|
||||||
try:
|
try:
|
||||||
manager = DetokenizerManager(server_args, port_args)
|
manager = DetokenizerManager(server_args, port_args)
|
||||||
except Exception:
|
except Exception as e:
|
||||||
pipe_writer.send(get_exception_traceback())
|
pipe_writer.send(get_exception_traceback())
|
||||||
raise
|
raise
|
||||||
pipe_writer.send("init ok")
|
pipe_writer.send("init ok")
|
||||||
|
|||||||
@@ -1,6 +1,8 @@
|
|||||||
import heapq
|
import heapq
|
||||||
import time
|
import time
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Tuple
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
|
|||||||
@@ -20,7 +20,7 @@
|
|||||||
|
|
||||||
# This file is based on the LLama model definition file in transformers
|
# This file is based on the LLama model definition file in transformers
|
||||||
"""PyTorch Cohere model."""
|
"""PyTorch Cohere model."""
|
||||||
from typing import Optional, Tuple
|
from typing import List, Optional, Tuple
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
import torch.utils.checkpoint
|
import torch.utils.checkpoint
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
# Adapted from
|
# Adapted from
|
||||||
# https://github.com/vllm-project/vllm/blob/671af2b1c0b3ed6d856d37c21a561cc429a10701/vllm/model_executor/models/llama.py#L1
|
# https://github.com/vllm-project/vllm/blob/671af2b1c0b3ed6d856d37c21a561cc429a10701/vllm/model_executor/models/llama.py#L1
|
||||||
"""Inference-only LLaMA model compatible with HuggingFace weights."""
|
"""Inference-only LLaMA model compatible with HuggingFace weights."""
|
||||||
from typing import Any, Dict, Optional, Tuple
|
from typing import Any, Dict, List, Optional, Tuple
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
from torch import nn
|
from torch import nn
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ from typing import List, Optional
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
from torch import nn
|
from torch import nn
|
||||||
from transformers import CLIPVisionModel, LlavaConfig
|
from transformers import CLIPVisionModel, LlamaConfig, LlavaConfig
|
||||||
from transformers.models.llava.modeling_llava import LlavaMultiModalProjector
|
from transformers.models.llava.modeling_llava import LlavaMultiModalProjector
|
||||||
from vllm.model_executor.layers.linear import LinearMethodBase
|
from vllm.model_executor.layers.linear import LinearMethodBase
|
||||||
from vllm.model_executor.weight_utils import (
|
from vllm.model_executor.weight_utils import (
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
# Adapted from
|
# Adapted from
|
||||||
# https://github.com/vllm-project/vllm/blob/d0215a58e78572d91dadafe9d832a2db89b09a13/vllm/model_executor/models/mixtral.py#L1
|
# https://github.com/vllm-project/vllm/blob/d0215a58e78572d91dadafe9d832a2db89b09a13/vllm/model_executor/models/mixtral.py#L1
|
||||||
"""Inference-only Mixtral model."""
|
"""Inference-only Mixtral model."""
|
||||||
from typing import Optional
|
from typing import List, Optional, Tuple
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
from typing import Any, Dict, Optional
|
from typing import Any, Dict, List, Optional, Tuple
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
from torch import nn
|
from torch import nn
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
# Adapted from llama2.py
|
# Adapted from llama2.py
|
||||||
# Modify details for the adaptation of Qwen2 model.
|
# Modify details for the adaptation of Qwen2 model.
|
||||||
"""Inference-only Qwen2 model compatible with HuggingFace weights."""
|
"""Inference-only Qwen2 model compatible with HuggingFace weights."""
|
||||||
from typing import Any, Dict, Optional, Tuple
|
from typing import Any, Dict, List, Optional, Tuple
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
from torch import nn
|
from torch import nn
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
"""Inference-only Yi-VL model."""
|
"""Inference-only Yi-VL model."""
|
||||||
|
|
||||||
from typing import Optional
|
import os
|
||||||
|
from typing import List, Optional
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
import torch.nn as nn
|
import torch.nn as nn
|
||||||
@@ -12,6 +13,7 @@ from vllm.model_executor.weight_utils import (
|
|||||||
|
|
||||||
from sglang.srt.models.llava import (
|
from sglang.srt.models.llava import (
|
||||||
LlavaLlamaForCausalLM,
|
LlavaLlamaForCausalLM,
|
||||||
|
clip_vision_embed_forward,
|
||||||
monkey_path_clip_vision_embed_forward,
|
monkey_path_clip_vision_embed_forward,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -10,6 +10,9 @@ import threading
|
|||||||
import time
|
import time
|
||||||
from typing import List, Optional, Union
|
from typing import List, Optional, Union
|
||||||
|
|
||||||
|
# Fix a Python bug
|
||||||
|
setattr(threading, "_register_atexit", lambda *args, **kwargs: None)
|
||||||
|
|
||||||
import aiohttp
|
import aiohttp
|
||||||
import psutil
|
import psutil
|
||||||
import pydantic
|
import pydantic
|
||||||
@@ -55,9 +58,6 @@ from sglang.srt.managers.tokenizer_manager import TokenizerManager
|
|||||||
from sglang.srt.server_args import PortArgs, ServerArgs
|
from sglang.srt.server_args import PortArgs, ServerArgs
|
||||||
from sglang.srt.utils import enable_show_time_cost, handle_port_init
|
from sglang.srt.utils import enable_show_time_cost, handle_port_init
|
||||||
|
|
||||||
# Fix a Python bug
|
|
||||||
setattr(threading, "_register_atexit", lambda *args, **kwargs: None)
|
|
||||||
|
|
||||||
asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
|
asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
|
||||||
|
|
||||||
API_KEY_HEADER_NAME = "X-API-Key"
|
API_KEY_HEADER_NAME = "X-API-Key"
|
||||||
@@ -619,7 +619,7 @@ def launch_server(server_args, pipe_finish_writer):
|
|||||||
try:
|
try:
|
||||||
requests.get(url + "/get_model_info", timeout=5, headers=headers)
|
requests.get(url + "/get_model_info", timeout=5, headers=headers)
|
||||||
break
|
break
|
||||||
except requests.exceptions.RequestException:
|
except requests.exceptions.RequestException as e:
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
if pipe_finish_writer is not None:
|
if pipe_finish_writer is not None:
|
||||||
|
|||||||
@@ -157,6 +157,7 @@ def get_exception_traceback():
|
|||||||
|
|
||||||
|
|
||||||
def get_int_token_logit_bias(tokenizer, vocab_size):
|
def get_int_token_logit_bias(tokenizer, vocab_size):
|
||||||
|
from transformers import LlamaTokenizer, LlamaTokenizerFast
|
||||||
|
|
||||||
# a bug when model's vocab size > tokenizer.vocab_size
|
# a bug when model's vocab size > tokenizer.vocab_size
|
||||||
vocab_size = tokenizer.vocab_size
|
vocab_size = tokenizer.vocab_size
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
import argparse
|
import argparse
|
||||||
import glob
|
import glob
|
||||||
import multiprocessing
|
import multiprocessing
|
||||||
|
import os
|
||||||
import time
|
import time
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
|
|||||||
@@ -1,3 +1,4 @@
|
|||||||
|
import json
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
from sglang import Anthropic, set_default_backend
|
from sglang import Anthropic, set_default_backend
|
||||||
|
|||||||
@@ -2,6 +2,7 @@
|
|||||||
python3 -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port 30000
|
python3 -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port 30000
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
import sglang as sgl
|
import sglang as sgl
|
||||||
@@ -12,6 +13,8 @@ from sglang.test.test_programs import (
|
|||||||
test_few_shot_qa,
|
test_few_shot_qa,
|
||||||
test_mt_bench,
|
test_mt_bench,
|
||||||
test_parallel_decoding,
|
test_parallel_decoding,
|
||||||
|
test_parallel_encoding,
|
||||||
|
test_react,
|
||||||
test_regex,
|
test_regex,
|
||||||
test_select,
|
test_select,
|
||||||
test_stream,
|
test_stream,
|
||||||
|
|||||||
@@ -110,7 +110,7 @@ class TestTracing(unittest.TestCase):
|
|||||||
forks = s.fork(3)
|
forks = s.fork(3)
|
||||||
for i in range(3):
|
for i in range(3):
|
||||||
forks[i] += f"Now, expand tip {i+1} into a paragraph:\n"
|
forks[i] += f"Now, expand tip {i+1} into a paragraph:\n"
|
||||||
forks[i] += sgl.gen("detailed_tip")
|
forks[i] += sgl.gen(f"detailed_tip")
|
||||||
|
|
||||||
s += "Tip 1:" + forks[0]["detailed_tip"] + "\n"
|
s += "Tip 1:" + forks[0]["detailed_tip"] + "\n"
|
||||||
s += "Tip 2:" + forks[1]["detailed_tip"] + "\n"
|
s += "Tip 2:" + forks[1]["detailed_tip"] + "\n"
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
import argparse
|
import argparse
|
||||||
|
import os
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
from transformers import AutoModelForCausalLM, AutoTokenizer
|
from transformers import AutoModelForCausalLM, AutoTokenizer
|
||||||
|
|||||||
@@ -1,6 +1,10 @@
|
|||||||
import multiprocessing
|
import multiprocessing
|
||||||
import os
|
import os
|
||||||
|
import time
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import torch
|
||||||
|
import torch.distributed as dist
|
||||||
import transformers
|
import transformers
|
||||||
|
|
||||||
from sglang.srt.managers.router.infer_batch import Batch, ForwardMode, Req
|
from sglang.srt.managers.router.infer_batch import Batch, ForwardMode, Req
|
||||||
|
|||||||
@@ -1,10 +1,13 @@
|
|||||||
import multiprocessing
|
import multiprocessing
|
||||||
|
import time
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
|
import torch.distributed as dist
|
||||||
|
|
||||||
from sglang.srt.hf_transformers_utils import get_processor
|
from sglang.srt.hf_transformers_utils import get_processor
|
||||||
from sglang.srt.managers.router.model_runner import ModelRunner
|
from sglang.srt.managers.router.infer_batch import ForwardMode
|
||||||
|
from sglang.srt.managers.router.model_runner import InputMetadata, ModelRunner
|
||||||
from sglang.srt.model_config import ModelConfig
|
from sglang.srt.model_config import ModelConfig
|
||||||
from sglang.srt.utils import load_image
|
from sglang.srt.utils import load_image
|
||||||
|
|
||||||
|
|||||||
@@ -9,8 +9,11 @@ The capital of the United Kindom is London.\nThe capital of the United Kingdom i
|
|||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import json
|
||||||
|
import time
|
||||||
|
|
||||||
import aiohttp
|
import aiohttp
|
||||||
|
import requests
|
||||||
|
|
||||||
|
|
||||||
async def send_request(url, data, delay=0):
|
async def send_request(url, data, delay=0):
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ The image features a man standing on the back of a yellow taxi cab, holding
|
|||||||
import argparse
|
import argparse
|
||||||
import asyncio
|
import asyncio
|
||||||
import json
|
import json
|
||||||
|
import time
|
||||||
|
|
||||||
import aiohttp
|
import aiohttp
|
||||||
import requests
|
import requests
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ The capital of France is Paris.\nThe capital of the United States is Washington,
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
|
import time
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user