add .isort.cfg (#378)
This commit is contained in:
@@ -4,6 +4,7 @@ from dataclasses import dataclass
|
||||
|
||||
import torch
|
||||
import torch.distributed as dist
|
||||
|
||||
from sglang.srt.managers.router.model_runner import ModelRunner
|
||||
from sglang.srt.model_config import ModelConfig
|
||||
|
||||
@@ -66,9 +67,9 @@ class BenchBatch:
|
||||
p_idx = prefix_req_idx[i // fork_num].item()
|
||||
n_idx = self.req_pool_indices[i].item()
|
||||
req_to_token[n_idx, :prefix_len] = req_to_token[p_idx, :prefix_len]
|
||||
req_to_token[
|
||||
n_idx, prefix_len : prefix_len + extend_len
|
||||
] = self.out_cache_loc[i * extend_len : (i + 1) * extend_len]
|
||||
req_to_token[n_idx, prefix_len : prefix_len + extend_len] = (
|
||||
self.out_cache_loc[i * extend_len : (i + 1) * extend_len]
|
||||
)
|
||||
|
||||
def update_decode(self, predict_ids, batch_size):
|
||||
assert predict_ids.shape[0] == batch_size
|
||||
@@ -81,9 +82,9 @@ class BenchBatch:
|
||||
self.out_cache_cont_start,
|
||||
self.out_cache_cont_end,
|
||||
) = self.token_to_kv_pool.alloc_contiguous(batch_size)
|
||||
self.req_to_token_pool.req_to_token[
|
||||
self.req_pool_indices, self.seq_lens
|
||||
] = self.out_cache_loc
|
||||
self.req_to_token_pool.req_to_token[self.req_pool_indices, self.seq_lens] = (
|
||||
self.out_cache_loc
|
||||
)
|
||||
self.seq_lens.add_(1)
|
||||
|
||||
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
import argparse
|
||||
import os
|
||||
|
||||
import torch
|
||||
from transformers import AutoModelForCausalLM, AutoTokenizer
|
||||
|
||||
@@ -1,11 +1,8 @@
|
||||
import multiprocessing
|
||||
import os
|
||||
import time
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.distributed as dist
|
||||
import transformers
|
||||
|
||||
from sglang.srt.managers.router.infer_batch import Batch, ForwardMode, Req
|
||||
from sglang.srt.managers.router.model_runner import ModelRunner
|
||||
from sglang.srt.model_config import ModelConfig
|
||||
|
||||
@@ -4,6 +4,7 @@ import time
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.distributed as dist
|
||||
|
||||
from sglang.srt.managers.router.model_runner import ModelRunner
|
||||
from sglang.srt.model_config import ModelConfig
|
||||
|
||||
|
||||
@@ -1,12 +1,10 @@
|
||||
import multiprocessing
|
||||
import time
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.distributed as dist
|
||||
|
||||
from sglang.srt.hf_transformers_utils import get_processor
|
||||
from sglang.srt.managers.router.infer_batch import ForwardMode
|
||||
from sglang.srt.managers.router.model_runner import InputMetadata, ModelRunner
|
||||
from sglang.srt.managers.router.model_runner import ModelRunner
|
||||
from sglang.srt.model_config import ModelConfig
|
||||
from sglang.srt.utils import load_image
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import flashinfer
|
||||
import pytest
|
||||
import torch
|
||||
|
||||
from sglang.srt.layers.extend_attention import extend_attention_fwd
|
||||
from sglang.srt.layers.token_attention import token_attention_fwd
|
||||
|
||||
|
||||
@@ -9,11 +9,8 @@ The capital of the United Kindom is London.\nThe capital of the United Kingdom i
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import json
|
||||
import time
|
||||
|
||||
import aiohttp
|
||||
import requests
|
||||
|
||||
|
||||
async def send_request(url, data, delay=0):
|
||||
|
||||
@@ -10,7 +10,6 @@ The image features a man standing on the back of a yellow taxi cab, holding
|
||||
import argparse
|
||||
import asyncio
|
||||
import json
|
||||
import time
|
||||
|
||||
import aiohttp
|
||||
import requests
|
||||
|
||||
@@ -6,7 +6,6 @@ The capital of France is Paris.\nThe capital of the United States is Washington,
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import time
|
||||
|
||||
import requests
|
||||
|
||||
|
||||
@@ -2,14 +2,14 @@ import argparse
|
||||
from enum import Enum
|
||||
|
||||
from pydantic import BaseModel, constr
|
||||
|
||||
import sglang as sgl
|
||||
from sglang.srt.constrained import build_regex_from_object
|
||||
from sglang.test.test_utils import (
|
||||
add_common_sglang_args_and_parse,
|
||||
select_sglang_backend,
|
||||
)
|
||||
|
||||
import sglang as sgl
|
||||
|
||||
IP_REGEX = r"((25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(25[0-5]|2[0-4]\d|[01]?\d\d?)"
|
||||
|
||||
ip_jump_forward = (
|
||||
|
||||
@@ -2,13 +2,13 @@ import argparse
|
||||
import random
|
||||
import string
|
||||
|
||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
||||
|
||||
import sglang as sgl
|
||||
from sglang.test.test_utils import (
|
||||
add_common_sglang_args_and_parse,
|
||||
select_sglang_backend,
|
||||
)
|
||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
||||
|
||||
import sglang as sgl
|
||||
|
||||
TOKENIZER = None
|
||||
RANDOM_PREFILL_LEN = None
|
||||
|
||||
Reference in New Issue
Block a user