Time cost utils (#355)

This commit is contained in:
Liangsheng Yin
2024-04-09 23:27:31 +08:00
committed by GitHub
parent 550a4f78f3
commit 62b3812b69
6 changed files with 66 additions and 47 deletions

View File

@@ -9,9 +9,8 @@ from sglang.lang.interpreter import StreamExecutor
from sglang.lang.ir import SglSamplingParams
try:
import tiktoken
import openai
import tiktoken
except ImportError as e:
openai = tiktoken = e

View File

@@ -7,6 +7,7 @@ class FSMCache(BaseCache):
super().__init__(enable=enable)
from importlib.metadata import version
if version("outlines") >= "0.0.35":
from transformers import AutoTokenizer

View File

@@ -53,7 +53,7 @@ from sglang.srt.managers.openai_protocol import (
from sglang.srt.managers.router.manager import start_router_process
from sglang.srt.managers.tokenizer_manager import TokenizerManager
from sglang.srt.server_args import PortArgs, ServerArgs
from sglang.srt.utils import handle_port_init
from sglang.srt.utils import enable_show_time_cost, handle_port_init
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.responses import JSONResponse
@@ -503,6 +503,10 @@ def launch_server(server_args, pipe_finish_writer):
global tokenizer_manager
global chat_template_name
# start show time thread
if server_args.show_time_cost:
enable_show_time_cost()
# disable disk cache if needed
if server_args.disable_disk_cache:
disable_cache()

View File

@@ -26,13 +26,14 @@ class ServerArgs:
disable_log_stats: bool = False
log_stats_interval: int = 10
log_level: str = "info"
api_key: str = ""
show_time_cost: bool = False
# optional modes
disable_radix_cache: bool = False
enable_flashinfer: bool = False
disable_regex_jump_forward: bool = False
disable_disk_cache: bool = False
api_key: str = ""
def __post_init__(self):
if self.tokenizer_path is None:
@@ -181,6 +182,18 @@ class ServerArgs:
default=ServerArgs.log_stats_interval,
help="Log stats interval in second.",
)
parser.add_argument(
"--api-key",
type=str,
default=ServerArgs.api_key,
help="Set API Key",
)
parser.add_argument(
"--show-time-cost",
action="store_true",
help="Show time cost of custom marks",
)
# optional modes
parser.add_argument(
"--disable-radix-cache",
@@ -202,12 +215,6 @@ class ServerArgs:
action="store_true",
help="Disable disk cache to avoid possible crashes related to file system or high concurrency.",
)
parser.add_argument(
"--api-key",
type=str,
default=ServerArgs.api_key,
help="Set API Key",
)
@classmethod
def from_cli_args(cls, args: argparse.Namespace):

View File

@@ -11,48 +11,56 @@ from typing import List, Optional
import numpy as np
import requests
import torch
import torch.distributed as dist
# Flag read by mark_cost_time(): timings are printed only while it is true.
is_show_cost_time = False
# Flag read by mark_start()/mark_end(); enable_show_time_cost() sets it to True.
show_time_cost = False
# Maps a mark name to the TimeInfo object accumulating its time.
time_infos = {}
def mark_cost_time(func_name):
    """Build a decorator that can report the wall-clock cost of each call.

    Every call to the decorated function is bracketed by
    ``torch.cuda.synchronize()``. The elapsed time in milliseconds is printed,
    labelled with ``func_name``, only when ``dist.get_rank()`` is 0 or 1 and
    the module flag ``is_show_cost_time`` is true.

    Args:
        func_name: Label printed in front of the measured time.

    Returns:
        A decorator. The decorated function forwards all arguments and
        returns whatever the wrapped function returns.
    """
    # Imported locally so the block does not depend on a file-level import.
    import functools

    def inner_func(func):
        # wraps() keeps __name__, __doc__ and __wrapped__ of the original
        # function visible on the timing wrapper.
        @functools.wraps(func)
        def time_func(*args, **kwargs):
            # The rank is queried first, as before, then the module flag.
            should_report = dist.get_rank() in [0, 1] and is_show_cost_time

            torch.cuda.synchronize()
            start_time = time.time()
            ans = func(*args, **kwargs)
            torch.cuda.synchronize()

            if should_report:
                print(func_name, "cost time:", (time.time() - start_time) * 1000)
            return ans

        return time_func

    return inner_func
def enable_show_time_cost():
    """Switch on time-cost reporting by setting the module flag ``show_time_cost``."""
    global show_time_cost
    show_time_cost = True
time_mark = {}
class TimeInfo:
    """Accumulated time for one named mark, plus how to display it.

    Attributes are plain fields:
      name          -- label shown when printing
      interval      -- how much ``acc_time`` must grow before ``check`` fires
      color         -- number placed in the ANSI escape sequence when printing
      indent        -- nesting level; each level prints two dashes
      acc_time      -- running total, starts at 0
      last_acc_time -- value of ``acc_time`` when ``check`` last returned True
    """

    def __init__(self, name, interval=0.1, color=0, indent=0):
        self.name, self.interval = name, interval
        self.color, self.indent = color, indent
        self.acc_time = 0
        self.last_acc_time = 0

    def check(self):
        """Return True when ``acc_time`` grew by more than ``interval`` since the last True."""
        if not (self.acc_time - self.last_acc_time > self.interval):
            return False
        # Remember the current total so the next report waits another interval.
        self.last_acc_time = self.acc_time
        return True

    def pretty_print(self):
        """Print ``name`` and ``acc_time`` (3 decimals) on one colored, indented line."""
        dashes = "-" * self.indent * 2
        print(f"\x1b[{self.color}m{dashes}{self.name}: {self.acc_time:.3f}s\x1b[0m")
def mark_start(name, interval=0.1, color=0, indent=0):
    """Open a timed section called ``name``.

    Does nothing unless the module flag ``show_time_cost`` is true. Otherwise
    it calls ``torch.cuda.synchronize()``, creates the ``TimeInfo`` entry for
    ``name`` in ``time_infos`` on first use (``interval``, ``color`` and
    ``indent`` are only used at that point), and subtracts the current
    ``time.time()`` from its ``acc_time``. The matching ``mark_end`` adds the
    end timestamp back, which leaves the elapsed time accumulated.
    """
    if show_time_cost:
        torch.cuda.synchronize()
        info = time_infos.get(name)
        if info is None:
            info = time_infos[name] = TimeInfo(name, interval, color, indent)
        info.acc_time -= time.time()
def mark_end(name):
    """Close the timed section called ``name`` and report it when due.

    Does nothing unless the module flag ``show_time_cost`` is true. Otherwise
    it calls ``torch.cuda.synchronize()``, adds the current ``time.time()`` to
    the entry's ``acc_time``, and prints the entry via ``pretty_print()`` when
    its ``check()`` returns True.

    If no entry exists for ``name`` (no ``mark_start`` was recorded for it),
    the call is ignored rather than raising ``KeyError``.
    """
    if not show_time_cost:
        return

    torch.cuda.synchronize()

    info = time_infos.get(name)
    if info is None:
        # An unmatched end mark must not crash the code being profiled.
        return

    info.acc_time += time.time()
    if info.check():
        info.pretty_print()
def calculate_time(show=False, min_cost_ms=0.0):