Revert "Dump requests to a folder" (#2869)
This commit is contained in:
@@ -18,12 +18,10 @@ import copy
|
|||||||
import dataclasses
|
import dataclasses
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import pickle
|
|
||||||
import signal
|
import signal
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
import uuid
|
import uuid
|
||||||
from datetime import datetime
|
|
||||||
from typing import Any, Awaitable, Dict, Generic, List, Optional, Tuple, TypeVar, Union
|
from typing import Any, Awaitable, Dict, Generic, List, Optional, Tuple, TypeVar, Union
|
||||||
|
|
||||||
import fastapi
|
import fastapi
|
||||||
@@ -107,7 +105,6 @@ class TokenizerManager:
|
|||||||
# Parse args
|
# Parse args
|
||||||
self.server_args = server_args
|
self.server_args = server_args
|
||||||
self.enable_metrics = server_args.enable_metrics
|
self.enable_metrics = server_args.enable_metrics
|
||||||
self.dump_requsts_folder = server_args.dump_requests_folder
|
|
||||||
|
|
||||||
# Init inter-process communication
|
# Init inter-process communication
|
||||||
context = zmq.asyncio.Context(2)
|
context = zmq.asyncio.Context(2)
|
||||||
@@ -166,7 +163,6 @@ class TokenizerManager:
|
|||||||
# Store states
|
# Store states
|
||||||
self.to_create_loop = True
|
self.to_create_loop = True
|
||||||
self.rid_to_state: Dict[str, ReqState] = {}
|
self.rid_to_state: Dict[str, ReqState] = {}
|
||||||
self.dump_request_list: List[Tuple] = []
|
|
||||||
|
|
||||||
# The event to notify the weight sync is finished.
|
# The event to notify the weight sync is finished.
|
||||||
self.model_update_lock = RWLock()
|
self.model_update_lock = RWLock()
|
||||||
@@ -684,9 +680,6 @@ class TokenizerManager:
|
|||||||
|
|
||||||
if self.enable_metrics:
|
if self.enable_metrics:
|
||||||
self.collect_metrics(state, recv_obj, i)
|
self.collect_metrics(state, recv_obj, i)
|
||||||
if self.dump_requsts_folder and state.finished:
|
|
||||||
self.dump_requests(state, out_dict)
|
|
||||||
|
|
||||||
elif isinstance(recv_obj, OpenSessionReqOutput):
|
elif isinstance(recv_obj, OpenSessionReqOutput):
|
||||||
self.session_futures[recv_obj.session_id].set_result(
|
self.session_futures[recv_obj.session_id].set_result(
|
||||||
recv_obj.session_id if recv_obj.success else None
|
recv_obj.session_id if recv_obj.success else None
|
||||||
@@ -825,27 +818,6 @@ class TokenizerManager:
|
|||||||
(time.time() - state.created_time) / completion_tokens
|
(time.time() - state.created_time) / completion_tokens
|
||||||
)
|
)
|
||||||
|
|
||||||
def dump_requests(self, state: ReqState, out_dict: dict):
|
|
||||||
self.dump_request_list.append(
|
|
||||||
(state.obj, out_dict, state.created_time, time.time())
|
|
||||||
)
|
|
||||||
|
|
||||||
if len(self.dump_request_list) > int(
|
|
||||||
os.environ.get("SGLANG_DUMP_REQUESTS_THRESHOLD", "1000")
|
|
||||||
):
|
|
||||||
to_dump = self.dump_request_list
|
|
||||||
self.dump_request_list = []
|
|
||||||
|
|
||||||
def background_task():
|
|
||||||
os.makedirs(self.dump_requsts_folder, exist_ok=True)
|
|
||||||
current_time = datetime.now()
|
|
||||||
filename = current_time.strftime("%Y-%m-%d_%H-%M-%S") + ".pkl"
|
|
||||||
with open(os.path.join(self.dump_requsts_folder, filename), "wb") as f:
|
|
||||||
pickle.dump(to_dump, f)
|
|
||||||
|
|
||||||
# Schedule the task to run in the background without awaiting it
|
|
||||||
asyncio.create_task(asyncio.to_thread(background_task))
|
|
||||||
|
|
||||||
|
|
||||||
class SignalHandler:
|
class SignalHandler:
|
||||||
def __init__(self, tokenizer_manager):
|
def __init__(self, tokenizer_manager):
|
||||||
|
|||||||
@@ -23,6 +23,7 @@ from typing import List, Optional
|
|||||||
import torch
|
import torch
|
||||||
|
|
||||||
from sglang.srt.hf_transformers_utils import check_gguf_file
|
from sglang.srt.hf_transformers_utils import check_gguf_file
|
||||||
|
from sglang.srt.speculative.spec_info import SpeculativeAlgorithm
|
||||||
from sglang.srt.utils import (
|
from sglang.srt.utils import (
|
||||||
get_amdgpu_memory_capacity,
|
get_amdgpu_memory_capacity,
|
||||||
get_hpu_memory_capacity,
|
get_hpu_memory_capacity,
|
||||||
@@ -88,7 +89,6 @@ class ServerArgs:
|
|||||||
show_time_cost: bool = False
|
show_time_cost: bool = False
|
||||||
enable_metrics: bool = False
|
enable_metrics: bool = False
|
||||||
decode_log_interval: int = 40
|
decode_log_interval: int = 40
|
||||||
dump_requests_folder: str = ""
|
|
||||||
|
|
||||||
# API related
|
# API related
|
||||||
api_key: Optional[str] = None
|
api_key: Optional[str] = None
|
||||||
@@ -554,13 +554,7 @@ class ServerArgs:
|
|||||||
"--decode-log-interval",
|
"--decode-log-interval",
|
||||||
type=int,
|
type=int,
|
||||||
default=ServerArgs.decode_log_interval,
|
default=ServerArgs.decode_log_interval,
|
||||||
help="The log interval of decode batch.",
|
help="The log interval of decode batch",
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--dump-requests-folder",
|
|
||||||
type=str,
|
|
||||||
default=ServerArgs.decode_log_interval,
|
|
||||||
help="Dump raw requests to a folder for replay.",
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# API related
|
# API related
|
||||||
|
|||||||
Reference in New Issue
Block a user