Revert "Dump requests to a folder" (#2869)

This commit is contained in:
Lianmin Zheng
2025-01-13 06:21:25 -08:00
committed by GitHub
parent 6ec75e626d
commit c1e097ca66
2 changed files with 2 additions and 36 deletions

View File

@@ -18,12 +18,10 @@ import copy
import dataclasses import dataclasses
import logging import logging
import os import os
import pickle
import signal import signal
import sys import sys
import time import time
import uuid import uuid
from datetime import datetime
from typing import Any, Awaitable, Dict, Generic, List, Optional, Tuple, TypeVar, Union from typing import Any, Awaitable, Dict, Generic, List, Optional, Tuple, TypeVar, Union
import fastapi import fastapi
@@ -107,7 +105,6 @@ class TokenizerManager:
# Parse args # Parse args
self.server_args = server_args self.server_args = server_args
self.enable_metrics = server_args.enable_metrics self.enable_metrics = server_args.enable_metrics
self.dump_requsts_folder = server_args.dump_requests_folder
# Init inter-process communication # Init inter-process communication
context = zmq.asyncio.Context(2) context = zmq.asyncio.Context(2)
@@ -166,7 +163,6 @@ class TokenizerManager:
# Store states # Store states
self.to_create_loop = True self.to_create_loop = True
self.rid_to_state: Dict[str, ReqState] = {} self.rid_to_state: Dict[str, ReqState] = {}
self.dump_request_list: List[Tuple] = []
# The event to notify the weight sync is finished. # The event to notify the weight sync is finished.
self.model_update_lock = RWLock() self.model_update_lock = RWLock()
@@ -684,9 +680,6 @@ class TokenizerManager:
if self.enable_metrics: if self.enable_metrics:
self.collect_metrics(state, recv_obj, i) self.collect_metrics(state, recv_obj, i)
if self.dump_requsts_folder and state.finished:
self.dump_requests(state, out_dict)
elif isinstance(recv_obj, OpenSessionReqOutput): elif isinstance(recv_obj, OpenSessionReqOutput):
self.session_futures[recv_obj.session_id].set_result( self.session_futures[recv_obj.session_id].set_result(
recv_obj.session_id if recv_obj.success else None recv_obj.session_id if recv_obj.success else None
@@ -825,27 +818,6 @@ class TokenizerManager:
(time.time() - state.created_time) / completion_tokens (time.time() - state.created_time) / completion_tokens
) )
def dump_requests(self, state: ReqState, out_dict: dict):
    """Buffer a finished request and flush the buffer to disk when it grows large.

    Each call appends ``(state.obj, out_dict, state.created_time, time.time())``
    to ``self.dump_request_list``. Once the buffer holds more entries than
    the ``SGLANG_DUMP_REQUESTS_THRESHOLD`` environment variable (default
    ``1000``), the buffer is swapped for a fresh list and the old one is
    pickled to ``<self.dump_requsts_folder>/<YYYY-mm-dd_HH-MM-SS>.pkl`` on a
    worker thread.

    Must be called while an asyncio event loop is running, because the flush
    is scheduled with ``asyncio.create_task``.

    Args:
        state: Request state; only ``obj`` and ``created_time`` are read.
        out_dict: The output dictionary produced for the request.
    """
    self.dump_request_list.append(
        (state.obj, out_dict, state.created_time, time.time())
    )

    threshold = int(os.environ.get("SGLANG_DUMP_REQUESTS_THRESHOLD", "1000"))
    if len(self.dump_request_list) <= threshold:
        return

    # Hand the full buffer to the writer and start a fresh one immediately,
    # so requests finishing during the write are not lost or double-written.
    to_dump = self.dump_request_list
    self.dump_request_list = []
    folder = self.dump_requsts_folder

    def background_task():
        os.makedirs(folder, exist_ok=True)
        filename = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + ".pkl"
        with open(os.path.join(folder, filename), "wb") as f:
            pickle.dump(to_dump, f)

    # The event loop only keeps weak references to tasks. Hold a strong
    # reference until the task finishes, otherwise it may be
    # garbage-collected before the dump is written.
    pending = getattr(self, "_dump_tasks", None)
    if pending is None:
        pending = set()
        self._dump_tasks = pending

    def on_done(task):
        pending.discard(task)
        # Retrieve the exception so a failed dump is reported instead of
        # being silently dropped.
        if not task.cancelled() and task.exception() is not None:
            logging.getLogger(__name__).error(
                "Failed to dump requests to %s", folder, exc_info=task.exception()
            )

    # Schedule the task to run in the background without awaiting it.
    task = asyncio.create_task(asyncio.to_thread(background_task))
    pending.add(task)
    task.add_done_callback(on_done)
class SignalHandler: class SignalHandler:
def __init__(self, tokenizer_manager): def __init__(self, tokenizer_manager):

View File

@@ -23,6 +23,7 @@ from typing import List, Optional
import torch import torch
from sglang.srt.hf_transformers_utils import check_gguf_file from sglang.srt.hf_transformers_utils import check_gguf_file
from sglang.srt.speculative.spec_info import SpeculativeAlgorithm
from sglang.srt.utils import ( from sglang.srt.utils import (
get_amdgpu_memory_capacity, get_amdgpu_memory_capacity,
get_hpu_memory_capacity, get_hpu_memory_capacity,
@@ -88,7 +89,6 @@ class ServerArgs:
show_time_cost: bool = False show_time_cost: bool = False
enable_metrics: bool = False enable_metrics: bool = False
decode_log_interval: int = 40 decode_log_interval: int = 40
dump_requests_folder: str = ""
# API related # API related
api_key: Optional[str] = None api_key: Optional[str] = None
@@ -554,13 +554,7 @@ class ServerArgs:
"--decode-log-interval", "--decode-log-interval",
type=int, type=int,
default=ServerArgs.decode_log_interval, default=ServerArgs.decode_log_interval,
help="The log interval of decode batch.", help="The log interval of decode batch",
)
parser.add_argument(
"--dump-requests-folder",
type=str,
default=ServerArgs.decode_log_interval,
help="Dump raw requests to a folder for replay.",
) )
# API related # API related