Online serving benchmarks of real datasets for hierarchical KV caching (#3211)

Co-authored-by: Zhiqiang Xie <xiezhq@stanford.edu>
This commit is contained in:
Yueyang Pan
2025-03-06 01:16:43 +01:00
committed by GitHub
parent 62b362b1f1
commit 25482edb5c
6 changed files with 1914 additions and 1 deletions

View File

@@ -24,10 +24,14 @@ import requests
from IPython.display import HTML, display
from tqdm import tqdm
from sglang.srt.openai_api.protocol import ChatCompletionMessageContentPart
from sglang.srt.utils import kill_process_tree
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
# Type of a message's content field: either a plain string prompt or a list of
# content parts (e.g. when the message also carries images/videos).
MsgContent = Union[str, List[ChatCompletionMessageContentPart]]
def get_exception_traceback():
etype, value, tb = sys.exc_info()