From eef9433b46006af606e9edbcdf6ce5c9255be303 Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Sun, 20 Apr 2025 22:56:40 -0700 Subject: [PATCH] Fix flush cache (#5590) --- python/sglang/srt/entrypoints/http_server.py | 6 +----- python/sglang/srt/managers/tokenizer_manager.py | 2 +- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/python/sglang/srt/entrypoints/http_server.py b/python/sglang/srt/entrypoints/http_server.py index 750409ee9..7f075f176 100644 --- a/python/sglang/srt/entrypoints/http_server.py +++ b/python/sglang/srt/entrypoints/http_server.py @@ -25,11 +25,8 @@ import multiprocessing as multiprocessing import os import threading import time -from ast import Mult from http import HTTPStatus -from typing import AsyncIterator, Callable, Dict, Optional, Union - -from sglang.srt.model_executor.model_runner import LocalSerializedTensor +from typing import AsyncIterator, Callable, Dict, Optional # Fix a bug of Python threading setattr(threading, "_register_atexit", lambda *args, **kwargs: None) @@ -84,7 +81,6 @@ from sglang.srt.openai_api.protocol import ModelCard, ModelList from sglang.srt.reasoning_parser import ReasoningParser from sglang.srt.server_args import ServerArgs from sglang.srt.utils import ( - MultiprocessingSerializer, add_api_key_middleware, add_prometheus_middleware, delete_directory, diff --git a/python/sglang/srt/managers/tokenizer_manager.py b/python/sglang/srt/managers/tokenizer_manager.py index e144781dd..99372819f 100644 --- a/python/sglang/srt/managers/tokenizer_manager.py +++ b/python/sglang/srt/managers/tokenizer_manager.py @@ -716,7 +716,7 @@ class TokenizerManager: pass async def flush_cache(self) -> FlushCacheReqOutput: - return await self.flush_cache_communicator(FlushCacheReqInput()) + return (await self.flush_cache_communicator(FlushCacheReqInput()))[0] def abort_request(self, rid: str): if rid not in self.rid_to_state: