diff --git a/python/sglang/srt/entrypoints/EngineBase.py b/python/sglang/srt/entrypoints/EngineBase.py
index 894a6f065..c7dfafd41 100644
--- a/python/sglang/srt/entrypoints/EngineBase.py
+++ b/python/sglang/srt/entrypoints/EngineBase.py
@@ -27,6 +27,11 @@ class EngineBase(ABC):
         """Generate outputs based on given inputs."""
         pass
 
+    @abstractmethod
+    def flush_cache(self):
+        """Flush the cache of the engine."""
+        pass
+
     @abstractmethod
     def update_weights_from_tensor(
         self,
diff --git a/python/sglang/srt/entrypoints/http_server_engine.py b/python/sglang/srt/entrypoints/http_server_engine.py
index f4d81a417..f50e13f3c 100644
--- a/python/sglang/srt/entrypoints/http_server_engine.py
+++ b/python/sglang/srt/entrypoints/http_server_engine.py
@@ -140,3 +140,6 @@ class HttpServerEngineAdapter(EngineBase):
 
     def resume_memory_occupation(self):
         return self._make_request("resume_memory_occupation")
+
+    def flush_cache(self):
+        return self._make_request("flush_cache")