Tiny add Engine.flush_cache API (#5241)
This commit is contained in:
@@ -671,10 +671,15 @@ class BatchEmbeddingOut:
|
||||
|
||||
|
||||
@dataclass
|
||||
class FlushCacheReq:
|
||||
class FlushCacheReqInput:
|
||||
pass
|
||||
|
||||
|
||||
@dataclass
|
||||
class FlushCacheReqOutput:
|
||||
success: bool
|
||||
|
||||
|
||||
@dataclass
|
||||
class UpdateWeightFromDiskReqInput:
|
||||
# The model path with the new weights
|
||||
|
||||
@@ -60,7 +60,8 @@ from sglang.srt.managers.io_struct import (
|
||||
CloseSessionReqInput,
|
||||
ExpertDistributionReq,
|
||||
ExpertDistributionReqOutput,
|
||||
FlushCacheReq,
|
||||
FlushCacheReqInput,
|
||||
FlushCacheReqOutput,
|
||||
GetInternalStateReq,
|
||||
GetInternalStateReqOutput,
|
||||
GetWeightsByNameReqInput,
|
||||
@@ -402,7 +403,7 @@ class Scheduler(
|
||||
[
|
||||
(TokenizedGenerateReqInput, self.handle_generate_request),
|
||||
(TokenizedEmbeddingReqInput, self.handle_embedding_request),
|
||||
(FlushCacheReq, self.flush_cache_wrapped),
|
||||
(FlushCacheReqInput, self.flush_cache_wrapped),
|
||||
(AbortReq, self.abort_request),
|
||||
(OpenSessionReqInput, self.open_session),
|
||||
(CloseSessionReqInput, self.close_session),
|
||||
@@ -1596,8 +1597,9 @@ class Scheduler(
|
||||
time.sleep(5)
|
||||
self.parent_process.send_signal(signal.SIGQUIT)
|
||||
|
||||
def flush_cache_wrapped(self, recv_req: FlushCacheReq):
|
||||
self.flush_cache()
|
||||
def flush_cache_wrapped(self, recv_req: FlushCacheReqInput):
|
||||
success = self.flush_cache()
|
||||
return FlushCacheReqOutput(success=success)
|
||||
|
||||
def flush_cache(self):
|
||||
"""Flush the memory pool and cache."""
|
||||
|
||||
@@ -66,7 +66,8 @@ from sglang.srt.managers.io_struct import (
|
||||
EmbeddingReqInput,
|
||||
ExpertDistributionReq,
|
||||
ExpertDistributionReqOutput,
|
||||
FlushCacheReq,
|
||||
FlushCacheReqInput,
|
||||
FlushCacheReqOutput,
|
||||
GenerateReqInput,
|
||||
GetInternalStateReq,
|
||||
GetInternalStateReqOutput,
|
||||
@@ -264,6 +265,9 @@ class TokenizerManager:
|
||||
self.resume_memory_occupation_communicator = _Communicator(
|
||||
self.send_to_scheduler, server_args.dp_size
|
||||
)
|
||||
self.flush_cache_communicator = _Communicator(
|
||||
self.send_to_scheduler, server_args.dp_size
|
||||
)
|
||||
self.start_profile_communicator = _Communicator(
|
||||
self.send_to_scheduler, server_args.dp_size
|
||||
)
|
||||
@@ -314,6 +318,10 @@ class TokenizerManager:
|
||||
ResumeMemoryOccupationReqOutput,
|
||||
self.resume_memory_occupation_communicator.handle_recv,
|
||||
),
|
||||
(
|
||||
FlushCacheReqOutput,
|
||||
self.flush_cache_communicator.handle_recv,
|
||||
),
|
||||
(
|
||||
ProfileReqOutput,
|
||||
self.start_profile_communicator.handle_recv,
|
||||
@@ -707,9 +715,8 @@ class TokenizerManager:
|
||||
except StopAsyncIteration:
|
||||
pass
|
||||
|
||||
def flush_cache(self):
|
||||
req = FlushCacheReq()
|
||||
self.send_to_scheduler.send_pyobj(req)
|
||||
async def flush_cache(self) -> FlushCacheReqOutput:
|
||||
return await self.flush_cache_communicator(FlushCacheReqInput())
|
||||
|
||||
def abort_request(self, rid: str):
|
||||
if rid not in self.rid_to_state:
|
||||
|
||||
Reference in New Issue
Block a user