[API] Add a request to get the memory pool size (#1760)

Co-authored-by: Byron Hsu <byronhsu1230@gmail.com>
This commit is contained in:
Ying Sheng
2024-10-23 00:02:29 -07:00
committed by GitHub
parent ad4125d1a9
commit 2fce449b1c
6 changed files with 50 additions and 0 deletions

View File

@@ -38,6 +38,8 @@ from sglang.srt.managers.io_struct import (
BatchEmbeddingOut,
BatchTokenIDOut,
FlushCacheReq,
GetMemPoolSizeReq,
GetMemPoolSizeReqOutput,
ProfileReq,
TokenizedEmbeddingReqInput,
TokenizedGenerateReqInput,
@@ -363,6 +365,10 @@ class Scheduler:
self.start_profile()
else:
self.stop_profile()
elif isinstance(recv_req, GetMemPoolSizeReq):
self.send_to_detokenizer.send_pyobj(
GetMemPoolSizeReqOutput(self.max_total_num_tokens)
)
else:
raise ValueError(f"Invalid request: {recv_req}")