[Feat] Support update weights without restart server (#1157)

This commit is contained in:
Shan Yu
2024-08-20 13:48:24 -07:00
committed by GitHub
parent 350a81609b
commit cd10654e7e
7 changed files with 303 additions and 13 deletions

View File

@@ -51,7 +51,11 @@ from sglang.srt.managers.controller_single import (
start_controller_process as start_controller_process_single,
)
from sglang.srt.managers.detokenizer_manager import start_detokenizer_process
from sglang.srt.managers.io_struct import EmbeddingReqInput, GenerateReqInput
from sglang.srt.managers.io_struct import (
EmbeddingReqInput,
GenerateReqInput,
UpdateWeightReqInput,
)
from sglang.srt.managers.tokenizer_manager import TokenizerManager
from sglang.srt.openai_api.adapter import (
load_chat_template_for_openai_api,
@@ -136,6 +140,23 @@ async def flush_cache():
)
@app.post("/update_weights")
async def update_weights(obj: UpdateWeightReqInput, request: Request):
success, message = await tokenizer_manager.update_weights(obj, request)
content = {"message": message, "success": str(success)}
if success:
return JSONResponse(
content,
status_code=HTTPStatus.OK,
)
else:
return JSONResponse(
content,
status_code=HTTPStatus.BAD_REQUEST,
)
async def generate_request(obj: GenerateReqInput, request: Request):
"""Handle a generate request."""
if obj.stream: