[Feat] Support update weights without restart server (#1157)
This commit is contained in:
@@ -51,7 +51,11 @@ from sglang.srt.managers.controller_single import (
|
||||
start_controller_process as start_controller_process_single,
|
||||
)
|
||||
from sglang.srt.managers.detokenizer_manager import start_detokenizer_process
|
||||
from sglang.srt.managers.io_struct import EmbeddingReqInput, GenerateReqInput
|
||||
from sglang.srt.managers.io_struct import (
|
||||
EmbeddingReqInput,
|
||||
GenerateReqInput,
|
||||
UpdateWeightReqInput,
|
||||
)
|
||||
from sglang.srt.managers.tokenizer_manager import TokenizerManager
|
||||
from sglang.srt.openai_api.adapter import (
|
||||
load_chat_template_for_openai_api,
|
||||
@@ -136,6 +140,23 @@ async def flush_cache():
|
||||
)
|
||||
|
||||
|
||||
@app.post("/update_weights")
async def update_weights(obj: UpdateWeightReqInput, request: Request):
    """Handle an update-weights request.

    Passes the request to ``tokenizer_manager.update_weights`` and wraps the
    returned ``(success, message)`` pair in a JSON response. The status code
    is 200 OK when ``success`` is truthy and 400 Bad Request otherwise.
    """
    success, message = await tokenizer_manager.update_weights(obj, request)

    # The success flag is sent through str(), so clients receive it as a
    # string rather than a JSON boolean.
    payload = {"message": message, "success": str(success)}
    status = HTTPStatus.OK if success else HTTPStatus.BAD_REQUEST

    return JSONResponse(payload, status_code=status)
|
||||
|
||||
|
||||
async def generate_request(obj: GenerateReqInput, request: Request):
|
||||
"""Handle a generate request."""
|
||||
if obj.stream:
|
||||
|
||||
Reference in New Issue
Block a user