Organize sampling batch info better (#1562)

This commit is contained in:
Lianmin Zheng
2024-10-03 18:29:49 -07:00
committed by GitHub
parent e0b5dbcec1
commit 32eb6e96f2
8 changed files with 43 additions and 35 deletions

View File

@@ -118,6 +118,7 @@ async def health_generate(request: Request) -> Response:
@app.get("/get_model_info")
async def get_model_info():
"""Get the model information."""
result = {
"model_path": tokenizer_manager.model_path,
"is_generation": tokenizer_manager.is_generation,
@@ -127,11 +128,13 @@ async def get_model_info():
@app.get("/get_server_args")
async def get_server_args():
"""Get the server arguments."""
return dataclasses.asdict(tokenizer_manager.server_args)
@app.get("/flush_cache")
async def flush_cache():
"""Flush the radix cache."""
tokenizer_manager.flush_cache()
return Response(
content="Cache flushed.\nPlease check backend logs for more details. "
@@ -142,7 +145,7 @@ async def flush_cache():
@app.post("/update_weights")
async def update_weights(obj: UpdateWeightReqInput, request: Request):
"""Update the weights inplace without re-launching the server."""
success, message = await tokenizer_manager.update_weights(obj, request)
content = {"success": success, "message": message}
if success:
@@ -205,7 +208,7 @@ app.put("/encode")(encode_request)
async def judge_request(obj: RewardReqInput, request: Request):
"""Handle an embedding request."""
"""Handle a reward model request."""
try:
ret = await tokenizer_manager.generate_request(obj, request).__anext__()
return ret
@@ -307,7 +310,7 @@ def launch_server(
ports = server_args.additional_ports
port_args = PortArgs(
tokenizer_port=ports[0],
-        scheduler_port=ports[1],
+        scheduler_input_port=ports[1],
detokenizer_port=ports[2],
nccl_ports=ports[3:],
)