[PD] Support get_model_info interface for mini_lb (#9792)
Signed-off-by: Xuchun Shang <xuchun.shang@linux.alibaba.com>
Co-authored-by: Teng Ma <sima.mt@alibaba-inc.com>
This commit is contained in:
@@ -7,6 +7,7 @@ import dataclasses
|
|||||||
import logging
|
import logging
|
||||||
import random
|
import random
|
||||||
import urllib
|
import urllib
|
||||||
|
from http import HTTPStatus
|
||||||
from itertools import chain
|
from itertools import chain
|
||||||
from typing import List, Optional
|
from typing import List, Optional
|
||||||
|
|
||||||
@@ -262,14 +263,38 @@ async def get_server_info():
|
|||||||
|
|
||||||
@app.get("/get_model_info")
async def get_model_info():
    """Return model information fetched from a registered prefill server.

    The request is forwarded to the ``/get_model_info`` endpoint of the first
    entry in ``load_balancer.prefill_servers`` and the backend's JSON payload
    is relayed unchanged.

    Returns:
        ORJSONResponse wrapping the JSON body returned by the backend.

    Raises:
        HTTPException: 503 (SERVICE_UNAVAILABLE) when no load balancer or no
            prefill server is registered, or when the HTTP request to the
            backend fails with an ``aiohttp.ClientError``; 502 (BAD_GATEWAY)
            when the backend answers with a non-200 status.
    """
    # ``load_balancer`` is only read here, so no ``global`` statement is needed.
    if not load_balancer or not load_balancer.prefill_servers:
        raise HTTPException(
            status_code=HTTPStatus.SERVICE_UNAVAILABLE,
            detail="There is no server registered",
        )

    # Only the first prefill server is queried.
    # NOTE(review): presumably all servers serve the same model -- confirm.
    target_server_url = load_balancer.prefill_servers[0]
    endpoint_url = f"{target_server_url}/get_model_info"

    async with aiohttp.ClientSession() as session:
        try:
            async with session.get(endpoint_url) as response:
                if response.status != 200:
                    error_text = await response.text()
                    raise HTTPException(
                        status_code=HTTPStatus.BAD_GATEWAY,
                        detail=(
                            # Trailing ". " keeps the URL and the status
                            # from running together in the message.
                            f"Failed to get model info from {target_server_url}. "
                            f"Status: {response.status}, Response: {error_text}"
                        ),
                    )

                model_info_json = await response.json()
                return ORJSONResponse(content=model_info_json)
        except aiohttp.ClientError as e:
            # Chain the original error so the root cause stays in tracebacks.
            raise HTTPException(
                status_code=HTTPStatus.SERVICE_UNAVAILABLE,
                detail="Failed to get model info from backend",
            ) from e
|
||||||
|
|
||||||
|
|
||||||
@app.post("/generate")
|
@app.post("/generate")
|
||||||
|
|||||||
Reference in New Issue
Block a user