[PD] Support get_model_info interface for mini_lb (#9792)

Signed-off-by: Xuchun Shang <xuchun.shang@linux.alibaba.com>
Co-authored-by: Teng Ma <sima.mt@alibaba-inc.com>
This commit is contained in:
Xuchun Shang
2025-08-29 15:54:03 +08:00
committed by GitHub
parent 9a7c8842ba
commit e5b29bf14e

View File

@@ -7,6 +7,7 @@ import dataclasses
import logging import logging
import random import random
import urllib import urllib
from http import HTTPStatus
from itertools import chain from itertools import chain
from typing import List, Optional from typing import List, Optional
@@ -262,14 +263,38 @@ async def get_server_info():
@app.get("/get_model_info")
async def get_model_info():
    """Return model information fetched from the first registered prefill server.

    The request is forwarded to ``<prefill_server>/get_model_info`` and the
    JSON body of the backend response is returned unchanged.

    Returns:
        ORJSONResponse wrapping the JSON payload returned by the backend.

    Raises:
        HTTPException: 503 when no load balancer or no prefill server is
            registered; 502 when the backend answers with a non-200 status;
            503 when the request to the backend fails with an
            ``aiohttp.ClientError``.
    """
    global load_balancer

    if not load_balancer or not load_balancer.prefill_servers:
        raise HTTPException(
            status_code=HTTPStatus.SERVICE_UNAVAILABLE,
            detail="There is no server registered",
        )

    # Only the first prefill server is queried.
    target_server_url = load_balancer.prefill_servers[0]
    endpoint_url = f"{target_server_url}/get_model_info"

    async with aiohttp.ClientSession() as session:
        try:
            async with session.get(endpoint_url) as response:
                if response.status != HTTPStatus.OK:
                    error_text = await response.text()
                    # HTTPException is not an aiohttp.ClientError, so it
                    # propagates past the except clause below untouched.
                    raise HTTPException(
                        status_code=HTTPStatus.BAD_GATEWAY,
                        detail=(
                            f"Failed to get model info from {target_server_url}. "
                            f"Status: {response.status}, Response: {error_text}"
                        ),
                    )
                model_info_json = await response.json()
        except aiohttp.ClientError as e:
            # Chain the original error so the root cause stays in tracebacks.
            raise HTTPException(
                status_code=HTTPStatus.SERVICE_UNAVAILABLE,
                detail="Failed to get model info from backend",
            ) from e

    return ORJSONResponse(content=model_info_json)
@app.post("/generate") @app.post("/generate")