Auto handle PD disaggregation in bench_serving (#6587)
Co-authored-by: yizhang2077 <1109276519@qq.com>
This commit is contained in:
@@ -1304,14 +1304,12 @@ async def benchmark(
     if "sglang" in backend:
         server_info = requests.get(base_url + "/get_server_info")
         if server_info.status_code == 200:
-            if pd_separated:
-                accept_length = server_info.json()["decode"][0]["internal_states"][
-                    0
-                ].get("avg_spec_accept_length", None)
-            else:
-                accept_length = server_info.json()["internal_states"][0].get(
-                    "avg_spec_accept_length", None
-                )
+            server_info_json = server_info.json()
+            if "decode" in server_info_json:
+                server_info_json = server_info_json["decode"][0]
+            accept_length = server_info_json["internal_states"][0].get(
+                "avg_spec_accept_length", None
+            )
         else:
             accept_length = None
     else:
Reference in New Issue
Block a user