Auto handle PD disaggregation in bench_serving (#6587)
Co-authored-by: yizhang2077 <1109276519@qq.com>
This commit is contained in:
@@ -1304,14 +1304,12 @@ async def benchmark(
|
|||||||
if "sglang" in backend:
|
if "sglang" in backend:
|
||||||
server_info = requests.get(base_url + "/get_server_info")
|
server_info = requests.get(base_url + "/get_server_info")
|
||||||
if server_info.status_code == 200:
|
if server_info.status_code == 200:
|
||||||
if pd_separated:
|
server_info_json = server_info.json()
|
||||||
accept_length = server_info.json()["decode"][0]["internal_states"][
|
if "decode" in server_info_json:
|
||||||
0
|
server_info_json = server_info_json["decode"][0]
|
||||||
].get("avg_spec_accept_length", None)
|
accept_length = server_info_json["internal_states"][0].get(
|
||||||
else:
|
"avg_spec_accept_length", None
|
||||||
accept_length = server_info.json()["internal_states"][0].get(
|
)
|
||||||
"avg_spec_accept_length", None
|
|
||||||
)
|
|
||||||
else:
|
else:
|
||||||
accept_length = None
|
accept_length = None
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
prompt = "The capital of taiwan is "
|
prompt = "The capital of france is "
|
||||||
|
|
||||||
import json
|
import json
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user