diff --git a/examples/disaggregated_prefill_v1/gen_ranktable.py b/examples/disaggregated_prefill_v1/gen_ranktable.py index 37ea9b4..ad86c84 100644 --- a/examples/disaggregated_prefill_v1/gen_ranktable.py +++ b/examples/disaggregated_prefill_v1/gen_ranktable.py @@ -73,6 +73,8 @@ if local_rank == "0": super_pod_id = "0" for idx in range(len(local_device_ids)): device_id = local_device_ids[idx] + chip_id = device_id % chips_per_card + card_id = device_id // chips_per_card if soc_info == AscendSocVersion.A3: device_ip = get_cmd_stdout( f"{hccn_tool_path} -i {device_id} -vnic -g | grep ipaddr" diff --git a/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py b/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py index 727233e..2728931 100644 --- a/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py +++ b/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py @@ -363,6 +363,7 @@ async def send_request_to_service(client: httpx.AsyncClient, } req_data["stream"] = False req_data["max_tokens"] = 1 + req_data["min_tokens"] = 1 if "stream_options" in req_data: del req_data["stream_options"] headers = {