From aa3c4563ce8ea83f6a42041b3c5559389f87d892 Mon Sep 17 00:00:00 2001 From: liziyu <56102866+liziyu179@users.noreply.github.com> Date: Tue, 16 Sep 2025 01:09:18 +0800 Subject: [PATCH] fix all cards super_pod_id same on A3 & proxy support min_tokens (#2939) ### What this PR does / why we need it? Fix every card on A3 getting the same super-pod information in the generated ranktable (the samples below show the duplicated super_device_id) by computing chip_id and card_id from each device_id inside the per-device loop of gen_ranktable.py. Also make the proxy set min_tokens to 1, next to max_tokens, on the request built in send_request_to_service. ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? Generated the ranktable on 2*A3. Before: "prefill_device_list": [ { "server_id": "xxx", "device_id": "0", "device_ip": "xxx", "super_pod_id": "0", "super_device_id": "106758159", "cluster_id": "1" }, { "server_id": "xxx", "device_id": "1", "device_ip": "xxx", "super_pod_id": "0", "super_device_id": "106758159", "cluster_id": "2" }... After: "prefill_device_list": [ { "server_id": "xxx", "device_id": "0", "device_ip": "xxx", "super_pod_id": "0", "super_device_id": "104857600", "cluster_id": "1" }, { "server_id": "xxx", "device_id": "1", "device_ip": "xxx", "super_pod_id": "0", "super_device_id": "104923137", "cluster_id": "2" }... 
--------- Signed-off-by: liziyu --- examples/disaggregated_prefill_v1/gen_ranktable.py | 2 ++ .../load_balance_proxy_server_example.py | 1 + 2 files changed, 3 insertions(+) diff --git a/examples/disaggregated_prefill_v1/gen_ranktable.py b/examples/disaggregated_prefill_v1/gen_ranktable.py index 37ea9b4..ad86c84 100644 --- a/examples/disaggregated_prefill_v1/gen_ranktable.py +++ b/examples/disaggregated_prefill_v1/gen_ranktable.py @@ -73,6 +73,8 @@ if local_rank == "0": super_pod_id = "0" for idx in range(len(local_device_ids)): device_id = local_device_ids[idx] + chip_id = device_id % chips_per_card + card_id = device_id // chips_per_card if soc_info == AscendSocVersion.A3: device_ip = get_cmd_stdout( f"{hccn_tool_path} -i {device_id} -vnic -g | grep ipaddr" diff --git a/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py b/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py index 727233e..2728931 100644 --- a/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py +++ b/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py @@ -363,6 +363,7 @@ async def send_request_to_service(client: httpx.AsyncClient, } req_data["stream"] = False req_data["max_tokens"] = 1 + req_data["min_tokens"] = 1 if "stream_options" in req_data: del req_data["stream_options"] headers = {