Fix identical super_device_id on all cards on A3 & add min_tokens support to the proxy (#2939)
### What this PR does / why we need it?
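Fixes rank table generation on A3, where every card was assigned the same super_device_id (see the before/after output below), and sets `min_tokens` to 1 alongside `max_tokens` in the proxy's request.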
### Does this PR introduce _any_ user-facing change?
### How was this patch tested?
Generated the rank table on a 2*A3 setup and compared the output:
before:
"prefill_device_list": [
{
"server_id": "xxx",
"device_id": "0",
"device_ip": "xxx",
"super_pod_id": "0",
"super_device_id": "106758159",
"cluster_id": "1"
},
{
"server_id": "xxx",
"device_id": "1",
"device_ip": "xxx",
"super_pod_id": "0",
"super_device_id": "106758159",
"cluster_id": "2"
}...
after:
"prefill_device_list": [
{
"server_id": "xxx",
"device_id": "0",
"device_ip": "xxx",
"super_pod_id": "0",
"super_device_id": "104857600",
"cluster_id": "1"
},
{
"server_id": "xxx",
"device_id": "1",
"device_ip": "xxx",
"super_pod_id": "0",
"super_device_id": "104923137",
"cluster_id": "2"
}...
---------
Signed-off-by: liziyu <liziyu16@huawei.com>
This commit is contained in:
@@ -73,6 +73,8 @@ if local_rank == "0":
|
|||||||
super_pod_id = "0"
|
super_pod_id = "0"
|
||||||
for idx in range(len(local_device_ids)):
|
for idx in range(len(local_device_ids)):
|
||||||
device_id = local_device_ids[idx]
|
device_id = local_device_ids[idx]
|
||||||
|
chip_id = device_id % chips_per_card
|
||||||
|
card_id = device_id // chips_per_card
|
||||||
if soc_info == AscendSocVersion.A3:
|
if soc_info == AscendSocVersion.A3:
|
||||||
device_ip = get_cmd_stdout(
|
device_ip = get_cmd_stdout(
|
||||||
f"{hccn_tool_path} -i {device_id} -vnic -g | grep ipaddr"
|
f"{hccn_tool_path} -i {device_id} -vnic -g | grep ipaddr"
|
||||||
|
|||||||
@@ -363,6 +363,7 @@ async def send_request_to_service(client: httpx.AsyncClient,
|
|||||||
}
|
}
|
||||||
req_data["stream"] = False
|
req_data["stream"] = False
|
||||||
req_data["max_tokens"] = 1
|
req_data["max_tokens"] = 1
|
||||||
|
req_data["min_tokens"] = 1
|
||||||
if "stream_options" in req_data:
|
if "stream_options" in req_data:
|
||||||
del req_data["stream_options"]
|
del req_data["stream_options"]
|
||||||
headers = {
|
headers = {
|
||||||
|
|||||||
Reference in New Issue
Block a user