Fix identical super_device_id on all cards on A3 & add min_tokens support to the proxy (#2939)
### What this PR does / why we need it?
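Fix ranktable generation on A3, where every card was reported with the same super_device_id (see the before/after output below), and make the proxy set min_tokens alongside max_tokens on the request it sends.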
### Does this PR introduce _any_ user-facing change?
### How was this patch tested?
Generated a ranktable on 2*A3 and compared the output before and after the change.
before:
"prefill_device_list": [
{
"server_id": "xxx",
"device_id": "0",
"device_ip": "xxx",
"super_pod_id": "0",
"super_device_id": "106758159",
"cluster_id": "1"
},
{
"server_id": "xxx",
"device_id": "1",
"device_ip": "xxx",
"super_pod_id": "0",
"super_device_id": "106758159",
"cluster_id": "2"
}...
after:
"prefill_device_list": [
{
"server_id": "xxx",
"device_id": "0",
"device_ip": "xxx",
"super_pod_id": "0",
"super_device_id": "104857600",
"cluster_id": "1"
},
{
"server_id": "xxx",
"device_id": "1",
"device_ip": "xxx",
"super_pod_id": "0",
"super_device_id": "104923137",
"cluster_id": "2"
}...
---------
Signed-off-by: liziyu <liziyu16@huawei.com>
This commit is contained in:
@@ -73,6 +73,8 @@ if local_rank == "0":
    super_pod_id = "0"
    for idx in range(len(local_device_ids)):
        device_id = local_device_ids[idx]
        chip_id = device_id % chips_per_card
        card_id = device_id // chips_per_card
        if soc_info == AscendSocVersion.A3:
            device_ip = get_cmd_stdout(
                f"{hccn_tool_path} -i {device_id} -vnic -g | grep ipaddr"
@@ -363,6 +363,7 @@ async def send_request_to_service(client: httpx.AsyncClient,
    }
    req_data["stream"] = False
    req_data["max_tokens"] = 1
    req_data["min_tokens"] = 1
    if "stream_options" in req_data:
        del req_data["stream_options"]
    headers = {
Reference in New Issue
Block a user