fix: correct GPU process CPU affinity when pp_size > 1 (#11389)
This commit is contained in:
@@ -510,7 +510,9 @@ def latency_test(
|
|||||||
|
|
||||||
# Set CPU affinity
|
# Set CPU affinity
|
||||||
if get_bool_env_var("SGLANG_SET_CPU_AFFINITY"):
|
if get_bool_env_var("SGLANG_SET_CPU_AFFINITY"):
|
||||||
set_gpu_proc_affinity(server_args.tp_size, server_args.nnodes, tp_rank)
|
set_gpu_proc_affinity(
|
||||||
|
server_args.pp_size, server_args.tp_size, server_args.nnodes, tp_rank
|
||||||
|
)
|
||||||
|
|
||||||
# Configure the logger
|
# Configure the logger
|
||||||
configure_logger(server_args, prefix=f" TP{tp_rank}")
|
configure_logger(server_args, prefix=f" TP{tp_rank}")
|
||||||
|
|||||||
@@ -2921,7 +2921,9 @@ def run_scheduler_process(
|
|||||||
|
|
||||||
# Set cpu affinity to this gpu process
|
# Set cpu affinity to this gpu process
|
||||||
if get_bool_env_var("SGLANG_SET_CPU_AFFINITY"):
|
if get_bool_env_var("SGLANG_SET_CPU_AFFINITY"):
|
||||||
set_gpu_proc_affinity(server_args.tp_size, server_args.nnodes, gpu_id)
|
set_gpu_proc_affinity(
|
||||||
|
server_args.pp_size, server_args.tp_size, server_args.nnodes, gpu_id
|
||||||
|
)
|
||||||
if (numa_node := server_args.numa_node) is not None:
|
if (numa_node := server_args.numa_node) is not None:
|
||||||
numa_bind_to_node(numa_node[gpu_id])
|
numa_bind_to_node(numa_node[gpu_id])
|
||||||
|
|
||||||
|
|||||||
@@ -1891,6 +1891,7 @@ def direct_register_custom_op(
|
|||||||
|
|
||||||
|
|
||||||
def set_gpu_proc_affinity(
|
def set_gpu_proc_affinity(
|
||||||
|
pp_size: int,
|
||||||
tp_size: int,
|
tp_size: int,
|
||||||
nnodes: int,
|
nnodes: int,
|
||||||
gpu_id: int,
|
gpu_id: int,
|
||||||
@@ -1899,7 +1900,8 @@ def set_gpu_proc_affinity(
|
|||||||
pid = os.getpid()
|
pid = os.getpid()
|
||||||
p = psutil.Process(pid)
|
p = psutil.Process(pid)
|
||||||
|
|
||||||
tp_size_per_node = tp_size // nnodes
|
nnodes_per_tp_group = max(nnodes // pp_size, 1)
|
||||||
|
tp_size_per_node = tp_size // nnodes_per_tp_group
|
||||||
|
|
||||||
# total physical cores
|
# total physical cores
|
||||||
total_pcores = psutil.cpu_count(logical=False)
|
total_pcores = psutil.cpu_count(logical=False)
|
||||||
|
|||||||
Reference in New Issue
Block a user