From 0fe87213bb147f027df6ca5a15db9e0a1718ccd8 Mon Sep 17 00:00:00 2001 From: Yingchun Lai Date: Fri, 10 Oct 2025 09:40:05 +0800 Subject: [PATCH] fix: fix gpu-proc affinity set incorrectly when pp_size > 1 (#11389) --- python/sglang/bench_one_batch.py | 4 +++- python/sglang/srt/managers/scheduler.py | 4 +++- python/sglang/srt/utils/common.py | 4 +++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/python/sglang/bench_one_batch.py b/python/sglang/bench_one_batch.py index f8a35266b..6dcfc0d3b 100644 --- a/python/sglang/bench_one_batch.py +++ b/python/sglang/bench_one_batch.py @@ -510,7 +510,9 @@ def latency_test( # Set CPU affinity if get_bool_env_var("SGLANG_SET_CPU_AFFINITY"): - set_gpu_proc_affinity(server_args.tp_size, server_args.nnodes, tp_rank) + set_gpu_proc_affinity( + server_args.pp_size, server_args.tp_size, server_args.nnodes, tp_rank + ) # Configure the logger configure_logger(server_args, prefix=f" TP{tp_rank}") diff --git a/python/sglang/srt/managers/scheduler.py b/python/sglang/srt/managers/scheduler.py index 2810e6e36..d2bc3c056 100644 --- a/python/sglang/srt/managers/scheduler.py +++ b/python/sglang/srt/managers/scheduler.py @@ -2921,7 +2921,9 @@ def run_scheduler_process( # Set cpu affinity to this gpu process if get_bool_env_var("SGLANG_SET_CPU_AFFINITY"): - set_gpu_proc_affinity(server_args.tp_size, server_args.nnodes, gpu_id) + set_gpu_proc_affinity( + server_args.pp_size, server_args.tp_size, server_args.nnodes, gpu_id + ) if (numa_node := server_args.numa_node) is not None: numa_bind_to_node(numa_node[gpu_id]) diff --git a/python/sglang/srt/utils/common.py b/python/sglang/srt/utils/common.py index 7ac6b20c5..c1cff5ebc 100644 --- a/python/sglang/srt/utils/common.py +++ b/python/sglang/srt/utils/common.py @@ -1891,6 +1891,7 @@ def direct_register_custom_op( def set_gpu_proc_affinity( + pp_size: int, tp_size: int, nnodes: int, gpu_id: int, @@ -1899,7 +1900,8 @@ def set_gpu_proc_affinity( pid = os.getpid() p = psutil.Process(pid) - 
   tp_size_per_node = tp_size // nnodes
+    nnodes_per_tp_group = max(nnodes // pp_size, 1)
+    tp_size_per_node = tp_size // nnodes_per_tp_group
 
     # total physical cores
     total_pcores = psutil.cpu_count(logical=False)