From df97b31f378995a3cd611445047e9aab9d23841b Mon Sep 17 00:00:00 2001
From: fzyzcjy <5236035+fzyzcjy@users.noreply.github.com>
Date: Fri, 5 Sep 2025 19:01:27 +0800
Subject: [PATCH] Tiny support setting numa nodes for different ranks (#10006)

---
Review notes (this section is ignored by `git am`):
  * numa_bind_to_node: try the runtime library names before the unversioned
    "libnuma.so", and check the return value of numa_run_on_node.
  * run_scheduler_process: validate that --numa-node has an entry for gpu_id.
  * --numa-node: pass default=ServerArgs.numa_node like the sibling arguments.
  * The blob hashes on the "index" lines are those of the original commit and
    were not recomputed for the revised hunks.

 python/sglang/srt/managers/scheduler.py | 11 +++++++++++
 python/sglang/srt/server_args.py        |  8 ++++++++
 python/sglang/srt/utils.py              | 41 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 60 insertions(+)

diff --git a/python/sglang/srt/managers/scheduler.py b/python/sglang/srt/managers/scheduler.py
index 2b9cd5234..91901ca8b 100644
--- a/python/sglang/srt/managers/scheduler.py
+++ b/python/sglang/srt/managers/scheduler.py
@@ -158,6 +158,7 @@ from sglang.srt.utils import (
     get_zmq_socket,
     is_cpu,
     kill_itself_when_parent_died,
+    numa_bind_to_node,
     point_to_point_pyobj,
     pyspy_dump_schedulers,
     require_mlp_sync,
@@ -2519,6 +2520,16 @@ def run_scheduler_process(
     pipe_writer,
     balance_meta: Optional[DPBalanceMeta] = None,
 ):
+    if (numa_node := server_args.numa_node) is not None:
+        # The list is indexed by gpu_id; fail with a clear message instead of
+        # a bare IndexError when it has no entry for this process.
+        if not 0 <= gpu_id < len(numa_node):
+            raise ValueError(
+                f"--numa-node has {len(numa_node)} entries, "
+                f"but an entry for gpu_id={gpu_id} is required"
+            )
+        numa_bind_to_node(numa_node[gpu_id])
+
     # Generate the prefix
     prefix = ""
     if dp_rank is not None:
diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py
index aaf9a49f5..779fb5be0 100644
--- a/python/sglang/srt/server_args.py
+++ b/python/sglang/srt/server_args.py
@@ -351,6 +351,7 @@ class ServerArgs:
     disable_fast_image_processor: bool = False
     enable_return_hidden_states: bool = False
     scheduler_recv_interval: int = 1
+    numa_node: Optional[List[int]] = None
 
     # Debug tensor dumps
     debug_tensor_dump_output_folder: Optional[str] = None
@@ -1991,6 +1992,13 @@ class ServerArgs:
             default=ServerArgs.scheduler_recv_interval,
             help="The interval to poll requests in scheduler. Can be set to >1 to reduce the overhead of this.",
         )
+        parser.add_argument(
+            "--numa-node",
+            type=int,
+            nargs="+",
+            default=ServerArgs.numa_node,
+            help="Sets the numa node for the subprocesses. i-th element corresponds to i-th subprocess.",
+        )
 
         # Debug tensor dumps
         parser.add_argument(
diff --git a/python/sglang/srt/utils.py b/python/sglang/srt/utils.py
index cb40266ec..22cdc051a 100644
--- a/python/sglang/srt/utils.py
+++ b/python/sglang/srt/utils.py
@@ -3027,3 +3027,44 @@ def check_cuda_result(raw_output):
         raise Exception(f"CUDA error: {err}")
 
     return results
+
+
+def numa_bind_to_node(node: int):
+    """Pin the calling process to a NUMA node and prefer node-local memory.
+
+    Args:
+        node: Index of the NUMA node to run on.
+
+    Raises:
+        OSError: If no libnuma shared library can be loaded.
+        SystemError: If libnuma reports that NUMA is unavailable, or if
+            binding to ``node`` fails (e.g. the node does not exist).
+    """
+    # Function-scope import: guarantees ``ctypes.util`` is loaded no matter
+    # how the enclosing module imported ``ctypes``.
+    import ctypes.util
+
+    # The unversioned ``libnuma.so`` symlink usually ships only with the
+    # development package, so try the runtime names first.
+    libnuma = None
+    for lib_name in (ctypes.util.find_library("numa"), "libnuma.so.1"):
+        if lib_name is None:
+            continue
+        try:
+            libnuma = ctypes.CDLL(lib_name, use_errno=True)
+            break
+        except OSError:
+            continue
+    if libnuma is None:
+        # Last resort; an OSError here propagates to the caller.
+        libnuma = ctypes.CDLL("libnuma.so", use_errno=True)
+
+    if libnuma.numa_available() < 0:
+        raise SystemError("numa not available on this system")
+
+    # numa_run_on_node returns -1 and sets errno on failure.
+    if libnuma.numa_run_on_node(ctypes.c_int(node)) != 0:
+        raise SystemError(
+            f"failed to bind to numa node {node} (errno={ctypes.get_errno()})"
+        )
+    libnuma.numa_set_localalloc()