Tiny support setting numa nodes for different ranks (#10006)
This commit is contained in:
@@ -158,6 +158,7 @@ from sglang.srt.utils import (
|
||||
get_zmq_socket,
|
||||
is_cpu,
|
||||
kill_itself_when_parent_died,
|
||||
numa_bind_to_node,
|
||||
point_to_point_pyobj,
|
||||
pyspy_dump_schedulers,
|
||||
require_mlp_sync,
|
||||
@@ -2519,6 +2520,9 @@ def run_scheduler_process(
|
||||
pipe_writer,
|
||||
balance_meta: Optional[DPBalanceMeta] = None,
|
||||
):
|
||||
if (numa_node := server_args.numa_node) is not None:
|
||||
numa_bind_to_node(numa_node[gpu_id])
|
||||
|
||||
# Generate the prefix
|
||||
prefix = ""
|
||||
if dp_rank is not None:
|
||||
|
||||
@@ -351,6 +351,7 @@ class ServerArgs:
|
||||
disable_fast_image_processor: bool = False
|
||||
enable_return_hidden_states: bool = False
|
||||
scheduler_recv_interval: int = 1
|
||||
numa_node: Optional[List[int]] = None
|
||||
|
||||
# Debug tensor dumps
|
||||
debug_tensor_dump_output_folder: Optional[str] = None
|
||||
@@ -1991,6 +1992,12 @@ class ServerArgs:
|
||||
default=ServerArgs.scheduler_recv_interval,
|
||||
help="The interval to poll requests in scheduler. Can be set to >1 to reduce the overhead of this.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--numa-node",
|
||||
type=int,
|
||||
nargs="+",
|
||||
help="Sets the numa node for the subprocesses. i-th element corresponds to i-th subprocess.",
|
||||
)
|
||||
|
||||
# Debug tensor dumps
|
||||
parser.add_argument(
|
||||
|
||||
@@ -3027,3 +3027,12 @@ def check_cuda_result(raw_output):
|
||||
raise Exception(f"CUDA error: {err}")
|
||||
|
||||
return results
|
||||
|
||||
|
||||
def numa_bind_to_node(node: int):
    """Bind the current process to a single NUMA node via libnuma.

    Restricts the process to run on the CPUs of *node* and switches the
    memory allocation policy to local allocation, so memory is preferred
    from the node the process is running on.

    Args:
        node: Index of the NUMA node to bind this process to.

    Raises:
        OSError: If ``libnuma.so`` cannot be loaded (propagated from
            ``ctypes.CDLL``).
        SystemError: If NUMA is not available on this system, or if
            binding to the requested node fails.
    """
    libnuma = ctypes.CDLL("libnuma.so")
    if libnuma.numa_available() < 0:
        raise SystemError("numa not available on this system")

    # numa_run_on_node returns 0 on success and -1 on failure (e.g. an
    # out-of-range node id). Surface the failure instead of silently
    # continuing unbound with a local-alloc policy.
    if libnuma.numa_run_on_node(ctypes.c_int(node)) != 0:
        raise SystemError(f"failed to bind process to numa node {node}")
    libnuma.numa_set_localalloc()
|
||||
|
||||
Reference in New Issue
Block a user