Tiny support setting numa nodes for different ranks (#10006)
This commit is contained in:
@@ -158,6 +158,7 @@ from sglang.srt.utils import (
|
|||||||
get_zmq_socket,
|
get_zmq_socket,
|
||||||
is_cpu,
|
is_cpu,
|
||||||
kill_itself_when_parent_died,
|
kill_itself_when_parent_died,
|
||||||
|
numa_bind_to_node,
|
||||||
point_to_point_pyobj,
|
point_to_point_pyobj,
|
||||||
pyspy_dump_schedulers,
|
pyspy_dump_schedulers,
|
||||||
require_mlp_sync,
|
require_mlp_sync,
|
||||||
@@ -2519,6 +2520,9 @@ def run_scheduler_process(
|
|||||||
pipe_writer,
|
pipe_writer,
|
||||||
balance_meta: Optional[DPBalanceMeta] = None,
|
balance_meta: Optional[DPBalanceMeta] = None,
|
||||||
):
|
):
|
||||||
|
if (numa_node := server_args.numa_node) is not None:
|
||||||
|
numa_bind_to_node(numa_node[gpu_id])
|
||||||
|
|
||||||
# Generate the prefix
|
# Generate the prefix
|
||||||
prefix = ""
|
prefix = ""
|
||||||
if dp_rank is not None:
|
if dp_rank is not None:
|
||||||
|
|||||||
@@ -351,6 +351,7 @@ class ServerArgs:
|
|||||||
disable_fast_image_processor: bool = False
|
disable_fast_image_processor: bool = False
|
||||||
enable_return_hidden_states: bool = False
|
enable_return_hidden_states: bool = False
|
||||||
scheduler_recv_interval: int = 1
|
scheduler_recv_interval: int = 1
|
||||||
|
numa_node: Optional[List[int]] = None
|
||||||
|
|
||||||
# Debug tensor dumps
|
# Debug tensor dumps
|
||||||
debug_tensor_dump_output_folder: Optional[str] = None
|
debug_tensor_dump_output_folder: Optional[str] = None
|
||||||
@@ -1991,6 +1992,12 @@ class ServerArgs:
|
|||||||
default=ServerArgs.scheduler_recv_interval,
|
default=ServerArgs.scheduler_recv_interval,
|
||||||
help="The interval to poll requests in scheduler. Can be set to >1 to reduce the overhead of this.",
|
help="The interval to poll requests in scheduler. Can be set to >1 to reduce the overhead of this.",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--numa-node",
|
||||||
|
type=int,
|
||||||
|
nargs="+",
|
||||||
|
help="Sets the numa node for the subprocesses. i-th element corresponds to i-th subprocess.",
|
||||||
|
)
|
||||||
|
|
||||||
# Debug tensor dumps
|
# Debug tensor dumps
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
|
|||||||
@@ -3027,3 +3027,12 @@ def check_cuda_result(raw_output):
|
|||||||
raise Exception(f"CUDA error: {err}")
|
raise Exception(f"CUDA error: {err}")
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
||||||
|
def numa_bind_to_node(node: int):
    """Bind the calling process to a NUMA node and prefer node-local memory.

    Restricts scheduling of the current task to the CPUs of ``node`` via
    libnuma's ``numa_run_on_node`` and then switches the memory allocation
    policy to local allocation via ``numa_set_localalloc``.

    Args:
        node: Index of the NUMA node to run on. It is forwarded unchanged to
            ``numa_run_on_node``.

    Raises:
        OSError: If libnuma cannot be loaded, or if ``numa_run_on_node``
            reports failure (for example, the node does not exist).
        SystemError: If libnuma reports that NUMA is not available.
    """
    # Local import so the block does not depend on a file-level
    # ``import ctypes.util`` that may not exist.
    from ctypes.util import find_library

    # The unversioned "libnuma.so" name is commonly only a development
    # symlink; ask the linker for the installed runtime library first and
    # fall back to the original name so a missing library still raises the
    # same OSError from CDLL.
    libnuma = ctypes.CDLL(find_library("numa") or "libnuma.so", use_errno=True)

    libnuma.numa_available.restype = ctypes.c_int
    if libnuma.numa_available() < 0:
        raise SystemError("numa not available on this system")

    libnuma.numa_run_on_node.argtypes = [ctypes.c_int]
    libnuma.numa_run_on_node.restype = ctypes.c_int
    # numa_run_on_node returns -1 and sets errno on failure; surface that
    # instead of silently continuing without any binding in place.
    if libnuma.numa_run_on_node(node) < 0:
        raise OSError(
            ctypes.get_errno(), f"numa_run_on_node failed for node {node}"
        )

    # numa_set_localalloc returns void.
    libnuma.numa_set_localalloc.restype = None
    libnuma.numa_set_localalloc()
|
||||||
|
|||||||
Reference in New Issue
Block a user