[CPU] Adding Memory Capacity Acquisition Functionality (#11102)
This commit is contained in:
@@ -639,7 +639,7 @@ class ServerArgs:
|
|||||||
if self.cuda_graph_max_bs > 300:
|
if self.cuda_graph_max_bs > 300:
|
||||||
reserved_mem += self.cuda_graph_max_bs * self.dp_size * 1.5
|
reserved_mem += self.cuda_graph_max_bs * self.dp_size * 1.5
|
||||||
|
|
||||||
if gpu_mem > 60 * 1024:
|
if gpu_mem is not None and gpu_mem > 60 * 1024:
|
||||||
reserved_mem = max(reserved_mem, 10 * 1024)
|
reserved_mem = max(reserved_mem, 10 * 1024)
|
||||||
|
|
||||||
if self.speculative_algorithm is not None:
|
if self.speculative_algorithm is not None:
|
||||||
@@ -650,7 +650,11 @@ class ServerArgs:
|
|||||||
# eagle draft models and cuda graphs
|
# eagle draft models and cuda graphs
|
||||||
reserved_mem += 2 * 1024
|
reserved_mem += 2 * 1024
|
||||||
|
|
||||||
self.mem_fraction_static = round((gpu_mem - reserved_mem) / gpu_mem, 3)
|
self.mem_fraction_static = (
|
||||||
|
round((gpu_mem - reserved_mem) / gpu_mem, 3)
|
||||||
|
if gpu_mem is not None
|
||||||
|
else 0.88
|
||||||
|
)
|
||||||
|
|
||||||
# Lazy init to avoid circular import
|
# Lazy init to avoid circular import
|
||||||
# Multimodal models need more memory for the image processor
|
# Multimodal models need more memory for the image processor
|
||||||
|
|||||||
@@ -1507,6 +1507,32 @@ def get_npu_memory_capacity():
|
|||||||
raise ImportError("torch_npu is required when run on npu device.")
|
raise ImportError("torch_npu is required when run on npu device.")
|
||||||
|
|
||||||
|
|
||||||
|
def get_cpu_memory_capacity():
    """Return the per-rank CPU memory capacity in MB, or ``None`` if unknown.

    The capacity is the ``MemTotal`` of the smallest NUMA node, read from
    ``/sys/devices/system/node/node<N>/meminfo``. When that information
    cannot be read or parsed, the total system memory is split evenly
    across the detected NUMA nodes instead.

    Returns:
        ``None`` when ``SGLANG_CPU_OMP_THREADS_BIND`` is set to a non-empty
        value; otherwise the capacity in MB as a ``float``.
    """
    # Per-rank memory capacity cannot be determined for customized core settings
    if os.environ.get("SGLANG_CPU_OMP_THREADS_BIND", ""):
        return None

    n_numa_node: int = len(get_cpu_ids_by_node())
    if n_numa_node == 0:
        # Cannot determine NUMA config, fallback to total memory and avoid
        # ZeroDivisionError.
        return float(psutil.virtual_memory().total // (1 << 20))

    node_dir = "/sys/devices/system/node/"
    try:
        numa_mem_list = []
        for numa_id in range(n_numa_node):
            meminfo_path = os.path.join(node_dir, f"node{numa_id}/meminfo")
            with open(meminfo_path, "r") as f:
                # 1st line contains 'MemTotal'; only that line is needed.
                first_line = f.readline()
            # The 4th whitespace-separated field is the size (in KB).
            numa_mem_list.append(int(first_line.split()[3]))
    except (OSError, IndexError, ValueError):
        # sysfs node info is missing, unreadable (e.g. restricted container),
        # or not in the expected format: assume memory is evenly distributed
        # over the NUMA nodes rather than failing.
        numa_mem = psutil.virtual_memory().total / n_numa_node
        # Retrieved value in Byte, need MB
        return float(numa_mem // (1 << 20))

    # Retrieved value in KB, need MB
    return float(min(numa_mem_list) // 1024)
|
||||||
|
|
||||||
|
|
||||||
def get_device_memory_capacity(device: str = None):
|
def get_device_memory_capacity(device: str = None):
|
||||||
if is_cuda():
|
if is_cuda():
|
||||||
gpu_mem = get_nvgpu_memory_capacity()
|
gpu_mem = get_nvgpu_memory_capacity()
|
||||||
@@ -1516,6 +1542,8 @@ def get_device_memory_capacity(device: str = None):
|
|||||||
gpu_mem = get_hpu_memory_capacity()
|
gpu_mem = get_hpu_memory_capacity()
|
||||||
elif device == "npu":
|
elif device == "npu":
|
||||||
gpu_mem = get_npu_memory_capacity()
|
gpu_mem = get_npu_memory_capacity()
|
||||||
|
elif device == "cpu":
|
||||||
|
gpu_mem = get_cpu_memory_capacity()
|
||||||
else:
|
else:
|
||||||
# GPU memory is not known yet or no GPU is available.
|
# GPU memory is not known yet or no GPU is available.
|
||||||
gpu_mem = None
|
gpu_mem = None
|
||||||
|
|||||||
Reference in New Issue
Block a user