[CPU] Adding Memory Capacity Acquisition Functionality (#11102)
This commit is contained in:
@@ -639,7 +639,7 @@ class ServerArgs:
|
||||
if self.cuda_graph_max_bs > 300:
|
||||
reserved_mem += self.cuda_graph_max_bs * self.dp_size * 1.5
|
||||
|
||||
if gpu_mem > 60 * 1024:
|
||||
if gpu_mem is not None and gpu_mem > 60 * 1024:
|
||||
reserved_mem = max(reserved_mem, 10 * 1024)
|
||||
|
||||
if self.speculative_algorithm is not None:
|
||||
@@ -650,7 +650,11 @@ class ServerArgs:
|
||||
# eagle draft models and cuda graphs
|
||||
reserved_mem += 2 * 1024
|
||||
|
||||
self.mem_fraction_static = round((gpu_mem - reserved_mem) / gpu_mem, 3)
|
||||
self.mem_fraction_static = (
|
||||
round((gpu_mem - reserved_mem) / gpu_mem, 3)
|
||||
if gpu_mem is not None
|
||||
else 0.88
|
||||
)
|
||||
|
||||
# Lazy init to avoid circular import
|
||||
# Multimodal models need more memory for the image processor
|
||||
|
||||
@@ -1507,6 +1507,32 @@ def get_npu_memory_capacity():
|
||||
raise ImportError("torch_npu is required when run on npu device.")
|
||||
|
||||
|
||||
def get_cpu_memory_capacity():
    """Return the per-rank CPU memory capacity in MB, or None.

    Returns None when SGLANG_CPU_OMP_THREADS_BIND is set, because per-rank
    capacity cannot be determined for customized core settings. Otherwise
    reads per-NUMA-node MemTotal from sysfs and returns the smallest node's
    capacity; falls back to psutil-reported totals when NUMA information is
    unavailable.
    """
    # Customized core binding makes per-rank capacity undeterminable.
    if os.environ.get("SGLANG_CPU_OMP_THREADS_BIND", ""):
        return None

    node_count: int = len(get_cpu_ids_by_node())
    if node_count == 0:
        # NUMA topology unknown: report total system memory (bytes -> MB)
        # and avoid a ZeroDivisionError in the per-node fallback below.
        return float(psutil.virtual_memory().total // (1 << 20))

    sysfs_root = "/sys/devices/system/node/"
    try:
        per_node_kb = []
        for node_id in range(node_count):
            meminfo_path = os.path.join(sysfs_root, f"node{node_id}/meminfo")
            with open(meminfo_path, "r") as meminfo:
                # The 1st line contains 'MemTotal'; field index 3 is the
                # value in KB.
                first_line = meminfo.read().split("\n")[0]
            per_node_kb.append(int(first_line.split()[3]))
        # Convert KB -> MB; the smallest node bounds the per-rank budget.
        return float(min(per_node_kb) // 1024)
    except FileNotFoundError:
        # No sysfs meminfo: split total memory evenly across nodes.
        # psutil reports bytes, so convert bytes -> MB.
        per_node_bytes = psutil.virtual_memory().total / node_count
        return float(per_node_bytes // (1 << 20))
|
||||
|
||||
|
||||
def get_device_memory_capacity(device: str = None):
|
||||
if is_cuda():
|
||||
gpu_mem = get_nvgpu_memory_capacity()
|
||||
@@ -1516,6 +1542,8 @@ def get_device_memory_capacity(device: str = None):
|
||||
gpu_mem = get_hpu_memory_capacity()
|
||||
elif device == "npu":
|
||||
gpu_mem = get_npu_memory_capacity()
|
||||
elif device == "cpu":
|
||||
gpu_mem = get_cpu_memory_capacity()
|
||||
else:
|
||||
# GPU memory is not known yet or no GPU is available.
|
||||
gpu_mem = None
|
||||
|
||||
Reference in New Issue
Block a user