Crash the server when error or OOM happens (#514)

This commit is contained in:
Lianmin Zheng
2024-06-07 19:22:34 -07:00
committed by GitHub
parent f70f72586a
commit 91f93f141f
7 changed files with 63 additions and 22 deletions

View File

@@ -12,6 +12,7 @@ from io import BytesIO
from typing import List, Optional
import numpy as np
import psutil
import requests
import rpyc
import torch
@@ -441,6 +442,27 @@ def assert_pkg_version(pkg: str, min_version: str):
)
def kill_parent_process():
"""Kill the parent process and all children of the parent process."""
current_process = psutil.Process()
parent_process = current_process.parent()
children = current_process.children(recursive=True)
for child in children:
if child.pid != current_process.pid:
os.kill(child.pid, 9)
os.kill(parent_process.pid, 9)
def monkey_patch_vllm_p2p_access_check():
"""
Monkey patch the slow p2p access check in vllm.
NOTE: We assume the p2p access is always allowed, which can be wrong for some setups.
"""
import vllm.distributed.device_communicators.custom_all_reduce_utils as tgt
setattr(tgt, "gpu_p2p_access_check", lambda *arg, **kwargs: True)
API_KEY_HEADER_NAME = "X-API-Key"
@@ -459,3 +481,4 @@ class APIKeyValidatorMiddleware(BaseHTTPMiddleware):
)
response = await call_next(request)
return response