Crash the server when error or OOM happens (#514)
This commit is contained in:
@@ -12,6 +12,7 @@ from io import BytesIO
|
||||
from typing import List, Optional
|
||||
|
||||
import numpy as np
|
||||
import psutil
|
||||
import requests
|
||||
import rpyc
|
||||
import torch
|
||||
@@ -441,6 +442,27 @@ def assert_pkg_version(pkg: str, min_version: str):
|
||||
)
|
||||
|
||||
|
||||
def kill_parent_process():
|
||||
"""Kill the parent process and all children of the parent process."""
|
||||
current_process = psutil.Process()
|
||||
parent_process = current_process.parent()
|
||||
children = current_process.children(recursive=True)
|
||||
for child in children:
|
||||
if child.pid != current_process.pid:
|
||||
os.kill(child.pid, 9)
|
||||
os.kill(parent_process.pid, 9)
|
||||
|
||||
|
||||
def monkey_patch_vllm_p2p_access_check():
|
||||
"""
|
||||
Monkey patch the slow p2p access check in vllm.
|
||||
NOTE: We assume the p2p access is always allowed, which can be wrong for some setups.
|
||||
"""
|
||||
import vllm.distributed.device_communicators.custom_all_reduce_utils as tgt
|
||||
|
||||
setattr(tgt, "gpu_p2p_access_check", lambda *arg, **kwargs: True)
|
||||
|
||||
|
||||
API_KEY_HEADER_NAME = "X-API-Key"
|
||||
|
||||
|
||||
@@ -459,3 +481,4 @@ class APIKeyValidatorMiddleware(BaseHTTPMiddleware):
|
||||
)
|
||||
response = await call_next(request)
|
||||
return response
|
||||
|
||||
|
||||
Reference in New Issue
Block a user