[Feature] Simple Improve Health Check Mechanism for Production-Grade Stability (#8115)
Signed-off-by: ybyang <ybyang7@iflytek.com>
This commit is contained in:
@@ -93,6 +93,22 @@ time_infos = {}
|
||||
# NOTE(review): presumably the maximum magnitude used when working with the
# FP8 E4M3 FNUZ format on HIP devices — confirm the choice of 224.0 with callers.
HIP_FP8_E4M3_FNUZ_MAX = 224.0
|
||||
|
||||
|
||||
class ServerStatus(Enum):
    """Health states a server can be in.

    Each member's value is its own name as a string, which is what gets
    serialized when a status is sent via ``.value``.
    """

    Up = "Up"
    Starting = "Starting"
    UnHealthy = "UnHealthy"
    Crashed = "Crashed"

    def is_healthy(self) -> bool:
        """Return True only for ``ServerStatus.Up``; all other states are not healthy."""
        # Enum members are singletons, so identity is the idiomatic comparison.
        return self is ServerStatus.Up
|
||||
|
||||
|
||||
def report_health(
    status: ServerStatus,
    host: str,
    http_port: int,
    msg: str = "",
    timeout: float = 10.0,
) -> None:
    """POST a health status to ``http://{host}:{http_port}/health``.

    The request body is JSON with two keys: ``status`` (the enum member's
    string value) and ``msg``.

    Args:
        status: The status to report.
        host: Host name or address of the HTTP server receiving the report.
        http_port: Port of that HTTP server.
        msg: Optional free-form message sent alongside the status.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, so a stalled endpoint
            would block the caller forever.

    Raises:
        requests.RequestException: Propagated unchanged from ``requests.post``
            (e.g. connection failure or timeout).
    """
    requests.post(
        f"http://{host}:{http_port}/health",
        json={"status": status.value, "msg": msg},
        timeout=timeout,
    )
|
||||
|
||||
|
||||
# https://pytorch.org/docs/stable/notes/hip.html#checking-for-hip
def is_hip() -> bool:
    """Return True when ``torch.version.hip`` is set (i.e. is not ``None``).

    This is the check described in the PyTorch HIP note linked above for
    detecting a HIP build of PyTorch.
    """
    return torch.version.hip is not None
|
||||
|
||||
Reference in New Issue
Block a user