Crash the server when error or OOM happens (#514)

This commit is contained in:
Lianmin Zheng
2024-06-07 19:22:34 -07:00
committed by GitHub
parent f70f72586a
commit 91f93f141f
7 changed files with 63 additions and 22 deletions

View File

@@ -34,7 +34,7 @@ from sglang.srt.utils import (
)
from sglang.utils import get_exception_traceback
-logger = logging.getLogger("srt.model_tp")
+logger = logging.getLogger("srt.tp_worker")
class ModelTpServer:
@@ -187,7 +187,8 @@ class ModelTpServer:
# Forward
self.forward_step()
except Exception:
-logger.error("Exception in ModelTpClient:\n" + get_exception_traceback())
+logger.error("Exception in ModelTpServer:\n" + get_exception_traceback())
+raise
# Return results
ret = self.out_pyobjs