Remove cached triton launcher (#656)

This commit is contained in:
Lianmin Zheng
2024-07-18 23:28:40 -07:00
committed by GitHub
parent 1b7adbb5a0
commit e1792cca24
5 changed files with 15 additions and 210 deletions

View File

@@ -51,6 +51,7 @@ from sglang.srt.utils import (
allocate_init_ports,
assert_pkg_version,
enable_show_time_cost,
set_ulimit,
)
from sglang.utils import get_exception_traceback
@@ -145,30 +146,6 @@ def _set_global_server_args(server_args: ServerArgs):
}
def _set_ulimit(target_soft_limit=65535):
    """Best-effort raise of the soft open-file limit (RLIMIT_NOFILE).

    If the current soft limit is already at least ``target_soft_limit``,
    nothing is changed. Otherwise the soft limit is set to
    ``target_soft_limit`` while the hard limit is left as it is. A failure
    to change the limit is logged and never propagated to the caller.

    Args:
        target_soft_limit: Desired minimum soft limit for open file
            descriptors.
    """
    # Imported locally, as in the original; the module is Unix-only.
    import resource

    resource_type = resource.RLIMIT_NOFILE
    current_soft, current_hard = resource.getrlimit(resource_type)

    if current_soft >= target_soft_limit:
        logger.info(
            f"Current limits are already sufficient: soft={current_soft}, hard={current_hard}"
        )
    else:
        try:
            resource.setrlimit(resource_type, (target_soft_limit, current_hard))
            # Re-read so the log reports what the OS actually applied.
            new_soft, new_hard = resource.getrlimit(resource_type)
            logger.info(
                f"Successfully set new limits: soft={new_soft}, hard={new_hard}"
            )
        except (ValueError, OSError) as e:
            # ValueError: e.g. the requested soft limit exceeds the hard limit.
            # OSError: the underlying setrlimit system call failed.
            # Either way this is a best-effort tweak, so log and continue.
            logger.warning(f"Failed to set new limits: {e}")
            logger.info(
                f"Limits remain unchanged: soft={current_soft}, hard={current_hard}"
            )
def launch_server(
server_args: ServerArgs,
model_overide_args: Optional[dict] = None,
@@ -186,7 +163,7 @@ def launch_server(
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
os.environ["NCCL_CUMEM_ENABLE"] = "0"
os.environ["NCCL_NVLS_ENABLE"] = "0"
_set_ulimit()
set_ulimit()
if server_args.show_time_cost:
enable_show_time_cost()
if server_args.disable_disk_cache: