Clean up server_args, triton cache manager (#8332)
This commit is contained in:
@@ -937,71 +937,6 @@ def monkey_patch_vllm_gguf_config():
|
||||
setattr(GGUFConfig, "get_quant_method", get_quant_method_with_embedding_replaced)
|
||||
|
||||
|
||||
def maybe_set_triton_cache_manager() -> None:
    """Select the custom Triton cache manager unless one is already chosen.

    If the ``TRITON_CACHE_MANAGER`` environment variable is already present it
    is left untouched; otherwise it is set to
    ``sglang.srt.utils:CustomCacheManager``.
    """
    if os.environ.get("TRITON_CACHE_MANAGER") is not None:
        # A cache manager was configured explicitly; keep that choice.
        return

    manager = "sglang.srt.utils:CustomCacheManager"
    logger.debug("Setting Triton cache manager to: %s", manager)
    os.environ["TRITON_CACHE_MANAGER"] = manager
|
||||
|
||||
|
||||
class CustomCacheManager(FileCacheManager):
    """File cache manager that picks its own cache directory per process.

    In the default (non-dump, non-override) mode the cache directory is
    suffixed with the tensor-parallel group's ``local_rank``, or with the
    current PID when that rank cannot be obtained, before the cache key is
    appended.
    """

    # Adapted from: https://github.com/tdoublep/vllm/blob/3307522289fdfefe323b6c00d0db696651989a2f/vllm/triton_utils/custom_cache_manager.py
    def __init__(self, key, override=False, dump=False):
        """Resolve ``cache_dir`` and ``lock_path`` for the given cache key.

        The parent ``__init__`` is not called here; this constructor sets
        ``key``, ``lock_path`` and ``cache_dir`` itself.

        Args:
            key: Cache key; used as the final path component of ``cache_dir``.
            override: When true (and ``dump`` is false), use the override
                directory. No lock path is set and the directory is not
                created in this mode.
            dump: When true, use the dump directory (takes precedence over
                ``override``) and create it.

        Raises:
            RuntimeError: In default mode, if the resolved cache directory is
                empty.
        """
        from sglang.srt.distributed.parallel_state import get_tp_group

        self.key = key
        self.lock_path = None

        # Look the default-directory helpers up dynamically and fall back to
        # hard-coded paths under ~/.triton when any of them is unavailable.
        try:
            cache_module = importlib.import_module("triton.runtime.cache")

            default_cache_dir = getattr(cache_module, "default_cache_dir", None)
            default_dump_dir = getattr(cache_module, "default_dump_dir", None)
            default_override_dir = getattr(cache_module, "default_override_dir", None)
        except (ModuleNotFoundError, AttributeError):
            default_cache_dir = None
            default_dump_dir = None
            default_override_dir = None

        if dump:
            self.cache_dir = (
                default_dump_dir()
                if default_dump_dir is not None
                else os.path.join(Path.home(), ".triton", "dump")
            )
            self.cache_dir = os.path.join(self.cache_dir, self.key)
            self.lock_path = os.path.join(self.cache_dir, "lock")
            os.makedirs(self.cache_dir, exist_ok=True)
        elif override:
            self.cache_dir = (
                default_override_dir()
                if default_override_dir is not None
                else os.path.join(Path.home(), ".triton", "override")
            )
            self.cache_dir = os.path.join(self.cache_dir, self.key)
        else:
            # An explicit, non-blank TRITON_CACHE_DIR wins over the defaults.
            self.cache_dir = os.getenv("TRITON_CACHE_DIR", "").strip() or (
                default_cache_dir()
                if default_cache_dir is not None
                else os.path.join(Path.home(), ".triton", "cache")
            )
            if self.cache_dir:
                try:
                    self.cache_dir = f"{self.cache_dir}_{get_tp_group().local_rank}"
                except Exception:
                    # The rank lookup failed (presumably the TP group is not
                    # initialized yet -- confirm); fall back to the PID.
                    # Catching Exception rather than using a bare except lets
                    # KeyboardInterrupt and SystemExit propagate.
                    self.cache_dir = f"{self.cache_dir}_{os.getpid()}"
                # create cache directory if it doesn't exist
                self.cache_dir = os.path.join(self.cache_dir, self.key)
                self.lock_path = os.path.join(self.cache_dir, "lock")
                os.makedirs(self.cache_dir, exist_ok=True)
            else:
                raise RuntimeError("Could not create or locate cache dir")
|
||||
|
||||
|
||||
def set_ulimit(target_soft_limit=65535):
|
||||
# number of open files
|
||||
resource_type = resource.RLIMIT_NOFILE
|
||||
|
||||
Reference in New Issue
Block a user