Clean up server_args, triton cache manager (#8332)

This commit is contained in:
Lianmin Zheng
2025-07-25 14:14:51 -07:00
committed by GitHub
parent f8260f2539
commit ed2e313eb6
12 changed files with 128 additions and 204 deletions

View File

@@ -937,71 +937,6 @@ def monkey_patch_vllm_gguf_config():
setattr(GGUFConfig, "get_quant_method", get_quant_method_with_embedding_replaced)
def maybe_set_triton_cache_manager() -> None:
    """Select sglang's Triton cache manager unless one is already configured.

    If ``TRITON_CACHE_MANAGER`` is present in the environment it is left
    untouched; otherwise it is set to ``sglang.srt.utils:CustomCacheManager``
    and the choice is logged at debug level.
    """
    env_key = "TRITON_CACHE_MANAGER"
    if os.environ.get(env_key) is not None:
        # Respect whatever the user (or an earlier call) already chose.
        return

    manager = "sglang.srt.utils:CustomCacheManager"
    logger.debug("Setting Triton cache manager to: %s", manager)
    os.environ[env_key] = manager
class CustomCacheManager(FileCacheManager):
    """Triton file cache manager that uses a separate cache directory per rank.

    In the default (non-dump, non-override) mode the cache directory is
    suffixed with the tensor-parallel group's local rank, falling back to the
    process id when the rank cannot be obtained, before the per-kernel ``key``
    sub-directory is appended.

    Note: ``__init__`` sets ``key``, ``lock_path`` and ``cache_dir`` itself and
    does not call ``FileCacheManager.__init__``.
    """

    # Adapted from: https://github.com/tdoublep/vllm/blob/3307522289fdfefe323b6c00d0db696651989a2f/vllm/triton_utils/custom_cache_manager.py
    def __init__(self, key, override=False, dump=False):
        """Resolve (and, where applicable, create) the cache directory for ``key``.

        Args:
            key: Name of the sub-directory used for this cache entry.
            override: If true (and ``dump`` is false), use the override
                directory; it is neither created nor given a lock path.
            dump: If true, use the dump directory; takes precedence over
                ``override``.

        Raises:
            RuntimeError: If no cache directory could be determined in the
                default mode.
        """
        from sglang.srt.distributed.parallel_state import get_tp_group

        self.key = key
        self.lock_path = None

        # Look the directory helpers up dynamically; any helper that is
        # missing resolves to None and a path under the home directory is
        # used instead.
        try:
            module_path = "triton.runtime.cache"
            cache_module = importlib.import_module(module_path)
            default_cache_dir = getattr(cache_module, "default_cache_dir", None)
            default_dump_dir = getattr(cache_module, "default_dump_dir", None)
            default_override_dir = getattr(cache_module, "default_override_dir", None)
        except (ModuleNotFoundError, AttributeError):
            default_cache_dir = None
            default_dump_dir = None
            default_override_dir = None

        if dump:
            self.cache_dir = (
                default_dump_dir()
                if default_dump_dir is not None
                else os.path.join(Path.home(), ".triton", "dump")
            )
            self.cache_dir = os.path.join(self.cache_dir, self.key)
            self.lock_path = os.path.join(self.cache_dir, "lock")
            os.makedirs(self.cache_dir, exist_ok=True)
        elif override:
            self.cache_dir = (
                default_override_dir()
                if default_override_dir is not None
                else os.path.join(Path.home(), ".triton", "override")
            )
            self.cache_dir = os.path.join(self.cache_dir, self.key)
        else:
            # create cache directory if it doesn't exist
            self.cache_dir = os.getenv("TRITON_CACHE_DIR", "").strip() or (
                default_cache_dir()
                if default_cache_dir is not None
                else os.path.join(Path.home(), ".triton", "cache")
            )
            if self.cache_dir:
                try:
                    self.cache_dir = f"{self.cache_dir}_{get_tp_group().local_rank}"
                except Exception:
                    # Best-effort fallback: without a usable TP group, key the
                    # directory on the process id. Only Exception is caught so
                    # KeyboardInterrupt/SystemExit still propagate.
                    self.cache_dir = f"{self.cache_dir}_{os.getpid()}"
                self.cache_dir = os.path.join(self.cache_dir, self.key)
                self.lock_path = os.path.join(self.cache_dir, "lock")
                os.makedirs(self.cache_dir, exist_ok=True)
            else:
                raise RuntimeError("Could not create or locate cache dir")
def set_ulimit(target_soft_limit=65535):
# number of open files
resource_type = resource.RLIMIT_NOFILE