feat: add check_env (#645)
python/sglang/check_env.py (new file, 163 lines)
@@ -0,0 +1,163 @@
import importlib
import os
import subprocess
import sys
from collections import OrderedDict, defaultdict

import torch

# List of packages to check versions for
PACKAGE_LIST = [
    "sglang",
    "flashinfer",
    "aiohttp",
    "fastapi",
    "hf_transfer",
    "huggingface_hub",
    "interegular",
    "packaging",
    "pillow",
    "psutil",
    "pydantic",
    "rpyc",
    "uvicorn",
    "uvloop",
    "zmq",
    "vllm",
    "outlines",
    "openai",
    "tiktoken",
    "anthropic",
    "litellm",
]


def get_package_versions(packages):
    """
    Get versions of specified packages.
    """
    versions = {}
    for package in packages:
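        # Strip any version specifier (e.g. "pkg>=1.0" -> "pkg") so the bare name imports.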
        package_name = package.split("==")[0].split(">=")[0].split("<=")[0]
        try:
            module = importlib.import_module(package_name)
            if hasattr(module, "__version__"):
                versions[package_name] = module.__version__
        except ModuleNotFoundError:
            versions[package_name] = "Module Not Found"
    return versions


def get_cuda_info():
    """
    Get CUDA-related information if available.
    """
    cuda_info = {"CUDA available": torch.cuda.is_available()}

    if cuda_info["CUDA available"]:
        cuda_info.update(_get_gpu_info())
        cuda_info.update(_get_cuda_version_info())

    return cuda_info


def _get_gpu_info():
    """
    Get information about available GPUs.
    """
    devices = defaultdict(list)
    for k in range(torch.cuda.device_count()):
        devices[torch.cuda.get_device_name(k)].append(str(k))

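    # Group duplicate devices, e.g. four identical GPUs collapse into one "GPU 0,1,2,3" entry.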
return {f"GPU {','.join(device_ids)}": name for name, device_ids in devices.items()}
|
||||
|
||||
|
||||
def _get_cuda_version_info():
|
||||
"""
|
||||
Get CUDA version information.
|
||||
"""
|
||||
from torch.utils.cpp_extension import CUDA_HOME
|
||||
|
||||
cuda_info = {"CUDA_HOME": CUDA_HOME}
|
||||
|
||||
if CUDA_HOME and os.path.isdir(CUDA_HOME):
|
||||
cuda_info.update(_get_nvcc_info())
|
||||
cuda_info.update(_get_cuda_driver_version())
|
||||
|
||||
return cuda_info
|
||||
|
||||
|
||||
def _get_nvcc_info():
|
||||
"""
|
||||
Get NVCC version information.
|
||||
"""
|
||||
from torch.utils.cpp_extension import CUDA_HOME
|
||||
|
||||
try:
|
||||
nvcc = os.path.join(CUDA_HOME, "bin/nvcc")
|
||||
nvcc_output = (
|
||||
subprocess.check_output(f'"{nvcc}" -V', shell=True).decode("utf-8").strip()
|
||||
)
|
||||
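        # Keep only the "Cuda compilation tools, release X.Y" portion of the nvcc banner.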
        return {
            "NVCC": nvcc_output[
                nvcc_output.rfind("Cuda compilation tools") : nvcc_output.rfind("Build")
            ].strip()
        }
    except subprocess.SubprocessError:
        return {"NVCC": "Not Available"}


def _get_cuda_driver_version():
    """
    Get CUDA driver version.
    """
    try:
        output = subprocess.check_output(
            [
                "nvidia-smi",
                "--query-gpu=driver_version",
                "--format=csv,noheader,nounits",
            ]
        )
        return {"CUDA Driver Version": output.decode().strip()}
    except subprocess.SubprocessError:
        return {"CUDA Driver Version": "Not Available"}


def get_gpu_topology():
    """
    Get GPU topology information.
    """
    try:
        result = subprocess.run(
            ["nvidia-smi", "topo", "-m"],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            check=True,
        )
        return "\n" + result.stdout if result.returncode == 0 else None
    except subprocess.SubprocessError:
        return None


def check_env():
    """
    Check and print environment information.
    """
    env_info = OrderedDict()
    env_info["Python"] = sys.version.replace("\n", "")
    env_info.update(get_cuda_info())
    env_info["PyTorch"] = torch.__version__
    env_info.update(get_package_versions(PACKAGE_LIST))

    gpu_topo = get_gpu_topology()
    if gpu_topo:
        env_info["NVIDIA Topology"] = gpu_topo

    for k, v in env_info.items():
        print(f"{k}: {v}")


if __name__ == "__main__":
    check_env()
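A quick way to exercise the new script, assuming the sglang package is importable in the current environment:

    python -m sglang.check_env

It prints one `key: value` line per entry (Python, CUDA availability, PyTorch and package versions, and the `nvidia-smi topo -m` matrix when available); the exact values naturally depend on the local machine.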
@@ -327,8 +327,10 @@ class Batch:
         req_pool_indices = self.req_to_token_pool.alloc(bs)
 
         if req_pool_indices is None:
-            raise RuntimeError("Out of memory. "
-                               "Please set a smaller number for `--max-running-requests`.")
+            raise RuntimeError(
+                "Out of memory. "
+                "Please set a smaller number for `--max-running-requests`."
+            )
 
         req_pool_indices_cpu = req_pool_indices.cpu().numpy()
         for i in range(bs):
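Aside from reflowing the raise to one argument per line, nothing changes here: `alloc(bs)` returning None signals that the request-to-token pool has no free slots, and the message suggests lowering `--max-running-requests` to keep fewer requests in flight at once.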
@@ -168,7 +168,10 @@ class ModelRunner:
         )
 
         self.req_to_token_pool = ReqToTokenPool(
-            max(int(self.max_total_num_tokens / self.model_config.context_len * 512), 2048),
+            max(
+                int(self.max_total_num_tokens / self.model_config.context_len * 512),
+                2048,
+            ),
             self.model_config.context_len + 8,
         )
         self.token_to_kv_pool = TokenToKVPool(
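For intuition on the `ReqToTokenPool` sizing above, a worked example with made-up numbers (not from this commit): with `max_total_num_tokens = 200000` and `context_len = 4096`, the pool gets `max(int(200000 / 4096 * 512), 2048) = max(25000, 2048) = 25000` request slots, while the `2048` floor keeps the pool usable when `max_total_num_tokens` is small relative to the context length.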
@@ -44,7 +44,14 @@ class ReqToTokenPool:
 class TokenToKVPool:
     """A memory pool that maps a token to its kv cache locations"""
 
-    def __init__(self, size: int, dtype: torch.dtype, head_num: int, head_dim: int, layer_num: int):
+    def __init__(
+        self,
+        size: int,
+        dtype: torch.dtype,
+        head_num: int,
+        head_dim: int,
+        layer_num: int,
+    ):
         self.size = size
 
         # We also add one slot. This slot is used for writing dummy output from padded tokens.
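For reference, a hypothetical instantiation of the reformatted signature (every value below is illustrative, not taken from the commit):

    pool = TokenToKVPool(
        size=25000,           # token slots in the KV cache (illustrative)
        dtype=torch.float16,  # storage dtype for cached keys/values
        head_num=32,          # attention heads per layer (illustrative)
        head_dim=128,         # per-head dimension (illustrative)
        layer_num=32,         # number of transformer layers (illustrative)
    )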