feat: add check_env (#645)

2024-07-18 14:39:28 +10:00
parent 476584cb6e
commit d93388da3e
5 changed files with 181 additions and 6 deletions
--- a/.github/ISSUE_TEMPLATE/1-bug-report.yml
+++ b/.github/ISSUE_TEMPLATE/1-bug-report.yml
@@ -20,7 +20,7 @@ body:
  attributes:
    label: Reproduction
    description: |
-      1. What command or script did you run?
+      What command or script did you run?
    placeholder: |
      A placeholder for the command.
  validations:
@@ -29,7 +29,7 @@ body:
  attributes:
    label: Environment
    description: |
-      Please provide necessary environment information here.
+      Please provide the necessary environment information here by running `python3 -m sglang.check_env` and pasting its output.
    placeholder: Environment here.
    render: Shell
  validations:
--- a/python/sglang/check_env.py
+++ b/python/sglang/check_env.py
@@ -0,0 +1,163 @@
 import importlib
 import os
 import subprocess
 import sys
 from collections import OrderedDict, defaultdict
 import torch
 # Import names of the packages whose versions are reported.
 # Each entry is handed to importlib.import_module(), so it must be the name
 # used in an `import` statement rather than the distribution name: Pillow is
 # imported as "PIL" (there is no importable module called "pillow").
 PACKAGE_LIST = [
    "sglang",
    "flashinfer",
    "aiohttp",
    "fastapi",
    "hf_transfer",
    "huggingface_hub",
    "interegular",
    "packaging",
    "PIL",
    "psutil",
    "pydantic",
    "rpyc",
    "uvicorn",
    "uvloop",
    "zmq",
    "vllm",
    "outlines",
    "openai",
    "tiktoken",
    "anthropic",
    "litellm",
 ]
 def get_package_versions(packages):
    """
    Get versions of specified packages.

    Each entry is a module name, optionally followed by a requirement-style
    version specifier (e.g. "foo>=1.0"); the specifier is discarded.

    Returns a dict mapping every stripped package name to one of:
      * the module's ``__version__`` attribute, when it has one;
      * otherwise the installed distribution version from ``importlib.metadata``;
      * "Module Not Found" when the import raised ModuleNotFoundError;
      * "Import Failed" when the import raised any other exception;
      * "Unknown" when the module imported but no version could be determined.
    """
    # Imported locally: `import importlib` alone does not guarantee that the
    # `importlib.metadata` submodule has been loaded.
    from importlib import metadata

    versions = {}
    for package in packages:
        package_name = package
        for separator in ("==", ">=", "<=", "~=", "!=", ">", "<"):
            package_name = package_name.split(separator)[0]
        package_name = package_name.strip()

        try:
            module = importlib.import_module(package_name)
        except ModuleNotFoundError:
            module, fallback = None, "Module Not Found"
        except Exception:
            # A broken install (missing shared library, failing import-time
            # code, ...) must not abort the whole environment report.
            module, fallback = None, "Import Failed"
        else:
            fallback = "Unknown"

        version = getattr(module, "__version__", None)
        if version is None:
            # Not every module exposes __version__; ask the installed
            # distribution metadata before giving up.
            try:
                version = metadata.version(package_name) or fallback
            except (metadata.PackageNotFoundError, ValueError):
                version = fallback
        versions[package_name] = version
    return versions
 def get_cuda_info():
    """
    Report whether CUDA is available and, if so, GPU and CUDA version details.

    The returned dict always starts with the "CUDA available" flag taken from
    torch; the GPU and CUDA-version entries are merged in only when that flag
    is true.
    """
    available = torch.cuda.is_available()
    info = {"CUDA available": available}
    if not available:
        return info
    info.update(_get_gpu_info())
    info.update(_get_cuda_version_info())
    return info
 def _get_gpu_info():
    """
    Summarize the visible GPUs, grouping device indices by device name.

    Returns a dict whose keys look like "GPU 0,1" (the comma-joined indices of
    all devices sharing a name) and whose values are that device name.
    """
    ids_by_name = {}
    for index in range(torch.cuda.device_count()):
        name = torch.cuda.get_device_name(index)
        ids_by_name.setdefault(name, []).append(str(index))

    summary = {}
    for name, ids in ids_by_name.items():
        summary["GPU " + ",".join(ids)] = name
    return summary
 def _get_cuda_version_info():
    """
    Get CUDA version information.

    Returns a dict containing "CUDA_HOME" (as resolved by
    torch.utils.cpp_extension), the NVCC entry when CUDA_HOME is an existing
    directory, and the CUDA driver version entry.
    """
    from torch.utils.cpp_extension import CUDA_HOME

    cuda_info = {"CUDA_HOME": CUDA_HOME}
    if CUDA_HOME and os.path.isdir(CUDA_HOME):
        cuda_info.update(_get_nvcc_info())

    # The driver version is read from nvidia-smi and does not depend on a CUDA
    # toolkit being installed, so report it even when CUDA_HOME is unset.
    try:
        cuda_info.update(_get_cuda_driver_version())
    except OSError:
        # nvidia-smi is not on PATH (or cannot be executed).
        cuda_info["CUDA Driver Version"] = "Not Available"
    return cuda_info
 def _get_nvcc_info():
    """
    Get NVCC version information.

    Runs ``<CUDA_HOME>/bin/nvcc -V`` and returns ``{"NVCC": <text>}`` where the
    text is the "Cuda compilation tools, ..." line of its output. The value is
    "Not Available" when CUDA_HOME is unset, nvcc cannot be executed, it exits
    with an error, or that line is missing from the output.
    """
    from torch.utils.cpp_extension import CUDA_HOME

    unavailable = {"NVCC": "Not Available"}
    if not CUDA_HOME:
        return unavailable

    nvcc = os.path.join(CUDA_HOME, "bin", "nvcc")
    try:
        # Argument list instead of a shell string: CUDA_HOME comes from the
        # environment and may contain quotes or other shell metacharacters.
        raw_output = subprocess.check_output([nvcc, "-V"])
    except (subprocess.SubprocessError, OSError):
        # OSError covers a missing or non-executable nvcc binary.
        return unavailable

    nvcc_output = raw_output.decode("utf-8", errors="replace")
    marker = "Cuda compilation tools"
    # Take the line holding the last occurrence of the marker. This does not
    # rely on a trailing "Build ..." line, which not every nvcc release prints.
    for line in reversed(nvcc_output.splitlines()):
        position = line.find(marker)
        if position != -1:
            return {"NVCC": line[position:].strip()}
    return unavailable
 def _get_cuda_driver_version():
    """
    Get CUDA driver version.

    Queries ``nvidia-smi`` and returns ``{"CUDA Driver Version": <text>}``.
    nvidia-smi prints one line per GPU, so duplicate lines are collapsed and
    distinct versions are joined with ", ". The value is "Not Available" when
    nvidia-smi is missing, fails, or prints nothing.
    """
    unavailable = {"CUDA Driver Version": "Not Available"}
    try:
        output = subprocess.check_output(
            [
                "nvidia-smi",
                "--query-gpu=driver_version",
                "--format=csv,noheader,nounits",
            ]
        )
    except (subprocess.SubprocessError, OSError):
        # OSError (e.g. FileNotFoundError) is raised when nvidia-smi is not
        # installed; it is not a SubprocessError.
        return unavailable

    reported = (line.strip() for line in output.decode().splitlines())
    # dict.fromkeys de-duplicates while keeping first-seen order.
    unique_versions = list(dict.fromkeys(v for v in reported if v))
    if not unique_versions:
        return unavailable
    return {"CUDA Driver Version": ", ".join(unique_versions)}
 def get_gpu_topology():
    """
    Get GPU topology information.

    Returns the output of ``nvidia-smi topo -m`` prefixed with a newline, or
    None when nvidia-smi is missing or exits with an error.
    """
    try:
        result = subprocess.run(
            ["nvidia-smi", "topo", "-m"],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            check=True,
        )
    except (subprocess.SubprocessError, OSError):
        # OSError (e.g. FileNotFoundError) is raised when nvidia-smi is not
        # installed; it is not a SubprocessError.
        return None
    # check=True already guarantees a zero exit status at this point.
    return "\n" + result.stdout
 def check_env():
    """
    Gather the environment report and print it as "key: value" lines.

    Entries appear in this order: Python version, CUDA information, PyTorch
    version, versions of the packages in PACKAGE_LIST and, when it could be
    obtained, the NVIDIA GPU topology.
    """
    report = OrderedDict()
    report["Python"] = "".join(sys.version.split("\n"))
    report.update(get_cuda_info())
    report["PyTorch"] = torch.__version__
    report.update(get_package_versions(PACKAGE_LIST))

    topology = get_gpu_topology()
    if topology:
        report["NVIDIA Topology"] = topology

    for label in report:
        print(f"{label}: {report[label]}")
 # Print the report when the module is executed directly,
 # e.g. `python3 -m sglang.check_env`.
 if __name__ == "__main__":
    check_env()
--- a/python/sglang/srt/managers/controller/infer_batch.py
+++ b/python/sglang/srt/managers/controller/infer_batch.py
@@ -327,8 +327,10 @@ class Batch:
        req_pool_indices = self.req_to_token_pool.alloc(bs)
        if req_pool_indices is None:
-            raise RuntimeError("Out of memory. "
+            raise RuntimeError(
-                               "Please set a smaller number for `--max-running-requests`.")
+                "Out of memory. "
                "Please set a smaller number for `--max-running-requests`."
            )
        req_pool_indices_cpu = req_pool_indices.cpu().numpy()
        for i in range(bs):
--- a/python/sglang/srt/managers/controller/model_runner.py
+++ b/python/sglang/srt/managers/controller/model_runner.py
@@ -168,7 +168,10 @@ class ModelRunner:
            )
        self.req_to_token_pool = ReqToTokenPool(
-            max(int(self.max_total_num_tokens / self.model_config.context_len * 512), 2048),
+            max(
                int(self.max_total_num_tokens / self.model_config.context_len * 512),
                2048,
            ),
            self.model_config.context_len + 8,
        )
        self.token_to_kv_pool = TokenToKVPool(
--- a/python/sglang/srt/memory_pool.py
+++ b/python/sglang/srt/memory_pool.py
@@ -44,7 +44,14 @@ class ReqToTokenPool:
 class TokenToKVPool:
    """A memory pool that maps a token to its kv cache locations"""
-    def __init__(self, size: int, dtype: torch.dtype, head_num: int, head_dim: int, layer_num: int):
+    def __init__(
        self,
        size: int,
        dtype: torch.dtype,
        head_num: int,
        head_dim: int,
        layer_num: int,
    ):
        self.size = size
        # We also add one slot. This slot is used for writing dummy output from padded tokens.