# xc-llm-kunlun/collect_env.py

# SPDX-License-Identifier: Apache-2.0
# vLLM-Kunlun Environment Information Collection Tool (Fixed Version)
"""
Environment information collection script for Kunlun XPU
Fixed the following issues:
1. Device name displayed as "GPU" → Now correctly shows "P800 OAM"
2. XRE version command error → Now parsed from xpu-smi output
3. vLLM-Kunlun version hardcoded → Now fetched from pip package metadata
"""

import os
import re
import subprocess
import sys
from collections import namedtuple

# =============================================================================
# Part 1: Basic Utility Functions
# =============================================================================


def run(command):
    """
    Execute a command and capture its exit status and output.

    A string command is executed through the shell (so pipes and redirects
    work); any other command (e.g. an argv list) is executed directly.

    Args:
        command: Command line as a string, or an argv sequence.
    Returns:
        tuple: (return_code, stdout, stderr). Both streams are decoded as
        UTF-8 (undecodable bytes become U+FFFD) and stripped of surrounding
        whitespace. When the executable of a non-shell command does not
        exist, (127, "", "Command not found") is returned instead.
    """
    # Only string commands go through the shell.
    use_shell = isinstance(command, str)
    try:
        completed = subprocess.run(
            command,
            stdout=subprocess.PIPE,  # Capture standard output
            stderr=subprocess.PIPE,  # Capture error output
            shell=use_shell,
        )
    except FileNotFoundError:
        # Same exit status a shell reports for an unknown command.
        return 127, "", "Command not found"

    # Tool output is not guaranteed to be valid UTF-8; replacing bad bytes
    # keeps one odd character from aborting the whole report.
    output = completed.stdout.decode("utf-8", errors="replace").strip()
    err = completed.stderr.decode("utf-8", errors="replace").strip()
    return completed.returncode, output, err


def run_and_read_all(run_lambda, command):
    """Run *command* through *run_lambda*; return stdout on exit code 0, else None."""
    status, stdout, _stderr = run_lambda(command)
    return stdout if status == 0 else None


def run_and_parse_first_match(run_lambda, command, regex):
    """
    Run *command* and return capture group 1 of the first *regex* match.

    Returns None when the command exits non-zero or stdout does not match.
    """
    status, stdout, _stderr = run_lambda(command)
    found = re.search(regex, stdout) if status == 0 else None
    return found.group(1) if found else None


# Probe for PyTorch once at import time. Besides ImportError, a NameError,
# AttributeError or OSError raised while importing is also treated as
# "PyTorch not available".
TORCH_AVAILABLE = False
try:
    import torch
except (ImportError, NameError, AttributeError, OSError):
    pass
else:
    TORCH_AVAILABLE = True


# =============================================================================
# Part 2: General System Information Collection (Reusing vLLM Original Logic)
# =============================================================================


def get_platform():
    """
    Return a normalized platform name.

    ``sys.platform`` values starting with "linux", "win32" or "darwin" are
    collapsed to exactly that prefix; anything else is returned unchanged.
    """
    current = sys.platform
    for family in ("linux", "win32", "darwin"):
        if current.startswith(family):
            return family
    return current


def get_os(run_lambda):
    """
    Describe the operating system as "<name> (<machine>)".

    On Linux the name is taken from PRETTY_NAME in /etc/*-release, falling
    back to the ``lsb_release -d`` description. On other platforms, or when
    both probes fail, the normalized platform name is used.

    Args:
        run_lambda: Callable taking a command and returning
            (return_code, stdout, stderr).
    Returns:
        str: OS description followed by the machine type in parentheses.
    """
    from platform import machine

    arch = machine()
    platform_name = get_platform()
    if platform_name != "linux":
        return f"{platform_name} ({arch})"

    # (command, regex) probes, tried in order; the first match wins.
    probes = (
        (
            "cat /etc/*-release 2>/dev/null | grep PRETTY_NAME | head -1",
            r'PRETTY_NAME="(.*)"',
        ),
        ("lsb_release -d 2>/dev/null", r"Description:\s*(.*)"),
    )
    for command, pattern in probes:
        status, stdout, _stderr = run_lambda(command)
        if status == 0 and stdout:
            found = re.search(pattern, stdout)
            if found:
                return f"{found.group(1)} ({arch})"

    return f"{platform_name} ({arch})"


def get_gcc_version(run_lambda):
    """Return the text after "gcc " in ``gcc --version`` output, or None."""
    version_regex = r"gcc (.*)"
    return run_and_parse_first_match(run_lambda, "gcc --version", version_regex)


def get_clang_version(run_lambda):
    """Return the text after "clang version " in ``clang --version`` output, or None."""
    version_regex = r"clang version (.*)"
    return run_and_parse_first_match(run_lambda, "clang --version", version_regex)


def get_cmake_version(run_lambda):
    """Return the text after "cmake " in ``cmake --version`` output, or None."""
    version_regex = r"cmake (.*)"
    return run_and_parse_first_match(run_lambda, "cmake --version", version_regex)


def get_libc_version():
    """Return "<lib>-<version>" from ``platform.libc_ver()`` on Linux, else "N/A"."""
    from platform import libc_ver

    if get_platform() == "linux":
        return "-".join(libc_ver())
    return "N/A"


def get_python_platform():
    """Return the platform identification string from ``platform.platform()``."""
    from platform import platform as describe_platform

    return describe_platform()


def get_cpu_info(run_lambda):
    """
    Return ``lscpu`` output on Linux, or "N/A" on other platforms.

    When ``lscpu`` exits non-zero its stderr text is returned instead, so
    the failure reason shows up in the report.
    """
    if get_platform() != "linux":
        return "N/A"

    status, stdout, stderr = run_lambda("lscpu")
    if status == 0:
        return stdout
    return stderr


def get_pip_packages(run_lambda, patterns=None):
    """
    List installed pip packages whose freeze line matches any pattern.

    Args:
        run_lambda: Callable taking a command and returning
            (return_code, stdout, stderr).
        patterns: Iterable of case-insensitive substrings to look for in
            each ``pip list --format=freeze`` line. Defaults to a built-in
            set of Kunlun / PyTorch / vLLM related names.
    Returns:
        tuple: (pip_label, packages) where packages is a newline-joined
        string of matching lines. When pip itself fails, ("pip3", "").
    """
    if patterns is None:
        patterns = {
            "torch",
            "numpy",
            "triton",
            "transformers",
            "vllm",
            "kunlun",
            "xpu",
            "bkcl",
            "xmlir",
        }

    # Query the pip that belongs to the running interpreter.
    listing = run_and_read_all(
        run_lambda, [sys.executable, "-mpip", "list", "--format=freeze"]
    )
    if listing is None:
        return "pip3", ""

    def is_relevant(entry):
        lowered = entry.lower()
        return any(name.lower() in lowered for name in patterns)

    matching = [entry for entry in listing.splitlines() if is_relevant(entry)]
    label = "pip3" if sys.version[0] == "3" else "pip"
    return label, "\n".join(matching)


def get_conda_packages(run_lambda, patterns=None):
    """
    List conda packages whose ``conda list`` line matches any pattern.

    The conda executable is taken from the CONDA_EXE environment variable,
    defaulting to "conda".

    Args:
        run_lambda: Callable taking a command and returning
            (return_code, stdout, stderr).
        patterns: Iterable of case-insensitive substrings to look for.
            Defaults to a built-in set of Kunlun / PyTorch related names.
    Returns:
        str or None: Newline-joined matching lines (comment lines starting
        with "#" are dropped), or None when ``conda list`` fails.
    """
    if patterns is None:
        patterns = {
            "torch",
            "numpy",
            "triton",
            "transformers",
            "kunlun",
            "xpu",
            "bkcl",
            "xmlir",
        }

    conda_exe = os.environ.get("CONDA_EXE", "conda")
    listing = run_and_read_all(run_lambda, [conda_exe, "list"])
    if listing is None:
        return None

    kept = []
    for entry in listing.splitlines():
        if entry.startswith("#"):
            continue
        lowered = entry.lower()
        if any(name.lower() in lowered for name in patterns):
            kept.append(entry)
    return "\n".join(kept)


# =============================================================================
# Part 3: Kunlun-Specific Information Collection (Core Fix)
# =============================================================================


def parse_xpu_smi_output(run_lambda):
    """
    Run ``xpu-smi`` and parse its table output.

    The layout resembles nvidia-smi. Example output:
    +-----------------------------------------------------------------------------+
    | XPU-SMI               Driver Version: 515.58       XPU-RT Version: N/A      |
    |-------------------------------+----------------------+----------------------+
    |   0  P800 OAM           N/A   | 00000000:52:00.0 N/A |                    0 |
    | N/A   43C  N/A     85W / 400W |      4MiB / 98304MiB |      0%      Default |

    Args:
        run_lambda: Callable taking a command and returning
            (return_code, stdout, stderr).
    Returns:
        dict or None: None when the command fails or prints nothing.
        Otherwise a dict with keys:
            "raw_output": the unparsed stdout text
            "driver_version": str, or None if the field is absent
            "xre_version": str, or None if absent or reported as "N/A"
            "devices": list of dicts with "id", "name", "bus_id",
                "memory_used_mib" and "memory_total_mib" (both memory
                values are 0 when the row after the device row carries no
                "<used>MiB / <total>MiB" column)
    """
    rc, output, _ = run_lambda("xpu-smi")
    if rc != 0 or not output:
        return None

    result = {
        "raw_output": output,
        "driver_version": None,
        "xre_version": None,
        "devices": [],
    }

    # The two header fields are looked up independently so that a missing
    # "XPU-RT Version" does not also discard the driver version.
    driver_match = re.search(r"Driver Version:\s*(\S+)", output)
    if driver_match:
        result["driver_version"] = driver_match.group(1)
    runtime_match = re.search(r"XPU-RT Version:\s*(\S+)", output)
    if runtime_match and runtime_match.group(1) != "N/A":
        result["xre_version"] = runtime_match.group(1)

    # Device row: |   0  P800 OAM           N/A   | 00000000:52:00.0 N/A |
    device_pattern = re.compile(
        r"\|\s*(\d+)\s+(\S+(?:\s+\S+)?)\s+(?:N/A|On|Off)\s*\|"  # ID and Name
        r"\s*([0-9a-fA-F:\.]+)\s*"  # Bus-Id
    )

    # Status row: | N/A   43C  N/A     85W / 400W |      4MiB / 98304MiB |
    # The first column (fan / temperature / perf / power) is skipped
    # wholesale so values other than "N/A" there do not hide the memory.
    memory_pattern = re.compile(
        r"\|[^|]*\|"
        r"\s*(\d+)MiB\s*/\s*(\d+)MiB\s*\|"  # Memory-Usage / Total
    )

    lines = output.split("\n")
    for index, line in enumerate(lines):
        device_match = device_pattern.search(line)
        if not device_match:
            continue

        # The row right after a device row is expected to hold memory info.
        memory_used = 0
        memory_total = 0
        if index + 1 < len(lines):
            mem_match = memory_pattern.search(lines[index + 1])
            if mem_match:
                memory_used = int(mem_match.group(1))
                memory_total = int(mem_match.group(2))

        result["devices"].append(
            {
                "id": int(device_match.group(1)),
                "name": device_match.group(2).strip(),
                "bus_id": device_match.group(3),
                "memory_used_mib": memory_used,
                "memory_total_mib": memory_total,
            }
        )

    return result


def get_kunlun_gpu_info(run_lambda):
    """
    Describe the visible Kunlun XPU devices, one per line.

    Device names come from the parsed ``xpu-smi`` output. Only when that
    yields no devices is the PyTorch CUDA-compatible interface used
    instead.

    Args:
        run_lambda: Callable taking a command and returning
            (return_code, stdout, stderr).
    Returns:
        str or None: Lines of the form "XPU <id>: <name> (<mem>GB)";
        "Error: <message>" if the PyTorch query raises; None when no
        device could be found by either method.
    """
    parsed = parse_xpu_smi_output(run_lambda)

    if parsed and parsed["devices"]:
        # Example line: XPU 0: P800 OAM (96.0GB)
        return "\n".join(
            f"XPU {dev['id']}: {dev['name']} "
            f"({dev['memory_total_mib'] / 1024:.1f}GB)"
            for dev in parsed["devices"]
        )

    # Fallback: ask PyTorch for the device list.
    if TORCH_AVAILABLE:
        lines = []
        try:
            for index in range(torch.cuda.device_count()):
                props = torch.cuda.get_device_properties(index)
                name = getattr(props, "name", "Kunlun XPU")
                total_bytes = getattr(props, "total_memory", 0)
                memory_gb = total_bytes / (1024**3)
                lines.append(f"XPU {index}: {name} ({memory_gb:.1f}GB)")
        except Exception as e:
            return f"Error: {e}"
        # With zero devices return None rather than "", so the report
        # shows its "Could not collect" placeholder instead of a blank.
        return "\n".join(lines) or None

    return None


def get_kunlun_driver_version(run_lambda):
    """
    Return the driver version reported in the ``xpu-smi`` header.

    Args:
        run_lambda: Callable taking a command and returning
            (return_code, stdout, stderr).
    Returns:
        str or None: Driver version (e.g. "515.58"), or None when
        ``xpu-smi`` output is unavailable or has no driver version.
    """
    smi = parse_xpu_smi_output(run_lambda)
    if not smi:
        return None
    return smi["driver_version"] or None


def get_kunlun_xre_version(run_lambda):
    """
    Return the XRE (runtime) version from the ``xpu-smi`` header.

    The value is the "XPU-RT Version" field of the parsed output.

    Args:
        run_lambda: Callable taking a command and returning
            (return_code, stdout, stderr).
    Returns:
        str: The runtime version, or the placeholder
        "N/A (not installed or not detected)" when ``xpu-smi`` output is
        unavailable or carries no usable value.
    """
    smi = parse_xpu_smi_output(run_lambda)
    version = smi["xre_version"] if smi else None
    if version:
        return version
    return "N/A (not installed or not detected)"


def get_kunlun_topo(run_lambda):
    """
    Return Kunlun XPU topology information.

    Args:
        run_lambda: Callable taking a command and returning
            (return_code, stdout, stderr).
    Returns:
        str or None: Output of ``xpu-smi topo -m`` when it succeeds with
        non-empty output; otherwise a "Detected <n> Kunlun XPU device(s)"
        line from PyTorch; None if neither source works.
    """
    topology = run_and_read_all(run_lambda, "xpu-smi topo -m")
    if topology:
        return topology

    # Fallback: report only the device count known to PyTorch.
    if not TORCH_AVAILABLE:
        return None
    try:
        return f"Detected {torch.cuda.device_count()} Kunlun XPU device(s)"
    except Exception:
        return None


def get_bkcl_version(run_lambda):
    """
    Locate the BKCL (Baidu Kunlun Communication Library) shared library.

    No version number is extracted; the result only says where the library
    was found. Two locations are checked in order:
    1. ``libbkcl.so`` next to the installed ``torch_xmlir`` package.
    2. The first ``bkcl`` entry in the ``ldconfig -p`` cache.

    Args:
        run_lambda: Callable taking a command and returning
            (return_code, stdout, stderr).
    Returns:
        str or None: "Found at: <path>" for a library bundled with
        torch_xmlir, the matching ``ldconfig`` line, or None.
    """
    # Method 1: look beside the torch_xmlir package. Importing a package
    # backed by native code can fail with more than ImportError (e.g.
    # OSError for an unloadable shared object); any failure just means
    # "fall through to ldconfig" for this best-effort report.
    try:
        import torch_xmlir
    except Exception:
        torch_xmlir = None

    # __file__ can be missing or None, so it is read defensively.
    package_file = getattr(torch_xmlir, "__file__", None)
    if package_file:
        candidate = os.path.join(os.path.dirname(package_file), "libbkcl.so")
        if os.path.exists(candidate):
            return f"Found at: {candidate}"

    # Method 2: Search from ldconfig
    rc, out, _ = run_lambda("ldconfig -p 2>/dev/null | grep -i bkcl | head -1")
    if rc == 0 and out:
        return out

    return None


def get_vllm_kunlun_version():
    """
    Return the installed vLLM-Kunlun version.

    Sources are tried in order and the first one that works wins:
    1. ``importlib.metadata.version("vllm-kunlun")``
    2. ``pkg_resources.get_distribution("vllm-kunlun")``
    3. ``vllm_kunlun.platforms.version.get_xvllm_version()``, marked in
       the result as coming from code because it may not match the
       installed distribution.

    Every source is best-effort: any exception (including one raised
    while importing the package) moves on to the next source.

    Returns:
        str: Version string, or "N/A" when no source succeeds.
    """
    # Method 1 (recommended): Use importlib.metadata (Python 3.8+)
    try:
        from importlib.metadata import version

        return version("vllm-kunlun")
    except Exception:
        pass

    # Method 2: Use pkg_resources
    try:
        import pkg_resources

        return pkg_resources.get_distribution("vllm-kunlun").version
    except Exception:
        pass

    # Method 3 (fallback): Get from code (may be inaccurate). A broad
    # except is used, as for the methods above, so a package that fails
    # while importing cannot abort the report.
    try:
        from vllm_kunlun.platforms.version import get_xvllm_version

        return f"{get_xvllm_version()} (from code, may be inaccurate)"
    except Exception:
        pass

    return "N/A"


def get_vllm_version():
    """
    Return the installed vLLM version.

    Package metadata is consulted first; ``vllm.__version__`` is the
    fallback. Both lookups are best-effort: any exception (including one
    raised while importing vllm) moves on to the next step.

    Returns:
        str: Version string, or "N/A" when neither lookup succeeds.
    """
    try:
        from importlib.metadata import version

        return version("vllm")
    except Exception:
        pass

    # A broad except is used, as for the metadata lookup above, so a vllm
    # that fails while importing cannot abort the report.
    try:
        from vllm import __version__

        return __version__
    except Exception:
        pass

    return "N/A"


# =============================================================================
# Part 4: Environment Variable Collection
# =============================================================================


def get_kunlun_env_vars():
    """
    Collect Kunlun-related environment variables as "NAME=value" lines.

    A variable is included when its upper-cased name starts with one of
    the known prefixes. Variables whose name contains a secret-looking
    term (case-insensitive) are always left out.

    Returns:
        str: Matching variables sorted by name, one per line, each line
        terminated by a newline; empty string when nothing matches.
    """
    kunlun_prefixes = (
        "XPU",
        "KUNLUN",
        "BKCL",
        "XCCL",
        "XRE",
        "TORCH",
        "VLLM",
    )
    secret_terms = ("secret", "token", "api", "access", "password")

    def is_reportable(name):
        # The secret filter takes precedence over the prefix match.
        lowered = name.lower()
        if any(term in lowered for term in secret_terms):
            return False
        return name.upper().startswith(kunlun_prefixes)

    return "".join(
        f"{name}={value}\n"
        for name, value in sorted(os.environ.items())
        if is_reportable(name)
    )


# =============================================================================
# Part 5: Define Data Structure and Formatted Output
# =============================================================================

# Record type holding every collected value. Fields are listed in groups:
# general system information, PyTorch information, Kunlun-specific
# information, then vLLM versions and environment variables.
KunlunSystemEnv = namedtuple(
    "KunlunSystemEnv",
    (
        "os gcc_version clang_version cmake_version libc_version "
        "python_version python_platform pip_version pip_packages "
        "conda_packages cpu_info "
        "torch_version is_debug_build "
        "kunlun_xpu_info kunlun_driver_version kunlun_xre_version "
        "bkcl_version kunlun_topo "
        "vllm_version vllm_kunlun_version env_vars"
    ),
)


def get_kunlun_env_info():
    """
    Collect all environment information into a KunlunSystemEnv record.

    Commands are executed through a small per-call cache: several
    collectors parse the output of the same ``xpu-smi`` invocation, and the
    cache makes them share one execution (and therefore one consistent
    snapshot) instead of re-running the tool for each field.

    Returns:
        KunlunSystemEnv: Record with one entry per report field. Values
        that could not be determined are None or a placeholder string,
        depending on the individual collector.
    """
    command_cache = {}

    def run_lambda(command):
        # argv lists are unhashable, so they are keyed by their tuple form.
        key = command if isinstance(command, str) else tuple(command)
        if key not in command_cache:
            command_cache[key] = run(command)
        return command_cache[key]

    pip_version, pip_list_output = get_pip_packages(run_lambda)

    # PyTorch information
    if TORCH_AVAILABLE:
        torch_version = torch.__version__
        debug_mode_str = str(torch.version.debug)
    else:
        torch_version = "N/A"
        debug_mode_str = "N/A"

    # sys.version may span several lines; flatten it for the report.
    sys_version = sys.version.replace("\n", " ")

    return KunlunSystemEnv(
        # General system information
        os=get_os(run_lambda),
        gcc_version=get_gcc_version(run_lambda),
        clang_version=get_clang_version(run_lambda),
        cmake_version=get_cmake_version(run_lambda),
        libc_version=get_libc_version(),
        python_version=f"{sys_version} ({sys.maxsize.bit_length() + 1}-bit runtime)",
        python_platform=get_python_platform(),
        pip_version=pip_version,
        pip_packages=pip_list_output,
        conda_packages=get_conda_packages(run_lambda),
        cpu_info=get_cpu_info(run_lambda),
        # PyTorch information
        torch_version=torch_version,
        is_debug_build=debug_mode_str,
        # Kunlun-specific information
        kunlun_xpu_info=get_kunlun_gpu_info(run_lambda),
        kunlun_driver_version=get_kunlun_driver_version(run_lambda),
        kunlun_xre_version=get_kunlun_xre_version(run_lambda),
        bkcl_version=get_bkcl_version(run_lambda),
        kunlun_topo=get_kunlun_topo(run_lambda),
        # vLLM related
        vllm_version=get_vllm_version(),
        vllm_kunlun_version=get_vllm_kunlun_version(),
        env_vars=get_kunlun_env_vars(),
    )


# Output format template for the final report. pretty_str() fills it with
# str.format(), passing the KunlunSystemEnv fields as keyword arguments, so
# every {placeholder} below must be the name of a KunlunSystemEnv field.
# The trailing .strip() removes the blank first and last lines that the
# triple-quoted literal would otherwise add.
kunlun_env_info_fmt = """
==============================
        System Info
==============================
OS                           : {os}
GCC version                  : {gcc_version}
Clang version                : {clang_version}
CMake version                : {cmake_version}
Libc version                 : {libc_version}
==============================
       PyTorch Info
==============================
PyTorch version              : {torch_version}
Is debug build               : {is_debug_build}
==============================
      Python Environment
==============================
Python version               : {python_version}
Python platform              : {python_platform}
==============================
    Kunlun / XPU Info
==============================
XPU models and configuration :
{kunlun_xpu_info}
Kunlun driver version        : {kunlun_driver_version}
XRE (Runtime) version        : {kunlun_xre_version}
BKCL version                 : {bkcl_version}
XPU Topology:
{kunlun_topo}
==============================
          CPU Info
==============================
{cpu_info}
==============================
Versions of relevant libraries
==============================
{pip_packages}
{conda_packages}
==============================
      vLLM-Kunlun Info
==============================
vLLM Version                 : {vllm_version}
vLLM-Kunlun Version          : {vllm_kunlun_version}
==============================
     Environment Variables
==============================
{env_vars}
""".strip()


def pretty_str(envinfo):
    """
    Render a KunlunSystemEnv record as the human-readable report.

    None values are shown as "Could not collect". Each non-empty pip
    package line is prefixed with "[<pip_version>] " and each conda line
    with "[conda] "; an empty pip list is shown as "No relevant packages"
    and an empty conda list as an empty string.

    Args:
        envinfo: KunlunSystemEnv record to format.
    Returns:
        str: The filled-in report template.
    """
    report = {
        field: ("Could not collect" if value is None else value)
        for field, value in envinfo._asdict().items()
    }

    def tagged(text, tag):
        # Prefix every non-empty row with "[tag] ".
        return "\n".join(f"[{tag}] {row}" for row in text.split("\n") if row)

    pip_text = report["pip_packages"]
    if pip_text:
        report["pip_packages"] = tagged(pip_text, envinfo.pip_version)
    else:
        report["pip_packages"] = "No relevant packages"

    conda_text = report["conda_packages"]
    report["conda_packages"] = tagged(conda_text, "conda") if conda_text else ""

    return kunlun_env_info_fmt.format(**report)


def get_pretty_kunlun_env_info():
    """Collect the environment information and return it as a formatted report."""
    collected = get_kunlun_env_info()
    return pretty_str(collected)


def main():
    """Script entry point: print a progress notice, then the full report."""
    print("Collecting Kunlun XPU environment information...")
    print(get_pretty_kunlun_env_info())


# Run the collection only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
[Misc] add collect_env feat (#218) Signed-off-by: Lidang-Jiang <lidangjiang@gmail.com> 2026-02-27 12:19:58 +08:00			`# SPDX-License-Identifier: Apache-2.0`
			`# vLLM-Kunlun Environment Information Collection Tool (Fixed Version)`
			`"""`
			`Environment information collection script for Kunlun XPU`
			`Fixed the following issues:`
			`1. Device name displayed as "GPU" → Now correctly shows "P800 OAM"`
			`2. XRE version command error → Now parsed from xpu-smi output`
			`3. vLLM-Kunlun version hardcoded → Now fetched from pip package metadata`
			`"""`

			`import os`
			`import re`
			`import subprocess`
			`import sys`
			`from collections import namedtuple`

			`# =============================================================================`
			`# Part 1: Basic Utility Functions`
			`# =============================================================================`


			`def run(command):`
			`"""`
			`Execute shell command and return result`
			`[Principle Explanation - Web Development Analogy]`
			`This is like the fetch() function in frontend development, sending a request and getting a response.`
			`- command: The command to execute (similar to a URL)`
			`- returns: (return_code, stdout, stderr)`
			`Args:`
			`command: Command as string or list`
			`Returns:`
			`tuple: (return_code, stdout, stderr)`
			`"""`
			`shell = True if isinstance(command, str) else False`
			`try:`
			`p = subprocess.Popen(`
			`command,`
			`stdout=subprocess.PIPE, # Capture standard output`
			`stderr=subprocess.PIPE, # Capture error output`
			`shell=shell,`
			`)`
			`raw_output, raw_err = p.communicate()`
			`rc = p.returncode`
			`# Decode byte stream to string`
			`output = raw_output.decode("utf-8").strip()`
			`err = raw_err.decode("utf-8").strip()`
			`return rc, output, err`
			`except FileNotFoundError:`
			`return 127, "", "Command not found"`


			`def run_and_read_all(run_lambda, command):`
			`"""Execute command, return output if successful, None otherwise"""`
			`rc, out, _ = run_lambda(command)`
			`if rc != 0:`
			`return None`
			`return out`


			`def run_and_parse_first_match(run_lambda, command, regex):`
			`"""Execute command and extract first regex match"""`
			`rc, out, _ = run_lambda(command)`
			`if rc != 0:`
			`return None`
			`match = re.search(regex, out)`
			`if match is None:`
			`return None`
			`return match.group(1)`


			`# Check if PyTorch is available`
			`try:`
			`import torch`

			`TORCH_AVAILABLE = True`
			`except (ImportError, NameError, AttributeError, OSError):`
			`TORCH_AVAILABLE = False`


			`# =============================================================================`
			`# Part 2: General System Information Collection (Reusing vLLM Original Logic)`
			`# =============================================================================`


			`def get_platform():`
			`"""Get operating system platform"""`
			`if sys.platform.startswith("linux"):`
			`return "linux"`
			`elif sys.platform.startswith("win32"):`
			`return "win32"`
			`elif sys.platform.startswith("darwin"):`
			`return "darwin"`
			`return sys.platform`


			`def get_os(run_lambda):`
			`"""Get detailed operating system information"""`
			`from platform import machine`

			`if get_platform() == "linux":`
			`# Try reading /etc/*-release`
			`rc, out, _ = run_lambda(`
			`"cat /etc/*-release 2>/dev/null \| grep PRETTY_NAME \| head -1"`
			`)`
			`if rc == 0 and out:`
			`match = re.search(r'PRETTY_NAME="(.*)"', out)`
			`if match:`
			`return f"{match.group(1)} ({machine()})"`
			`# Fallback: use lsb_release`
			`rc, out, _ = run_lambda("lsb_release -d 2>/dev/null")`
			`if rc == 0 and out:`
			`match = re.search(r"Description:\s(.)", out)`
			`if match:`
			`return f"{match.group(1)} ({machine()})"`
			`return f"{get_platform()} ({machine()})"`


			`def get_gcc_version(run_lambda):`
			`"""Get GCC version"""`
			`return run_and_parse_first_match(run_lambda, "gcc --version", r"gcc (.*)")`


			`def get_clang_version(run_lambda):`
			`"""Get Clang version"""`
			`return run_and_parse_first_match(`
			`run_lambda, "clang --version", r"clang version (.*)"`
			`)`


			`def get_cmake_version(run_lambda):`
			`"""Get CMake version"""`
			`return run_and_parse_first_match(run_lambda, "cmake --version", r"cmake (.*)")`


			`def get_libc_version():`
			`"""Get libc version"""`
			`import platform`

			`if get_platform() != "linux":`
			`return "N/A"`
			`return "-".join(platform.libc_ver())`


			`def get_python_platform():`
			`"""Get Python platform information"""`
			`import platform`

			`return platform.platform()`


			`def get_cpu_info(run_lambda):`
			`"""Get CPU information"""`
			`if get_platform() == "linux":`
			`rc, out, err = run_lambda("lscpu")`
			`return out if rc == 0 else err`
			`return "N/A"`


			`def get_pip_packages(run_lambda, patterns=None):`
			`"""Get pip package list"""`
			`if patterns is None:`
			`patterns = {`
			`"torch",`
			`"numpy",`
			`"triton",`
			`"transformers",`
			`"vllm",`
			`"kunlun",`
			`"xpu",`
			`"bkcl",`
			`"xmlir",`
			`}`

			`cmd = [sys.executable, "-mpip", "list", "--format=freeze"]`
			`out = run_and_read_all(run_lambda, cmd)`
			`if out is None:`
			`return "pip3", ""`

			`filtered = "\n".join(`
			`line`
			`for line in out.splitlines()`
			`if any(name.lower() in line.lower() for name in patterns)`
			`)`
			`pip_version = "pip3" if sys.version[0] == "3" else "pip"`
			`return pip_version, filtered`


			`def get_conda_packages(run_lambda, patterns=None):`
			`"""Get conda package list"""`
			`if patterns is None:`
			`patterns = {`
			`"torch",`
			`"numpy",`
			`"triton",`
			`"transformers",`
			`"kunlun",`
			`"xpu",`
			`"bkcl",`
			`"xmlir",`
			`}`

			`conda = os.environ.get("CONDA_EXE", "conda")`
			`out = run_and_read_all(run_lambda, [conda, "list"])`
			`if out is None:`
			`return None`

			`return "\n".join(`
			`line`
			`for line in out.splitlines()`
			`if not line.startswith("#")`
			`and any(name.lower() in line.lower() for name in patterns)`
			`)`


			`# =============================================================================`
			`# Part 3: Kunlun-Specific Information Collection (Core Fix)`
			`# =============================================================================`


			`def parse_xpu_smi_output(run_lambda):`
			`"""`
			`Parse the complete output of xpu-smi command`
			`[Principle Explanation]`
			`The xpu-smi output format is similar to nvidia-smi, we need to parse it with regex.`
			`Example output format:`
			`+-----------------------------------------------------------------------------+`
			`\| XPU-SMI Driver Version: 515.58 XPU-RT Version: N/A \|`
			`\|-------------------------------+----------------------+----------------------+`
			`\| 0 P800 OAM N/A \| 00000000:52:00.0 N/A \| 0 \|`
			`\| N/A 43C N/A 85W / 400W \| 4MiB / 98304MiB \| 0% Default \|`
			`Returns:`
			`dict: Dictionary containing parsing results`
			`"""`
			`rc, output, _ = run_lambda("xpu-smi")`
			`if rc != 0 or not output:`
			`return None`

			`result = {`
			`"raw_output": output,`
			`"driver_version": None,`
			`"xre_version": None,`
			`"devices": [],`
			`}`

			`# Parse header: Driver Version and XPU-RT Version`
			`# Format: \| XPU-SMI Driver Version: 515.58 XPU-RT Version: N/A \|`
			`header_match = re.search(`
			`r"Driver Version:\s(\S+)\s+XPU-RT Version:\s(\S+)", output`
			`)`
			`if header_match:`
			`result["driver_version"] = header_match.group(1)`
			`xre = header_match.group(2)`
			`result["xre_version"] = xre if xre != "N/A" else None`

			`# Parse device information`
			`# Format: \| 0 P800 OAM N/A \| 00000000:52:00.0 N/A \|`
			`# Following: \| N/A 43C N/A 85W / 400W \| 4MiB / 98304MiB \|`

			`# Find all device lines (containing device ID and name)`
			`device_pattern = re.compile(`
			`r"\\|\s(\d+)\s+(\S+(?:\s+\S+)?)\s+(?:N/A\|On\|Off)\s\\|" # ID and Name`
			`r"\s([0-9a-fA-F:\.]+)\s" # Bus-Id`
			`)`

			`# Find memory information`
			`memory_pattern = re.compile(`
			`r"\\|\sN/A\s+\d+C\s+N/A\s+\d+W\s/\s\d+W\s\\|"`
			`r"\s(\d+)MiB\s/\s(\d+)MiB\s\\|" # Memory-Usage / Total`
			`)`

			`lines = output.split("\n")`
			`i = 0`
			`while i < len(lines):`
			`line = lines[i]`
			`device_match = device_pattern.search(line)`
			`if device_match:`
			`device_id = int(device_match.group(1))`
			`device_name = device_match.group(2).strip()`
			`bus_id = device_match.group(3)`

			`# Next line should have memory info`
			`memory_used = 0`
			`memory_total = 0`
			`if i + 1 < len(lines):`
			`mem_match = memory_pattern.search(lines[i + 1])`
			`if mem_match:`
			`memory_used = int(mem_match.group(1))`
			`memory_total = int(mem_match.group(2))`

			`result["devices"].append(`
			`{`
			`"id": device_id,`
			`"name": device_name, # This will correctly get "P800 OAM"`
			`"bus_id": bus_id,`
			`"memory_used_mib": memory_used,`
			`"memory_total_mib": memory_total,`
			`}`
			`)`
			`i += 1`

			`return result`


			`def get_kunlun_gpu_info(run_lambda):`
			`"""`
			`Get Kunlun XPU device information`
			`[Fix Explanation]`
			`Previously used torch.cuda.get_device_properties() to get the name,`
			`but it only returns "GPU" (because Kunlun masquerades as CUDA).`
			`Now parse xpu-smi output to correctly get "P800 OAM".`
			`Returns:`
			`str: Device information string`
			`"""`
			`parsed = parse_xpu_smi_output(run_lambda)`

			`if parsed and parsed["devices"]:`
			`# Get real device name from xpu-smi parsing`
			`lines = []`
			`for dev in parsed["devices"]:`
			`memory_gb = dev["memory_total_mib"] / 1024`
			`# Correctly display: XPU 0: P800 OAM (96.0GB)`
			`lines.append(f"XPU {dev['id']}: {dev['name']} ({memory_gb:.1f}GB)")`
			`return "\n".join(lines)`

			`# Fallback: Use PyTorch interface (but will display as GPU)`
			`if TORCH_AVAILABLE:`
			`try:`
			`device_count = torch.cuda.device_count()`
			`lines = []`
			`for i in range(device_count):`
			`props = torch.cuda.get_device_properties(i)`
			`name = props.name if hasattr(props, "name") else "Kunlun XPU"`
			`memory_gb = (`
			`props.total_memory / (1024**3)`
			`if hasattr(props, "total_memory")`
			`else 0`
			`)`
			`lines.append(f"XPU {i}: {name} ({memory_gb:.1f}GB)")`
			`return "\n".join(lines)`
			`except Exception as e:`
			`return f"Error: {e}"`

			`return None`


			`def get_kunlun_driver_version(run_lambda):`
			`"""`
			`Get Kunlun driver version`
			`[Fix Explanation]`
			`Parse directly from xpu-smi output header instead of calling incorrect commands.`
			`Returns:`
			`str: Driver version, e.g., "515.58"`
			`"""`
			`parsed = parse_xpu_smi_output(run_lambda)`
			`if parsed and parsed["driver_version"]:`
			`return parsed["driver_version"]`
			`return None`


			`def get_kunlun_xre_version(run_lambda):`
			`"""`
			`Get Kunlun XRE (Runtime) version`
			`[Fix Explanation]`
			Previously used `xpu-smi --version` but that parameter doesn't exist.
			`Now parse the "XPU-RT Version" field from xpu-smi standard output header.`
			`Returns:`
			`str: XRE version, or None (if showing N/A)`
			`"""`
			`parsed = parse_xpu_smi_output(run_lambda)`
			`if parsed and parsed["xre_version"]:`
			`return parsed["xre_version"]`
			`return "N/A (not installed or not detected)"`


			`def get_kunlun_topo(run_lambda):`
			`"""`
			`Get Kunlun XPU topology information`
			`Returns:`
			`str: Topology information`
			`"""`
			`# xpu-smi topo -m command can get topology`
			`output = run_and_read_all(run_lambda, "xpu-smi topo -m")`
			`if output:`
			`return output`

			`# Fallback: Show device count`
			`if TORCH_AVAILABLE:`
			`try:`
			`count = torch.cuda.device_count()`
			`return f"Detected {count} Kunlun XPU device(s)"`
			`except Exception:`
			`pass`

			`return None`


			`def get_bkcl_version(run_lambda):`
			`"""`
			`Get BKCL (communication library) version`
			`[Principle Explanation]`
			`BKCL = Baidu Kunlun Communication Library`
			`Similar to NVIDIA's NCCL, used for multi-card communication.`
			`Returns:`
			`str: BKCL version information`
			`"""`
			`# Method 1: From your logs, BKCL prints version when loading`
			`# [WARN][BKCL][globals.cpp:268] xccl version: 6ab4ffb [rdma] ...`
			`# We can try importing related modules`
			`try:`
			`# Try getting from torch_xmlir`
			`import torch_xmlir`

			`# Find path to libbkcl.so`
			`bkcl_path = None`
			`if hasattr(torch_xmlir, "__file__"):`
			`import os`

			`base = os.path.dirname(torch_xmlir.__file__)`
			`candidate = os.path.join(base, "libbkcl.so")`
			`if os.path.exists(candidate):`
			`bkcl_path = candidate`
			`if bkcl_path:`
			`return f"Found at: {bkcl_path}"`
			`except ImportError:`
			`pass`

			`# Method 2: Search from ldconfig`
			`rc, out, _ = run_lambda("ldconfig -p 2>/dev/null \| grep -i bkcl \| head -1")`
			`if rc == 0 and out:`
			`return out`

			`return None`


			`def get_vllm_kunlun_version():`
			`"""`
			`Get vLLM-Kunlun version`
			`[Fix Explanation]`
			`Previously got hardcoded version "0.9.2" from vllm_kunlun.platforms.version,`
			`but actual pip installed version is "0.1.0".`
			`Now prioritize using importlib.metadata to get real installed version.`
			`Returns:`
			`str: Version number`
			`"""`
			`# Method 1 (recommended): Use importlib.metadata (Python 3.8+)`
			`try:`
			`from importlib.metadata import version`

			`return version("vllm-kunlun")`
			`except Exception:`
			`pass`

			`# Method 2: Use pkg_resources`
			`try:`
			`import pkg_resources`

			`return pkg_resources.get_distribution("vllm-kunlun").version`
			`except Exception:`
			`pass`

			`# Method 3 (fallback): Get from code (may be inaccurate)`
			`try:`
			`from vllm_kunlun.platforms.version import get_xvllm_version`

			`return get_xvllm_version() + " (from code, may be inaccurate)"`
			`except ImportError:`
			`pass`

			`return "N/A"`


			`def get_vllm_version():`
			`"""Get vLLM main package version"""`
			`try:`
			`from importlib.metadata import version`

			`return version("vllm")`
			`except Exception:`
			`pass`

			`try:`
			`from vllm import __version__`

			`return __version__`
			`except ImportError:`
			`pass`

			`return "N/A"`


			`# =============================================================================`
			`# Part 4: Environment Variable Collection`
			`# =============================================================================`


			`def get_kunlun_env_vars():`
			`"""Get Kunlun-related environment variables"""`
			`env_vars = ""`
			`kunlun_prefixes = (`
			`"XPU",`
			`"KUNLUN",`
			`"BKCL",`
			`"XCCL",`
			`"XRE",`
			`"TORCH",`
			`"VLLM",`
			`)`
			`secret_terms = ("secret", "token", "api", "access", "password")`

			`for k, v in sorted(os.environ.items()):`
			`if any(term in k.lower() for term in secret_terms):`
			`continue`
			`if any(k.upper().startswith(prefix) for prefix in kunlun_prefixes):`
			`env_vars += f"{k}={v}\n"`

			`return env_vars`


			`# =============================================================================`
			`# Part 5: Define Data Structure and Formatted Output`
			`# =============================================================================`

			`KunlunSystemEnv = namedtuple(`
			`"KunlunSystemEnv",`
			`[`
			`# General system information`
			`"os",`
			`"gcc_version",`
			`"clang_version",`
			`"cmake_version",`
			`"libc_version",`
			`"python_version",`
			`"python_platform",`
			`"pip_version",`
			`"pip_packages",`
			`"conda_packages",`
			`"cpu_info",`
			`# PyTorch information`
			`"torch_version",`
			`"is_debug_build",`
			`# Kunlun-specific information`
			`"kunlun_xpu_info",`
			`"kunlun_driver_version",`
			`"kunlun_xre_version",`
			`"bkcl_version",`
			`"kunlun_topo",`
			`# vLLM related`
			`"vllm_version",`
			`"vllm_kunlun_version",`
			`"env_vars",`
			`],`
			`)`


			`def get_kunlun_env_info():`
			`"""Collect all environment information"""`
			`run_lambda = run`
			`pip_version, pip_list_output = get_pip_packages(run_lambda)`

			`# PyTorch information`
			`if TORCH_AVAILABLE:`
			`torch_version = torch.__version__`
			`debug_mode_str = str(torch.version.debug)`
			`else:`
			`torch_version = "N/A"`
			`debug_mode_str = "N/A"`

			`sys_version = sys.version.replace("\n", " ")`

			`return KunlunSystemEnv(`
			`# General system information`
			`os=get_os(run_lambda),`
			`gcc_version=get_gcc_version(run_lambda),`
			`clang_version=get_clang_version(run_lambda),`
			`cmake_version=get_cmake_version(run_lambda),`
			`libc_version=get_libc_version(),`
			`python_version=f"{sys_version} ({sys.maxsize.bit_length() + 1}-bit runtime)",`
			`python_platform=get_python_platform(),`
			`pip_version=pip_version,`
			`pip_packages=pip_list_output,`
			`conda_packages=get_conda_packages(run_lambda),`
			`cpu_info=get_cpu_info(run_lambda),`
			`# PyTorch information`
			`torch_version=torch_version,`
			`is_debug_build=debug_mode_str,`
			`# Kunlun-specific information`
			`kunlun_xpu_info=get_kunlun_gpu_info(run_lambda),`
			`kunlun_driver_version=get_kunlun_driver_version(run_lambda),`
			`kunlun_xre_version=get_kunlun_xre_version(run_lambda),`
			`bkcl_version=get_bkcl_version(run_lambda),`
			`kunlun_topo=get_kunlun_topo(run_lambda),`
			`# vLLM related`
			`vllm_version=get_vllm_version(),`
			`vllm_kunlun_version=get_vllm_kunlun_version(),`
			`env_vars=get_kunlun_env_vars(),`
			`)`


# Output format template.
# Each {placeholder} is a KunlunSystemEnv field name; pretty_str() fills the
# template with str.format(**fields). The trailing .strip() drops the newline
# that follows the opening quotes and the one before the closing quotes.
kunlun_env_info_fmt = """
==============================
System Info
==============================
OS : {os}
GCC version : {gcc_version}
Clang version : {clang_version}
CMake version : {cmake_version}
Libc version : {libc_version}
==============================
PyTorch Info
==============================
PyTorch version : {torch_version}
Is debug build : {is_debug_build}
==============================
Python Environment
==============================
Python version : {python_version}
Python platform : {python_platform}
==============================
Kunlun / XPU Info
==============================
XPU models and configuration :
{kunlun_xpu_info}
Kunlun driver version : {kunlun_driver_version}
XRE (Runtime) version : {kunlun_xre_version}
BKCL version : {bkcl_version}
XPU Topology:
{kunlun_topo}
==============================
CPU Info
==============================
{cpu_info}
==============================
Versions of relevant libraries
==============================
{pip_packages}
{conda_packages}
==============================
vLLM-Kunlun Info
==============================
vLLM Version : {vllm_version}
vLLM-Kunlun Version : {vllm_kunlun_version}
==============================
Environment Variables
==============================
{env_vars}
""".strip()


			`def pretty_str(envinfo):`
			`"""Format environment information"""`
			`mutable_dict = envinfo._asdict()`

			`# Replace None with "Could not collect"`
			`for key in mutable_dict:`
			`if mutable_dict[key] is None:`
			`mutable_dict[key] = "Could not collect"`

			`# Handle pip package list`
			`if mutable_dict["pip_packages"]:`
			`mutable_dict["pip_packages"] = "\n".join(`
			`f"[{envinfo.pip_version}] {line}"`
			`for line in mutable_dict["pip_packages"].split("\n")`
			`if line`
			`)`
			`else:`
			`mutable_dict["pip_packages"] = "No relevant packages"`

			`# Handle conda package list`
			`if mutable_dict["conda_packages"]:`
			`mutable_dict["conda_packages"] = "\n".join(`
			`f"[conda] {line}"`
			`for line in mutable_dict["conda_packages"].split("\n")`
			`if line`
			`)`
			`else:`
			`mutable_dict["conda_packages"] = ""`

			`return kunlun_env_info_fmt.format(**mutable_dict)`


			`def get_pretty_kunlun_env_info():`
			`"""Get formatted environment information"""`
			`return pretty_str(get_kunlun_env_info())`


			`def main():`
			`"""Main entry point"""`
			`print("Collecting Kunlun XPU environment information...")`
			`output = get_pretty_kunlun_env_info()`
			`print(output)`


# Run the collector only when this file is executed as a script, so importing
# the module to reuse its helpers does not trigger a collection run.
if __name__ == "__main__":
    main()