enginex-biren-vllm/vllm_br/envs.py

################################################################################
# Copyright(c)2020-2025 Shanghai Biren Technology Co., Ltd. All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
################################################################################

import os
from typing import Any, Callable, Dict

import pybrml
import torch
import torch_br

# The begin-* and end-* markers here are used by the documentation generator
# to extract the env vars that are in use.


# begin-env-vars-definition
def check_allreduce_available():
    """Probe device 0's P2P links via pybrml and pick an all-reduce value.

    Counts the devices (device 0 itself is part of the scan) whose P2P
    status with device 0 reports the direct-link type, then maps that count
    to the returned value.

    Returns:
        int: 4 when the count is 3, 8 when the count is 4, otherwise 1.
        1 is also returned when pybrml reports no devices.
    """
    P2P_DIRECT_LINK_TYPE = 2
    # NOTE(review): presumably 3 / 4 direct links correspond to 4-card /
    # 8-card fully linked groups -- confirm against the hardware topology.
    link_count_to_all_reduce = {3: 4, 4: 8}

    pybrml.brmlInit()
    try:
        device_count = pybrml.brmlDeviceGetCount()
        if device_count <= 0:
            # No device to inspect: fall back to the default instead of
            # failing on an empty topology.
            return 1

        # Only device 0's links decide the result, so querying the other
        # rows of the link matrix is unnecessary.
        first_dev = pybrml.brmlDeviceGetHandleByIndex(0)
        direct_links = 0
        for idx in range(device_count):
            other_dev = pybrml.brmlDeviceGetHandleByIndex(idx)
            status = pybrml.brmlDeviceGetP2PStatus_v3(first_dev, other_dev)
            if status.type == P2P_DIRECT_LINK_TYPE:
                direct_links += 1
    finally:
        # Always release the library, even when a query above raised.
        pybrml.brmlShutdown()

    return link_count_to_all_reduce.get(direct_links, 1)


# Probed once at import time; used as the default for
# VLLM_BR_USE_FUSED_ALLREDUCE when that environment variable is unset.
_VLLM_BR_USE_FUSED_ALLREDUCE_CACHE = check_allreduce_available()

# Maps each supported environment variable name to a zero-argument getter.
# Getters are evaluated on every attribute access (see module __getattr__),
# so changes to os.environ made after import are picked up.
env_variables: Dict[str, Callable[[], Any]] = {
    "VLLM_VERSION":
    lambda: os.getenv("VLLM_VERSION", None),
    # NOTE(review): returns the raw string when set, so "0" is truthy here.
    # Left unchanged because callers may compare against the string value.
    "VLLM_BR_USE_PAGED_ATTN":
    lambda: os.getenv("VLLM_BR_USE_PAGED_ATTN", False),
    "VLLM_BR_WEIGHT_TYPE":
    lambda: os.getenv("VLLM_BR_WEIGHT_TYPE", "NUMA"),
    "VLLM_BR_QUANT_METHOD":
    lambda: os.getenv("VLLM_BR_QUANT_METHOD", "INT8"),
    "VLLM_BR_USE_FUSED_ALLREDUCE":
    lambda: int(
        os.getenv("VLLM_BR_USE_FUSED_ALLREDUCE",
                  _VLLM_BR_USE_FUSED_ALLREDUCE_CACHE)),
    "VLLM_BR_EMBEDDING_S0B":
    lambda: bool(int(os.getenv("VLLM_BR_EMBEDDING_S0B", False))),
    # MoE (DeepSeek)
    "VLLM_BR_STATIC_MOE_DECODER_MAX_LEN":
    lambda: int(os.getenv("VLLM_BR_STATIC_MOE_DECODER_MAX_LEN", "256")),
    # NOTE: following are device properties
    # The device is only queried when the override is not set, so an
    # explicit VLLM_BR_DEVICE_SPC_NUM never triggers a device lookup.
    "VLLM_BR_DEVICE_SPC_NUM":
    lambda: int(
        os.environ["VLLM_BR_DEVICE_SPC_NUM"]
        if "VLLM_BR_DEVICE_SPC_NUM" in os.environ else torch_br.supa.
        get_device_properties(torch.device("supa")).max_compute_units),
    "VLLM_BR_DEVICE_WARP_SIZE":
    lambda: int(os.getenv("VLLM_BR_DEVICE_WARP_SIZE", 32)),
    "VLLM_BR_USE_CPU_ALL_REDUCE":
    lambda: int(os.getenv("VLLM_BR_USE_CPU_ALL_REDUCE", 0)),
    "VLLM_SCCL_SO_PATH":
    lambda: os.getenv(
        "VLLM_SCCL_SO_PATH",
        "/usr/local/birensupa/base/latest/succl/lib/x86_64-linux-gnu/libsuccl.so"
    ),
    "VLLM_RANDOMIZE_DP_DUMMY_INPUTS":
    lambda: bool(int(os.getenv("VLLM_RANDOMIZE_DP_DUMMY_INPUTS", False))),
    "VLLM_PP_CPU_SEND_RECV":
    lambda: bool(int(os.getenv("VLLM_PP_CPU_SEND_RECV", False))),
    "VLLM_BR_USE_FP32_ALL_REDUCE":
    lambda: int(os.getenv("VLLM_BR_USE_FP32_ALL_REDUCE", 0)),
    # bool() of any non-empty string is True, which made "0" enable this
    # flag. Parse explicitly: the usual "off" spellings disable it, any
    # other non-empty value keeps enabling it as before.
    "VLLM_BR_USE_MROPE_0_9_2":
    lambda: (os.getenv("VLLM_BR_USE_MROPE_0_9_2", "").strip().lower()
             not in ("", "0", "false", "no", "off")),
    "VLLM_BR_ENABLE_TP_GROUPS_IN_SUPERNODE":
    lambda: bool(int(os.getenv("VLLM_BR_ENABLE_TP_GROUPS_IN_SUPERNODE", "0"))),
}

# end-env-vars-definition


def __getattr__(name: str):
    """Resolve a module attribute by evaluating its registered getter.

    Called for attribute names not found on the module through normal
    lookup. The getter stored in ``env_variables`` is invoked on every
    access, so the value is computed lazily each time.

    Raises:
        AttributeError: if ``name`` is not a key of ``env_variables``.
    """
    if name not in env_variables:
        raise AttributeError(
            f"module {__name__!r} has no attribute {name!r}")
    getter = env_variables[name]
    return getter()


def __dir__():
    """Return the names of all registered environment variables."""
    return [var_name for var_name in env_variables]