# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM-MLU project

import os

import torch
import vllm.envs as envs


def _check_env(env, default=False):
    """Read a boolean flag from the environment.

    Args:
        env: Name of the environment variable.
        default: Value returned when the variable is not set.

    Returns:
        True when the variable is set to "true" (any letter case) or "1",
        False when it is set to anything else, and ``default`` when unset.
    """
    raw = os.environ.get(env)
    if raw is None:
        return default
    return raw.lower() in ("true", "1")


def _check_env_value(env, default=0):
    """Read a non-negative integer from the environment.

    Args:
        env: Name of the environment variable.
        default: Value returned when the variable is not set.

    Returns:
        The variable parsed with ``int()``, or ``default`` when unset.

    Raises:
        ValueError: If the variable is set but is not a string of decimal
            digits (this includes the empty string and signed values).
    """
    raw = os.environ.get(env)
    if raw is None:
        return default
    # str.isdigit() is also true for characters such as "²" that int()
    # rejects; str.isdecimal() matches exactly what int() can convert, so
    # every invalid value is reported with the same message.
    if not raw.isdecimal():
        raise ValueError(f"'{env}' should be set with integer")
    return int(raw)


def _check_env_float(env, default=0):
    """Read a floating-point value from the environment.

    Args:
        env: Name of the environment variable.
        default: Value returned when the variable is not set.

    Returns:
        The variable parsed with ``float()``, or ``default`` when unset.

    Raises:
        ValueError: If the variable is set but ``float()`` cannot parse it.
    """
    raw = os.environ.get(env)
    if raw is None:
        return default
    try:
        return float(raw)
    except ValueError as err:
        # Chain explicitly so the original parse failure stays visible.
        raise ValueError(f"'{env}' should be set with float") from err


# VLLM_LATENCY_DEBUG: Get more kernel info for benchmark latency.
VLLM_LATENCY_DEBUG = _check_env("VLLM_LATENCY_DEBUG", default=False)
# VLLM_LATENCY_DEBUG_NO_DEVICE: Get more kernel info(without device) for benchmark latency.
VLLM_LATENCY_DEBUG_NO_DEVICE = _check_env("VLLM_LATENCY_DEBUG_NO_DEVICE", default=False)
# VLLM_DUMP_OUTPUTS: Dump each layer outputs when running vLLM inference.
VLLM_DUMP_OUTPUTS = _check_env("VLLM_DUMP_OUTPUTS", default=False)
# VLLM_DUMP_MLU_INFO: Get device info when running vLLM inference.
VLLM_DUMP_MLU_INFO = _check_env("VLLM_DUMP_MLU_INFO", default=False)
# VLLM_DUMP_MLU_INFO_DEBUG: Dump device debug info when running vLLM inference.
VLLM_DUMP_MLU_INFO_DEBUG = _check_env("VLLM_DUMP_MLU_INFO_DEBUG", default=False)
# VLLM_SCHEDULER_PROFILE: Profiling vLLM scheduler.
VLLM_SCHEDULER_PROFILE = _check_env("VLLM_SCHEDULER_PROFILE", default=False)
# VLLM_GRAPH_DEBUG: Debug the graph status when running decoder, default value is True.
# Set to False to disable warning messages.
VLLM_GRAPH_DEBUG = _check_env("VLLM_GRAPH_DEBUG", default=True)
# VLLM_AVG_MOE_EN: make moe experts workload balance, default value is False.
# Also turned on when VLLM_RANDOM_MOE_EN is set.
VLLM_AVG_MOE_EN = any(
    _check_env(flag, default=False)
    for flag in ("VLLM_AVG_MOE_EN", "VLLM_RANDOM_MOE_EN")
)
VLLM_RANDOM_MOE_EN = _check_env("VLLM_RANDOM_MOE_EN", default=False)
# VLLM_LOGITS_USE_ALL_GATHER: use allgather for logits collection, default value is False.
VLLM_LOGITS_USE_ALL_GATHER = _check_env("VLLM_LOGITS_USE_ALL_GATHER", default=False)

# Switches derived from the raw debug flags read earlier in this module.
# Latency debugging is on when either latency flag is set.
VLLM_LATENCY_DEBUG_EN = VLLM_LATENCY_DEBUG or VLLM_LATENCY_DEBUG_NO_DEVICE
# Device-side latency debugging needs VLLM_LATENCY_DEBUG without the NO_DEVICE flag.
VLLM_LATENCY_DEBUG_WITH_DEVICE_EN = VLLM_LATENCY_DEBUG and not VLLM_LATENCY_DEBUG_NO_DEVICE
# Device info is dumped only while device-side latency debugging is on.
VLLM_DUMP_MLU_INFO_EN = VLLM_LATENCY_DEBUG_WITH_DEVICE_EN and VLLM_DUMP_MLU_INFO
# Rebinds the raw flag: debug dumping additionally requires VLLM_DUMP_MLU_INFO_EN.
VLLM_DUMP_MLU_INFO_DEBUG = VLLM_DUMP_MLU_INFO_DEBUG and VLLM_DUMP_MLU_INFO_EN

# VLLM_V1_USE_UNCHUNK_SCHED: v1 uses the unchunk scheduler, default value is True.
VLLM_V1_USE_UNCHUNK_SCHED = _check_env("VLLM_V1_USE_UNCHUNK_SCHED", default=True)
# VLLM_V1_MIN_PREFILL_BATCH: the min scheduling batch in v1, default is 1.
VLLM_V1_MIN_PREFILL_BATCH = _check_env_value("VLLM_V1_MIN_PREFILL_BATCH", default=1)
# VLLM_V1_USE_FULL_GRAPH: v1 uses full graph capture, default value is True.
VLLM_V1_USE_FULL_GRAPH = _check_env("VLLM_V1_USE_FULL_GRAPH", default=True)
# VLLM_V1_BENCHMARK: v1 benchmark, default value is False.
VLLM_V1_BENCHMARK = _check_env("VLLM_V1_BENCHMARK", default=False)
# VLLM_MTP_DEBUG: used to show the mtp accepted rate, default value is False.
VLLM_MTP_DEBUG = _check_env("VLLM_MTP_DEBUG", default=False)
# VLLM_MTP_NO_QUANT: mtp uses the origin dtype, quant_config uses None.
VLLM_MTP_NO_QUANT = _check_env("VLLM_MTP_NO_QUANT", default=False)
# VLLM_MTP_FIXED_ACCEPTANCE_RATE: use fixed acceptance rate, default value is None.
VLLM_MTP_FIXED_ACCEPTANCE_RATE = _check_env_float("VLLM_MTP_FIXED_ACCEPTANCE_RATE", default=None)
# VLLM_V1_UNCHUNK_SCHED_LOG: print v1 unchunk scheduler state, default value is False.
VLLM_V1_UNCHUNK_SCHED_LOG = _check_env("VLLM_V1_UNCHUNK_SCHED_LOG", default=False)
# VLLM_MOE_PREFILL_CHUNK_SIZE: in number of tokens. enabled when > 0.
VLLM_MOE_PREFILL_CHUNK_SIZE = _check_env_value("VLLM_MOE_PREFILL_CHUNK_SIZE", default=0)
# VLLM_CI_ACCURACY_TEST: CI accuracy test, default value is False.
VLLM_CI_ACCURACY_TEST = _check_env("VLLM_CI_ACCURACY_TEST", default=False)
# VLLM_DISAGG_TRANS_ALL_BLOCKS: optimize the performance of disagg, default value is True.
VLLM_DISAGG_TRANS_ALL_BLOCKS = _check_env("VLLM_DISAGG_TRANS_ALL_BLOCKS", default=True)
# Debug switches for vLLM disagg, all False by default.
VLLM_DISAGG_CNPX_EXECUTE = _check_env("VLLM_DISAGG_CNPX_EXECUTE", default=False)
VLLM_DISAGG_CNPX_REQUEST = _check_env("VLLM_DISAGG_CNPX_REQUEST", default=False)
VLLM_DISAGG_FAKE_DECODER = _check_env("VLLM_DISAGG_FAKE_DECODER", default=False)