# SPDX-License-Identifier: Apache-2.0
|
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM-MLU project
|
|
|
|
import importlib.util
|
|
from vllm_mlu._mlu_utils import *
|
|
from vllm_mlu.logger import logger
|
|
|
|
|
|
def is_module_available(module_name):
    """Return True if *module_name* can be imported in this environment.

    Relies on importlib's finder machinery, so the module is only located,
    never actually imported (no import side effects are triggered).
    """
    return importlib.util.find_spec(module_name) is not None
|
|
|
|
def check_environ_compatibility():
    """Log an error if packages known to conflict with vLLM-MLU are present.

    Currently only checks for ``apex``. Its presence is reported via
    ``logger.error`` but execution continues — this is a best-effort
    warning, not a hard failure.
    """
    if is_module_available('apex'):
        # Plain string literals: the previous f-prefixes had no
        # placeholders (ruff F541) and served no purpose.
        logger.error("The `apex` package is currently present in your environment, "
                     "which may cause model accuracy issues or other problems. It is "
                     "strongly recommended that you uninstall it before using vLLM.")
|
|
|
|
# Check environment compatibility first before applying mlu hijack.
check_environ_compatibility()

# Static message: dropped the needless f-prefix (ruff F541); the logged
# text is byte-identical.
logger.info("[MLU] Apply Monkey Patch.")
|
|
|
|
# Apply v1 hijack
|
|
import vllm_mlu.v1.engine.core
|
|
import vllm_mlu.v1.engine.core_client
|
|
import vllm_mlu.v1.engine.llm_engine
|
|
import vllm_mlu.v1.engine.async_llm
|
|
import vllm_mlu.v1.core.sched.scheduler
|
|
import vllm_mlu.v1.core.single_type_kv_cache_manager
|
|
import vllm_mlu.v1.core.kv_cache_utils
|
|
import vllm_mlu.v1.core.kv_cache_manager
|
|
import vllm_mlu.v1.executor.abstract
|
|
import vllm_mlu.v1.executor.ray_executor
|
|
import vllm_mlu.v1.executor.multiproc_executor
|
|
import vllm_mlu.v1.sample.rejection_sampler
|
|
import vllm_mlu.v1.worker.lora_model_runner_mixin
|
|
import vllm_mlu.v1.worker.block_table
|
|
import vllm_mlu.v1.worker.gpu_input_batch
|
|
import vllm_mlu.v1.worker.kv_connector_model_runner_mixin
|
|
import vllm_mlu.v1.attention.backends.gdn_attn
|
|
import vllm_mlu.v1.attention.backends.mla.flashmla
|
|
import vllm_mlu.compilation.fix_functionalization
|
|
|
|
# Apply common hijack
|
|
import vllm_mlu.attention.layer
|
|
import vllm_mlu.benchmarks.datasets
|
|
import vllm_mlu.config.model
|
|
import vllm_mlu.config.scheduler
|
|
import vllm_mlu.config.speculative
|
|
import vllm_mlu.config.vllm
|
|
import vllm_mlu.utils
|
|
import vllm_mlu.distributed.parallel_state
|
|
import vllm_mlu.distributed.kv_transfer.kv_connector.factory
|
|
import vllm_mlu.engine.arg_utils
|
|
import vllm_mlu.entrypoints.llm
|
|
import vllm_mlu.lora.layers.base_linear
|
|
import vllm_mlu.lora.layers.row_parallel_linear
|
|
import vllm_mlu.lora.layers.column_parallel_linear
|
|
import vllm_mlu.model_executor.parameter
|
|
import vllm_mlu.model_executor.layers.linear
|
|
import vllm_mlu.model_executor.layers.rotary_embedding
|
|
import vllm_mlu.model_executor.layers.quantization.utils.w8a8_utils
|
|
import vllm_mlu.model_executor.layers.quantization.fp8
|
|
import vllm_mlu.model_executor.layers.activation
|
|
import vllm_mlu.model_executor.layers.layernorm
|
|
import vllm_mlu.model_executor.layers.fused_moe.layer
|
|
import vllm_mlu.model_executor.model_loader.tensorizer_loader
|
|
import vllm_mlu.model_executor.models.registry
|
|
import vllm_mlu.model_executor.models.config
|
|
import vllm_mlu.multimodal.utils
|
|
# Optional hijack: only patch the lmcache connector when lmcache is installed.
if is_module_available('lmcache'):
    import vllm_mlu.distributed.kv_transfer.kv_connector.v1.lmcache_connector


# NOTE(review): VLLM_CI_ACCURACY_TEST and VLLM_SCHEDULER_PROFILE are not
# defined in this file — presumably they come from the
# `from vllm_mlu._mlu_utils import *` star import above; confirm.
if VLLM_CI_ACCURACY_TEST:
    # CI accuracy runs swap in a dummy model loader.
    import vllm_mlu.model_executor.model_loader.dummy_loader


if VLLM_SCHEDULER_PROFILE:
    # Scheduler profiling additionally patches the OpenAI API server.
    import vllm_mlu.entrypoints.openai.api_server