Files
enginex-mlu590-vllm/vllm_mlu/mlu_hijack.py
2026-04-24 09:58:03 +08:00

80 lines
3.0 KiB
Python

# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM-MLU project
import importlib.util
from vllm_mlu._mlu_utils import *
from vllm_mlu.logger import logger
def is_module_available(module_name: str) -> bool:
    """Return True if ``module_name`` can be located on the import path.

    Uses :func:`importlib.util.find_spec` so the check does not actually
    import the target module (and therefore triggers none of its
    import-time side effects).

    Args:
        module_name: Fully-qualified module name, e.g. ``"apex"``.

    Returns:
        True when an import spec exists for the module, False otherwise.
    """
    try:
        return importlib.util.find_spec(module_name) is not None
    except ModuleNotFoundError:
        # find_spec raises (rather than returning None) when a *parent*
        # package of a dotted name is missing; treat that as "not available".
        return False
def check_environ_compatibility():
    """Log an error for known-incompatible packages found in the environment.

    Currently flags ``apex``: per the message below, its presence may cause
    model accuracy issues or other problems on this platform. This only
    logs — it does not raise or uninstall anything.
    """
    if is_module_available('apex'):
        # Plain string literals: there is no interpolation, so the original
        # f-prefixes were unnecessary.
        logger.error("The `apex` package is currently present in your environment, "
                     "which may cause model accuracy issues or other problems. It is "
                     "strongly recommended that you uninstall it before using vLLM.")
# Check environment compatibility first before applying mlu hijack.
check_environ_compatibility()
# Plain string literal: no interpolation, so no f-prefix needed.
logger.info("[MLU] Apply Monkey Patch.")
# Apply v1 hijack
# NOTE(review): each `import vllm_mlu.*` below is performed purely for its
# import-time side effect — presumably each module monkey-patches the
# corresponding upstream vllm module for MLU support (consistent with the
# "[MLU] Apply Monkey Patch" log above). The imported names are never used
# here, and the import order may matter; do not reorder without verifying.
import vllm_mlu.v1.engine.core
import vllm_mlu.v1.engine.core_client
import vllm_mlu.v1.engine.llm_engine
import vllm_mlu.v1.engine.async_llm
import vllm_mlu.v1.core.sched.scheduler
import vllm_mlu.v1.core.single_type_kv_cache_manager
import vllm_mlu.v1.core.kv_cache_utils
import vllm_mlu.v1.core.kv_cache_manager
import vllm_mlu.v1.executor.abstract
import vllm_mlu.v1.executor.ray_executor
import vllm_mlu.v1.executor.multiproc_executor
import vllm_mlu.v1.sample.rejection_sampler
import vllm_mlu.v1.worker.lora_model_runner_mixin
import vllm_mlu.v1.worker.block_table
import vllm_mlu.v1.worker.gpu_input_batch
import vllm_mlu.v1.worker.kv_connector_model_runner_mixin
import vllm_mlu.v1.attention.backends.gdn_attn
import vllm_mlu.v1.attention.backends.mla.flashmla
import vllm_mlu.compilation.fix_functionalization
# Apply common hijack (patches shared between engine versions)
import vllm_mlu.attention.layer
import vllm_mlu.benchmarks.datasets
import vllm_mlu.config.model
import vllm_mlu.config.scheduler
import vllm_mlu.config.speculative
import vllm_mlu.config.vllm
import vllm_mlu.utils
import vllm_mlu.distributed.parallel_state
import vllm_mlu.distributed.kv_transfer.kv_connector.factory
import vllm_mlu.engine.arg_utils
import vllm_mlu.entrypoints.llm
import vllm_mlu.lora.layers.base_linear
import vllm_mlu.lora.layers.row_parallel_linear
import vllm_mlu.lora.layers.column_parallel_linear
import vllm_mlu.model_executor.parameter
import vllm_mlu.model_executor.layers.linear
import vllm_mlu.model_executor.layers.rotary_embedding
import vllm_mlu.model_executor.layers.quantization.utils.w8a8_utils
import vllm_mlu.model_executor.layers.quantization.fp8
import vllm_mlu.model_executor.layers.activation
import vllm_mlu.model_executor.layers.layernorm
import vllm_mlu.model_executor.layers.fused_moe.layer
import vllm_mlu.model_executor.model_loader.tensorizer_loader
import vllm_mlu.model_executor.models.registry
import vllm_mlu.model_executor.models.config
import vllm_mlu.multimodal.utils
# Optional hijack: only meaningful when the `lmcache` package is installed.
if is_module_available('lmcache'):
    import vllm_mlu.distributed.kv_transfer.kv_connector.v1.lmcache_connector
# NOTE(review): VLLM_CI_ACCURACY_TEST and VLLM_SCHEDULER_PROFILE are not
# defined in this file — presumably exported by the wildcard import from
# vllm_mlu._mlu_utils at the top; verify against that module.
if VLLM_CI_ACCURACY_TEST:
    import vllm_mlu.model_executor.model_loader.dummy_loader
if VLLM_SCHEDULER_PROFILE:
    import vllm_mlu.entrypoints.openai.api_server