Files
2026-04-24 09:58:03 +08:00

33 lines
772 B
Python

# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM-MLU project
"""
Warmup kernels used during model execution.
This is useful specifically for JIT'ed kernels as we don't want JIT'ing to
happen during model execution.
"""
from typing import TYPE_CHECKING

from vllm.logger import init_logger

if TYPE_CHECKING:
    # Needed only for the string annotation on kernel_warmup; keeping it
    # under TYPE_CHECKING means it is never imported at runtime.
    from vllm.v1.worker.gpu_worker import Worker

logger = init_logger(__name__)
def kernel_warmup(worker: "Worker") -> None:
    """Kernel warmup entry point; a deliberate no-op in this module.

    Per the hijack banner below, this version skips deep GEMM warmup,
    flashinfer autotune, and flashinfer attention warmup, so the body
    performs no work.

    Args:
        worker: The worker passed by the caller. It is not used here.

    Returns:
        None.
    """
    '''
    =============================
    Modify by vllm_mlu
    =============================
    @brief: skip deep GEMM warmup, flashinfer autotune, and
            flash infer attention warmup
    '''
    '''
    ==================
    End of MLU Hijack
    ==================
    '''
    # Nothing to warm up: every warmup step is intentionally skipped.
    return None