Files

33 lines
772 B
Python
Raw Permalink Normal View History

2026-04-24 09:50:34 +08:00
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM-MLU project
"""
Warmup kernels used during model execution.
This is useful specifically for JIT'ed kernels as we don't want JIT'ing to
happen during model execution.
"""
from typing import TYPE_CHECKING
from vllm.logger import init_logger
if TYPE_CHECKING:
from vllm.v1.worker.gpu_worker import Worker
logger = init_logger(__name__)
def kernel_warmup(worker: "Worker") -> None:
    """No-op kernel warmup for the MLU backend.

    vllm_mlu hijack: intentionally skips the warmup steps the upstream
    GPU implementation performs — deep GEMM warmup, FlashInfer autotune,
    and FlashInfer attention warmup — since they do not apply on MLU.

    Args:
        worker: Worker whose kernels would be warmed up. Unused here;
            kept so the call site matches the upstream GPU signature.
    """
    # Intentionally empty: JIT warmup is not needed on this backend.