# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM-MLU project
"""
Warmup kernels used during model execution.

This is useful specifically for JIT'ed kernels as we don't want JIT'ing to
happen during model execution.
"""
from typing import TYPE_CHECKING

from vllm.logger import init_logger

if TYPE_CHECKING:
    # Imported only for the type annotation on kernel_warmup(); guarded so
    # there is no runtime dependency on the GPU worker module.
    from vllm.v1.worker.gpu_worker import Worker

# Module-level logger, following the vLLM one-logger-per-module convention.
logger = init_logger(__name__)
def kernel_warmup(worker: "Worker") -> None:
    """Warm up kernels before model execution.

    =============================
    Modify by vllm_mlu
    =============================
    @brief: skip deep GEMM warmup, flashinfer autotune, and
    flash infer attention warmup
    ==================
    End of MLU Hijack
    ==================

    Intentionally a no-op on MLU: the GPU-specific warmups named above do
    not apply here, but the hook is kept so callers need no changes.

    Args:
        worker: The worker whose kernels would be warmed up. Unused;
            accepted for interface compatibility with the GPU
            implementation (see the ``gpu_worker`` type import).
    """