[Model] Support DeepSeek-V4
This commit is contained in:
33
vllm_mlu/model_executor/warmup/kernel_warmup.py
Normal file
33
vllm_mlu/model_executor/warmup/kernel_warmup.py
Normal file
@@ -0,0 +1,33 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM-MLU project
|
||||
"""
|
||||
Warmup kernels used during model execution.
|
||||
This is useful specifically for JIT'ed kernels as we don't want JIT'ing to
|
||||
happen during model execution.
|
||||
"""
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from vllm.logger import init_logger
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from vllm.v1.worker.gpu_worker import Worker
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
|
||||
def kernel_warmup(worker: "Worker") -> None:
    """Warm up execution kernels before serving begins.

    =============================
    Modify by vllm_mlu
    =============================
    MLU hijack: intentionally a no-op. The upstream GPU implementation
    performs deep GEMM warmup, FlashInfer autotune, and FlashInfer
    attention warmup — all CUDA-specific steps that do not apply on MLU
    devices, so they are skipped here.
    ==================
    End of MLU Hijack
    ==================

    Args:
        worker: The worker whose kernels would be warmed up. Unused on
            MLU, kept for signature compatibility with the GPU path.
    """
    # Deliberately empty: no JIT'ed kernels need pre-compilation on MLU.
|
||||
Reference in New Issue
Block a user