################################################################################
# Copyright(c)2020-2025 Shanghai Biren Technology Co., Ltd. All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
################################################################################
|
|
|
|
import enum
|
|
|
|
from vllm.config.compilation import CUDAGraphMode
|
|
|
|
|
|
class CompilationLevel:
    """Integer constants for the levels of the compilation process.

    This is a plain namespace class (not an ``enum.Enum``): every attribute
    is an ordinary ``int``, so the values can be compared and stored
    directly.
    """
    # constants for the levels of the compilation process
    NO_COMPILATION = 0
    DYNAMO_AS_IS = 1
    DYNAMO_ONCE = 2
    PIECEWISE = 3
|
|
|
|
|
|
class SUPAGraphMode(enum.Enum):
    """Constants for the supagraph mode in CompilationConfig.

    Meanwhile, the subset enum `NONE`, `PIECEWISE` and `FULL` are also
    treated as concrete runtime mode for supagraph runtime dispatching.

    `NONE`, `PIECEWISE` and `FULL` carry a single integer value.
    `FULL_DECODE_ONLY` and `FULL_AND_PIECEWISE` carry a 2-tuple of those
    integers: element 0 is the mode returned by `decode_mode()` and
    element 1 is the mode returned by `mixed_mode()`.
    """
    NONE = 0
    PIECEWISE = 1
    FULL = 2
    # Inside the class body the names below still refer to the raw integers,
    # so these members have the values (2, 0) and (2, 1).
    FULL_DECODE_ONLY = (FULL, NONE)
    FULL_AND_PIECEWISE = (FULL, PIECEWISE)

    def decode_mode(self) -> 'SUPAGraphMode':
        """Return the member for element 0 of a tuple value, else `self`."""
        if self.separate_routine():
            return SUPAGraphMode(self.value[0])
        return self

    def mixed_mode(self) -> 'SUPAGraphMode':
        """Return the member for element 1 of a tuple value, else `self`."""
        if self.separate_routine():
            return SUPAGraphMode(self.value[1])
        return self

    def requires_piecewise_compilation(self) -> bool:
        """Return True if either sub-mode is `PIECEWISE`."""
        return (self.decode_mode() == SUPAGraphMode.PIECEWISE
                or self.mixed_mode() == SUPAGraphMode.PIECEWISE)

    def max_supagraph_mode(self) -> 'SUPAGraphMode':
        """Return the member with the largest integer among the sub-modes.

        For a single-valued member this is the member itself.
        """
        if self.separate_routine():
            return SUPAGraphMode(max(self.value))
        return self

    def has_full_supagraphs(self) -> bool:
        """Return True if `max_supagraph_mode()` is `FULL`."""
        return self.max_supagraph_mode() == SUPAGraphMode.FULL

    # ychun, trick for CUDAGraphMode
    def has_full_cudagraphs(self) -> bool:
        """CUDAGraphMode-style spelling of `has_full_supagraphs()`.

        NOTE(review): presumably this exists so a SUPAGraphMode can be passed
        to code that calls `has_full_cudagraphs()` on a CUDAGraphMode --
        confirm against callers.

        The answer is computed purely from this enum. Comparing a
        SUPAGraphMode member with a member of another Enum class is never
        equal, which would make plain `FULL` report False while the tuple
        modes containing `FULL` report True.
        """
        return self.has_full_supagraphs()

    def separate_routine(self) -> bool:
        """Return True if this member's value is a tuple of two sub-modes."""
        return isinstance(self.value, tuple)
|