################################################################################
# Copyright(c)2020-2025 Shanghai Biren Technology Co., Ltd. All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
################################################################################
import enum

from vllm.config.compilation import CUDAGraphMode


class CompilationLevel:
    """Integer constants for the levels of the compilation process."""

    NO_COMPILATION = 0
    DYNAMO_AS_IS = 1
    DYNAMO_ONCE = 2
    PIECEWISE = 3


class SUPAGraphMode(enum.Enum):
    """Constants for the supagraph mode in CompilationConfig.

    The single-valued members `NONE`, `PIECEWISE` and `FULL` are also treated
    as concrete runtime modes for supagraph runtime dispatching.

    The tuple-valued members describe a "separate routine": element 0 is the
    mode returned by `decode_mode()` and element 1 is the mode returned by
    `mixed_mode()`.
    """

    NONE = 0
    PIECEWISE = 1
    FULL = 2
    # Inside the class body the names above are still plain ints, so these
    # two members carry the tuple values (2, 0) and (2, 1).
    FULL_DECODE_ONLY = (FULL, NONE)
    FULL_AND_PIECEWISE = (FULL, PIECEWISE)

    def decode_mode(self) -> 'SUPAGraphMode':
        """Return the first component of a tuple-valued mode, else `self`."""
        if self.separate_routine():
            return SUPAGraphMode(self.value[0])
        return self

    def mixed_mode(self) -> 'SUPAGraphMode':
        """Return the second component of a tuple-valued mode, else `self`."""
        if self.separate_routine():
            return SUPAGraphMode(self.value[1])
        return self

    def requires_piecewise_compilation(self) -> bool:
        """Return True if the decode or the mixed mode is `PIECEWISE`."""
        return (self.decode_mode() == SUPAGraphMode.PIECEWISE
                or self.mixed_mode() == SUPAGraphMode.PIECEWISE)

    def max_supagraph_mode(self) -> 'SUPAGraphMode':
        """Return the highest-valued component mode, or `self` if not a tuple."""
        if self.separate_routine():
            return SUPAGraphMode(max(self.value))
        return self

    def has_full_supagraphs(self) -> bool:
        """Return True if the highest component mode is `FULL`."""
        return self.max_supagraph_mode() == SUPAGraphMode.FULL

    def has_full_cudagraphs(self) -> bool:
        """Return True if the highest component mode is `FULL`.

        NOTE(review): presumably kept so that code written against the
        `CUDAGraphMode` method names can be handed a `SUPAGraphMode`
        (original comment: "trick for CUDAGraphMode") -- confirm with callers.
        """
        # Members of different Enum classes never compare equal, so comparing
        # a SUPAGraphMode member with CUDAGraphMode.FULL was always False and
        # plain `FULL` wrongly reported no full graphs. Delegating keeps every
        # member consistent with `has_full_supagraphs()`.
        return self.has_full_supagraphs()

    def separate_routine(self) -> bool:
        """Return True if this member's value is a tuple of two modes."""
        return isinstance(self.value, tuple)