Add supports_gradient_checkpointing
This commit is contained in:
@@ -63,6 +63,8 @@ class InternVLChatConfig(PretrainedConfig):
|
||||
self.ps_version = ps_version # pixel shuffle version
|
||||
self.min_dynamic_patch = min_dynamic_patch
|
||||
self.max_dynamic_patch = max_dynamic_patch
|
||||
# By default, we use tie_word_embeddings=False for models of all sizes.
|
||||
self.tie_word_embeddings = self.llm_config.tie_word_embeddings
|
||||
|
||||
logger.info(f'vision_select_layer: {self.select_layer}')
|
||||
logger.info(f'ps_version: {self.ps_version}')
|
||||
|
||||
@@ -364,6 +364,7 @@ class InternVisionEncoder(nn.Module):
|
||||
class InternVisionModel(PreTrainedModel):
|
||||
main_input_name = 'pixel_values'
|
||||
_supports_flash_attn_2 = True
|
||||
supports_gradient_checkpointing = True
|
||||
config_class = InternVisionConfig
|
||||
_no_split_modules = ['InternVisionEncoderLayer']
|
||||
|
||||
|
||||
@@ -37,6 +37,7 @@ class InternVLChatModel(PreTrainedModel):
|
||||
main_input_name = 'pixel_values'
|
||||
base_model_prefix = 'language_model'
|
||||
_supports_flash_attn_2 = True
|
||||
supports_gradient_checkpointing = True
|
||||
_no_split_modules = ['InternVisionModel', 'LlamaDecoderLayer', 'Qwen2DecoderLayer']
|
||||
|
||||
def __init__(self, config: InternVLChatConfig, vision_model=None, language_model=None, use_flash_attn=True):
|
||||
@@ -346,3 +347,13 @@ class InternVLChatModel(PreTrainedModel):
|
||||
)
|
||||
|
||||
return outputs
|
||||
|
||||
@property
|
||||
def lm_head(self):
|
||||
return self.language_model.get_output_embeddings()
|
||||
|
||||
def get_input_embeddings(self):
|
||||
return self.language_model.get_input_embeddings()
|
||||
|
||||
def get_output_embeddings(self):
|
||||
return self.language_model.get_output_embeddings()
|
||||
|
||||
Reference in New Issue
Block a user