Update README.md and config.json for Transformers v5 (batch 1/1)
@@ -1,5 +1,11 @@
-# coding=utf-8
-# Copyright 2021 The LG AI Research EXAONE Lab. All rights reserved.
+# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
+# This file was automatically generated from src/transformers/models/exaone/modular_exaone.py.
+# Do NOT edit this file manually as any edits will be overwritten by the generation of
+# the file from the modular. If any change should be done, please apply the change to the
+# modular_exaone.py file directly. One of our CI enforces this.
+# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
+# Copyright 2026 The LG AI Research and HuggingFace Inc. team. All rights reserved.
+#
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,15 +18,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""EXAONE model configuration"""
+"""LG AI Research EXAONE Lab"""

 from transformers.configuration_utils import PretrainedConfig
-from transformers.utils import logging
-
-
-logger = logging.get_logger(__name__)
-
-EXAONE_PRETRAINED_CONFIG_ARCHIVE_MAP = {}
+from transformers.modeling_rope_utils import RopeParameters


 class ExaoneConfig(PretrainedConfig):
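The unused logging setup and the empty EXAONE_PRETRAINED_CONFIG_ARCHIVE_MAP are dropped, and the config now imports the RopeParameters type used by the reworked rope_parameters argument further down. As a rough illustration only (the exact keys of RopeParameters are not shown in this diff and are assumed here), the dict that replaces the old rope_theta/rope_scaling pair might look like this:

# Hedged sketch: assumes the RopeParameters TypedDict carries at least a rope type and
# the base theta; consult transformers.modeling_rope_utils for the authoritative fields.
from transformers.modeling_rope_utils import RopeParameters

# Roughly equivalent to the old rope_theta=10000.0 with rope_scaling=None.
default_rope: RopeParameters = {"rope_type": "default", "rope_theta": 10000.0}

# Roughly what rope_theta=10000.0 plus rope_scaling={"rope_type": "linear", "factor": 2.0}
# would become under the consolidated argument.
linear_rope: RopeParameters = {"rope_type": "linear", "rope_theta": 10000.0, "factor": 2.0}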
@@ -114,6 +115,10 @@ class ExaoneConfig(PretrainedConfig):
             Beginning of stream token id.
         eos_token_id (`int`, *optional*, defaults to 2):
             End of stream token id.
+        pad_token_id (`int`, *optional*):
+            Padding token id.
+        tie_word_embeddings (`bool`, *optional*, defaults to `False`):
+            Whether to tie weight embeddings

     Example:

@@ -132,27 +137,32 @@ class ExaoneConfig(PretrainedConfig):

     model_type = "exaone"
     keys_to_ignore_at_inference = ["past_key_values"]
-    attribute_map = {"num_hidden_layers": "num_layers"}
+    attribute_map = {
+        "num_hidden_layers": "num_layers",
+        "hidden_act": "activation_function",
+        "rms_norm_eps": "layer_norm_epsilon",
+    }

     def __init__(
         self,
-        vocab_size=102400,
+        vocab_size: int | None = 102400,
         max_position_embeddings=2048,
-        hidden_size=2048,
-        num_layers=32,
-        num_attention_heads=32,
-        num_key_value_heads=None,
-        intermediate_size=None,
-        activation_function="silu",
-        rope_theta=10000.0,
-        rope_scaling=None,
-        embed_dropout=0.0,
-        attention_dropout=0.0,
-        layer_norm_epsilon=1e-5,
-        initializer_range=0.02,
-        use_cache=True,
-        bos_token_id=0,
-        eos_token_id=2,
+        hidden_size: int | None = 2048,
+        num_layers: int | None = 32,
+        num_attention_heads: int | None = 32,
+        num_key_value_heads: int | None = None,
+        intermediate_size: int | None = None,
+        activation_function: str | None = "silu",
+        rope_parameters: RopeParameters | None = None,
+        embed_dropout: float | None = 0.0,
+        attention_dropout: float | None = 0.0,
+        layer_norm_epsilon: float | None = 1e-5,
+        initializer_range: float | None = 0.02,
+        use_cache: bool | None = True,
+        bos_token_id: int | None = 0,
+        eos_token_id: int | None = 2,
+        pad_token_id: int | None = None,
+        tie_word_embeddings: bool | None = False,
         **kwargs,
     ):
         self.vocab_size = vocab_size
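The single-entry attribute_map grows to also cover the activation and norm-epsilon names, so generic Transformers code that reads the library-standard attribute names keeps working against EXAONE's historical ones. A small illustrative sketch (the top-level import path for ExaoneConfig is assumed):

# Illustrative sketch of the attribute_map aliases; PretrainedConfig resolves them during
# attribute lookup, so the standard names and EXAONE's own names refer to the same values.
from transformers import ExaoneConfig  # assumed import path once this change lands

config = ExaoneConfig(num_layers=24, activation_function="gelu", layer_norm_epsilon=1e-6)

assert config.num_hidden_layers == 24      # alias for config.num_layers
assert config.hidden_act == "gelu"         # alias for config.activation_function
assert config.rms_norm_eps == 1e-6         # alias for config.layer_norm_epsilon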
@@ -160,6 +170,7 @@ class ExaoneConfig(PretrainedConfig):
         self.hidden_size = hidden_size
         self.num_layers = num_layers
         self.num_attention_heads = num_attention_heads
+        self.num_layers = num_layers
         if num_key_value_heads is None:
             num_key_value_heads = num_attention_heads
         self.num_key_value_heads = num_key_value_heads
@@ -173,10 +184,14 @@ class ExaoneConfig(PretrainedConfig):
         self.layer_norm_epsilon = layer_norm_epsilon
         self.initializer_range = initializer_range
         self.use_cache = use_cache
-        self.rope_theta = rope_theta
-        self.rope_scaling = rope_scaling
+        self.rope_parameters = rope_parameters

         self.bos_token_id = bos_token_id
         self.eos_token_id = eos_token_id
+        self.pad_token_id = pad_token_id
+        self.tie_word_embeddings = tie_word_embeddings

-        super().__init__(bos_token_id=bos_token_id, eos_token_id=eos_token_id, **kwargs)
+        super().__init__(**kwargs)
+
+
+__all__ = ["ExaoneConfig"]
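Taken together, the RoPE settings now travel through a single rope_parameters dict, the padding token and embedding-tying flag are stored explicitly, the special-token ids are no longer forwarded to super().__init__, and the module exports ExaoneConfig via __all__. A hedged end-to-end usage sketch (import path and RopeParameters keys assumed, as above):

# Hedged usage sketch for the updated constructor; names marked as assumed are not in this diff.
from transformers import ExaoneConfig  # assumed import path

config = ExaoneConfig(
    vocab_size=102400,
    hidden_size=2048,
    num_layers=32,
    num_attention_heads=32,
    rope_parameters={"rope_type": "default", "rope_theta": 10000.0},  # was rope_theta=10000.0
    bos_token_id=0,
    eos_token_id=2,
    pad_token_id=0,             # now a first-class argument instead of a bare kwarg
    tie_word_embeddings=False,  # likewise stored on the config before super().__init__
)
print(config.rope_parameters)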
modeling_exaone.py (1451 changed lines): file diff suppressed because it is too large.