Update README.md and config.json for Transformers v5
@@ -1,5 +1,11 @@
-# coding=utf-8
-# Copyright 2021 The LG AI Research EXAONE Lab. All rights reserved.
+# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
+# This file was automatically generated from src/transformers/models/exaone/modular_exaone.py.
+# Do NOT edit this file manually as any edits will be overwritten by the generation of
+# the file from the modular. If any change should be done, please apply the change to the
+# modular_exaone.py file directly. One of our CI enforces this.
+# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
+# Copyright 2026 The LG AI Research and HuggingFace Inc. team. All rights reserved.
+#
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,15 +18,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""EXAONE model configuration"""
+"""LG AI Research EXAONE Lab"""
 
 from transformers.configuration_utils import PretrainedConfig
-from transformers.utils import logging
-
-
-logger = logging.get_logger(__name__)
-
-EXAONE_PRETRAINED_CONFIG_ARCHIVE_MAP = {}
+from transformers.modeling_rope_utils import RopeParameters
 
 
 class ExaoneConfig(PretrainedConfig):
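For orientation, the new `RopeParameters` import replaces the separate `rope_theta`/`rope_scaling` arguments that the old signature further down still carries. A minimal sketch of how a `rope_parameters` value might be assembled; note that only the import path comes from this diff, while `RopeParameters` being dict-like and the field names `rope_type`/`rope_theta` are assumptions:

```python
# Hedged sketch: building a rope_parameters value for the v5-style config.
# Only the import path is taken from this diff; the keys used here
# ("rope_type", "rope_theta") are assumptions about the v5 RoPE schema.
from transformers.modeling_rope_utils import RopeParameters

rope_parameters: RopeParameters = {
    "rope_type": "default",  # assumed key
    "rope_theta": 10000.0,   # mirrors the old rope_theta default
}
```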
@@ -114,6 +115,10 @@ class ExaoneConfig(PretrainedConfig):
             Beginning of stream token id.
         eos_token_id (`int`, *optional*, defaults to 2):
             End of stream token id.
+        pad_token_id (`int`, *optional*):
+            Padding token id.
+        tie_word_embeddings (`bool`, *optional*, defaults to `False`):
+            Whether to tie weight embeddings
 
     Example:
 
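The two newly documented arguments can simply be passed at construction time. A small sketch, assuming the updated `ExaoneConfig` is importable from `transformers.models.exaone.configuration_exaone` in the installed build (for a Hub checkpoint shipping this file as remote code, `AutoConfig.from_pretrained(..., trust_remote_code=True)` would be the usual entry point instead):

```python
# Sketch: the newly documented pad_token_id / tie_word_embeddings arguments.
# The import path assumes the exaone model is part of the installed
# transformers build; values below are illustrative only.
from transformers.models.exaone.configuration_exaone import ExaoneConfig

config = ExaoneConfig(
    pad_token_id=0,            # documented as optional, default None
    tie_word_embeddings=False,
)
print(config.pad_token_id, config.tie_word_embeddings)
```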
@@ -132,27 +137,32 @@ class ExaoneConfig(PretrainedConfig):
 
     model_type = "exaone"
     keys_to_ignore_at_inference = ["past_key_values"]
-    attribute_map = {"num_hidden_layers": "num_layers"}
+    attribute_map = {
+        "num_hidden_layers": "num_layers",
+        "hidden_act": "activation_function",
+        "rms_norm_eps": "layer_norm_epsilon",
+    }
 
     def __init__(
         self,
-        vocab_size=102400,
+        vocab_size: int | None = 102400,
         max_position_embeddings=2048,
-        hidden_size=2048,
-        num_layers=32,
-        num_attention_heads=32,
-        num_key_value_heads=None,
-        intermediate_size=None,
-        activation_function="silu",
-        rope_theta=10000.0,
-        rope_scaling=None,
-        embed_dropout=0.0,
-        attention_dropout=0.0,
-        layer_norm_epsilon=1e-5,
-        initializer_range=0.02,
-        use_cache=True,
-        bos_token_id=0,
-        eos_token_id=2,
+        hidden_size: int | None = 2048,
+        num_layers: int | None = 32,
+        num_attention_heads: int | None = 32,
+        num_key_value_heads: int | None = None,
+        intermediate_size: int | None = None,
+        activation_function: str | None = "silu",
+        rope_parameters: RopeParameters | None = None,
+        embed_dropout: float | None = 0.0,
+        attention_dropout: float | None = 0.0,
+        layer_norm_epsilon: float | None = 1e-5,
+        initializer_range: float | None = 0.02,
+        use_cache: bool | None = True,
+        bos_token_id: int | None = 0,
+        eos_token_id: int | None = 2,
+        pad_token_id: int | None = None,
+        tie_word_embeddings: bool | None = False,
        **kwargs,
    ):
        self.vocab_size = vocab_size
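The expanded `attribute_map` is what lets code written against the library-standard attribute names keep working with EXAONE's own argument names. A rough illustration of that aliasing, under the same import assumption as the earlier example:

```python
# Sketch: attribute_map aliases generic names onto the EXAONE-specific
# attributes, so both spellings resolve to the same stored value.
from transformers.models.exaone.configuration_exaone import ExaoneConfig

config = ExaoneConfig(num_layers=32, activation_function="silu", layer_norm_epsilon=1e-5)

assert config.num_hidden_layers == config.num_layers      # 32
assert config.hidden_act == config.activation_function    # "silu"
assert config.rms_norm_eps == config.layer_norm_epsilon   # 1e-5
```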
@@ -174,10 +184,14 @@ class ExaoneConfig(PretrainedConfig):
         self.layer_norm_epsilon = layer_norm_epsilon
         self.initializer_range = initializer_range
         self.use_cache = use_cache
-        self.rope_theta = rope_theta
-        self.rope_scaling = rope_scaling
+        self.rope_parameters = rope_parameters
+
+        self.bos_token_id = bos_token_id
+        self.eos_token_id = eos_token_id
+        self.pad_token_id = pad_token_id
+        self.tie_word_embeddings = tie_word_embeddings
 
-        super().__init__(bos_token_id=bos_token_id, eos_token_id=eos_token_id, **kwargs)
+        super().__init__(**kwargs)
 
 
 __all__ = ["ExaoneConfig"]
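Because the token ids are now stored as plain attributes before `super().__init__(**kwargs)` is called, they still end up in the serialized `config.json`. A small round-trip sketch, under the same import assumption as the earlier examples:

```python
# Sketch: bos/eos ids set as attributes survive a save/load round trip
# through config.json.
from transformers.models.exaone.configuration_exaone import ExaoneConfig

config = ExaoneConfig(bos_token_id=0, eos_token_id=2)
config.save_pretrained("./exaone-config-demo")        # writes config.json
reloaded = ExaoneConfig.from_pretrained("./exaone-config-demo")
print(reloaded.bos_token_id, reloaded.eos_token_id)   # 0 2
```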
modeling_exaone.py (1457 lines): file diff suppressed because it is too large.