Update README.md and config.json for Transformers v5 (batch 1/1)

systemd
2026-02-07 03:16:38 +00:00
parent b0b57963f5
commit 54670ab76b
2 changed files with 356 additions and 1192 deletions

configuration_exaone.py

@@ -1,5 +1,11 @@
 # coding=utf-8
-# Copyright 2021 The LG AI Research EXAONE Lab. All rights reserved.
+# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
+# This file was automatically generated from src/transformers/models/exaone/modular_exaone.py.
+# Do NOT edit this file manually as any edits will be overwritten by the generation of
+# the file from the modular. If any change should be done, please apply the change to the
+# modular_exaone.py file directly. One of our CI enforces this.
+# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
+# Copyright 2026 The LG AI Research and HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,15 +18,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""EXAONE model configuration"""
+"""LG AI Research EXAONE Lab"""

 from transformers.configuration_utils import PretrainedConfig
+from transformers.modeling_rope_utils import RopeParameters
 from transformers.utils import logging

 logger = logging.get_logger(__name__)

-EXAONE_PRETRAINED_CONFIG_ARCHIVE_MAP = {}
-
 class ExaoneConfig(PretrainedConfig):
@@ -114,6 +115,10 @@ class ExaoneConfig(PretrainedConfig):
             Beginning of stream token id.
         eos_token_id (`int`, *optional*, defaults to 2):
             End of stream token id.
+        pad_token_id (`int`, *optional*):
+            Padding token id.
+        tie_word_embeddings (`bool`, *optional*, defaults to `False`):
+            Whether to tie weight embeddings

     Example:
@@ -132,27 +137,32 @@ class ExaoneConfig(PretrainedConfig):
     model_type = "exaone"
     keys_to_ignore_at_inference = ["past_key_values"]
-    attribute_map = {"num_hidden_layers": "num_layers"}
+    attribute_map = {
+        "num_hidden_layers": "num_layers",
+        "hidden_act": "activation_function",
+        "rms_norm_eps": "layer_norm_epsilon",
+    }

     def __init__(
         self,
-        vocab_size=102400,
+        vocab_size: int | None = 102400,
         max_position_embeddings=2048,
-        hidden_size=2048,
-        num_layers=32,
-        num_attention_heads=32,
-        num_key_value_heads=None,
-        intermediate_size=None,
-        activation_function="silu",
-        rope_theta=10000.0,
-        rope_scaling=None,
-        embed_dropout=0.0,
-        attention_dropout=0.0,
-        layer_norm_epsilon=1e-5,
-        initializer_range=0.02,
-        use_cache=True,
-        bos_token_id=0,
-        eos_token_id=2,
+        hidden_size: int | None = 2048,
+        num_layers: int | None = 32,
+        num_attention_heads: int | None = 32,
+        num_key_value_heads: int | None = None,
+        intermediate_size: int | None = None,
+        activation_function: str | None = "silu",
+        rope_parameters: RopeParameters | None = None,
+        embed_dropout: float | None = 0.0,
+        attention_dropout: float | None = 0.0,
+        layer_norm_epsilon: float | None = 1e-5,
+        initializer_range: float | None = 0.02,
+        use_cache: bool | None = True,
+        bos_token_id: int | None = 0,
+        eos_token_id: int | None = 2,
+        pad_token_id: int | None = None,
+        tie_word_embeddings: bool | None = False,
         **kwargs,
     ):
         self.vocab_size = vocab_size
@@ -160,6 +170,7 @@ class ExaoneConfig(PretrainedConfig):
         self.hidden_size = hidden_size
-        self.num_layers = num_layers
         self.num_attention_heads = num_attention_heads
+        self.num_layers = num_layers
         if num_key_value_heads is None:
             num_key_value_heads = num_attention_heads
         self.num_key_value_heads = num_key_value_heads
@@ -173,10 +184,14 @@ class ExaoneConfig(PretrainedConfig):
         self.layer_norm_epsilon = layer_norm_epsilon
         self.initializer_range = initializer_range
         self.use_cache = use_cache
-        self.rope_theta = rope_theta
-        self.rope_scaling = rope_scaling
+        self.rope_parameters = rope_parameters
         self.bos_token_id = bos_token_id
         self.eos_token_id = eos_token_id
+        self.pad_token_id = pad_token_id
+        self.tie_word_embeddings = tie_word_embeddings

-        super().__init__(bos_token_id=bos_token_id, eos_token_id=eos_token_id, **kwargs)
+        super().__init__(**kwargs)
+
+
+__all__ = ["ExaoneConfig"]
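
Reviewer note: the v5 surface above replaces the `rope_theta` / `rope_scaling` pair with a single `rope_parameters` argument, and routes the v5-standard names `num_hidden_layers`, `hidden_act`, and `rms_norm_eps` through `attribute_map`. A minimal sketch of exercising the updated config, assuming the `{"rope_type": ..., "rope_theta": ...}` dict shape accepted by recent Transformers RoPE utilities and a local import of the generated file (both are assumptions, not part of this commit):

    # Sketch only; assumes this repo's generated configuration_exaone.py is importable.
    from configuration_exaone import ExaoneConfig

    config = ExaoneConfig(
        vocab_size=102400,
        num_layers=32,
        # Assumed dict shape for RopeParameters; adjust rope_type/rope_theta as needed.
        rope_parameters={"rope_type": "default", "rope_theta": 10000.0},
        pad_token_id=0,
    )

    # attribute_map aliases resolve reads of the standard names to the
    # EXAONE-specific attributes set in __init__.
    assert config.num_hidden_layers == config.num_layers
    assert config.hidden_act == config.activation_function
    assert config.rms_norm_eps == config.layer_norm_epsilon

Checkpoints serialized before this change that still carry `rope_theta` / `rope_scaling` in config.json would need those keys migrated into `rope_parameters`; the sketch above only covers in-code construction.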