Update README.md and config.json for Transformers v5 (batch 1/1)

This commit is contained in:
systemd
2026-02-07 03:16:38 +00:00
parent b0b57963f5
commit 54670ab76b
2 changed files with 356 additions and 1192 deletions

View File

@@ -1,5 +1,11 @@
# coding=utf-8
# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
# Copyright 2021 The LG AI Research EXAONE Lab. All rights reserved.
# This file was automatically generated from src/transformers/models/exaone/modular_exaone.py.
# Do NOT edit this file manually as any edits will be overwritten by the generation of
# the file from the modular. If any change should be done, please apply the change to the
# modular_exaone.py file directly. One of our CI enforces this.
# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
# Copyright 2026 The LG AI Research and HuggingFace Inc. team. All rights reserved.
#
# #
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -12,15 +18,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""EXAONE model configuration""" """LG AI Research EXAONE Lab"""
from transformers.configuration_utils import PretrainedConfig
from transformers.modeling_rope_utils import RopeParameters
from transformers.utils import logging
logger = logging.get_logger(__name__)
EXAONE_PRETRAINED_CONFIG_ARCHIVE_MAP = {}
class ExaoneConfig(PretrainedConfig): class ExaoneConfig(PretrainedConfig):
@@ -114,6 +115,10 @@ class ExaoneConfig(PretrainedConfig):
Beginning of stream token id. Beginning of stream token id.
eos_token_id (`int`, *optional*, defaults to 2): eos_token_id (`int`, *optional*, defaults to 2):
End of stream token id. End of stream token id.
pad_token_id (`int`, *optional*):
Padding token id.
tie_word_embeddings (`bool`, *optional*, defaults to `False`):
Whether to tie weight embeddings
Example: Example:
@@ -132,27 +137,32 @@ class ExaoneConfig(PretrainedConfig):
model_type = "exaone" model_type = "exaone"
keys_to_ignore_at_inference = ["past_key_values"] keys_to_ignore_at_inference = ["past_key_values"]
attribute_map = {"num_hidden_layers": "num_layers"} attribute_map = {
"num_hidden_layers": "num_layers",
"hidden_act": "activation_function",
"rms_norm_eps": "layer_norm_epsilon",
}
def __init__( def __init__(
self, self,
vocab_size=102400, vocab_size: int | None = 102400,
max_position_embeddings=2048, max_position_embeddings=2048,
hidden_size=2048, hidden_size: int | None = 2048,
num_layers=32, num_layers: int | None = 32,
num_attention_heads=32, num_attention_heads: int | None = 32,
num_key_value_heads=None, num_key_value_heads: int | None = None,
intermediate_size=None, intermediate_size: int | None = None,
activation_function="silu", activation_function: str | None = "silu",
rope_theta=10000.0, rope_parameters: RopeParameters | None = None,
rope_scaling=None, embed_dropout: float | None = 0.0,
embed_dropout=0.0, attention_dropout: float | None = 0.0,
attention_dropout=0.0, layer_norm_epsilon: float | None = 1e-5,
layer_norm_epsilon=1e-5, initializer_range: float | None = 0.02,
initializer_range=0.02, use_cache: bool | None = True,
use_cache=True, bos_token_id: int | None = 0,
bos_token_id=0, eos_token_id: int | None = 2,
eos_token_id=2, pad_token_id: int | None = None,
tie_word_embeddings: bool | None = False,
**kwargs, **kwargs,
): ):
self.vocab_size = vocab_size self.vocab_size = vocab_size
@@ -160,6 +170,7 @@ class ExaoneConfig(PretrainedConfig):
self.hidden_size = hidden_size self.hidden_size = hidden_size
self.num_layers = num_layers self.num_layers = num_layers
self.num_attention_heads = num_attention_heads self.num_attention_heads = num_attention_heads
self.num_layers = num_layers
if num_key_value_heads is None: if num_key_value_heads is None:
num_key_value_heads = num_attention_heads num_key_value_heads = num_attention_heads
self.num_key_value_heads = num_key_value_heads self.num_key_value_heads = num_key_value_heads
@@ -173,10 +184,14 @@ class ExaoneConfig(PretrainedConfig):
self.layer_norm_epsilon = layer_norm_epsilon self.layer_norm_epsilon = layer_norm_epsilon
self.initializer_range = initializer_range self.initializer_range = initializer_range
self.use_cache = use_cache self.use_cache = use_cache
self.rope_theta = rope_theta self.rope_parameters = rope_parameters
self.rope_scaling = rope_scaling
self.bos_token_id = bos_token_id self.bos_token_id = bos_token_id
self.eos_token_id = eos_token_id self.eos_token_id = eos_token_id
self.pad_token_id = pad_token_id
self.tie_word_embeddings = tie_word_embeddings
super().__init__(bos_token_id=bos_token_id, eos_token_id=eos_token_id, **kwargs) super().__init__(**kwargs)
__all__ = ["ExaoneConfig"]

File diff suppressed because it is too large Load Diff