Update README.md and config.json for Transformers v5 (batch 1/1)

systemd
2026-02-07 03:16:38 +00:00
parent b0b57963f5
commit 54670ab76b
2 changed files with 356 additions and 1192 deletions

configuration_exaone.py

@@ -1,5 +1,11 @@
 # coding=utf-8
-# Copyright 2021 The LG AI Research EXAONE Lab. All rights reserved.
+# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
+# This file was automatically generated from src/transformers/models/exaone/modular_exaone.py.
+# Do NOT edit this file manually as any edits will be overwritten by the generation of
+# the file from the modular. If any change should be done, please apply the change to the
+# modular_exaone.py file directly. One of our CI enforces this.
+# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
+# Copyright 2026 The LG AI Research and HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,15 +18,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""EXAONE model configuration"""
+"""LG AI Research EXAONE Lab"""

 from transformers.configuration_utils import PretrainedConfig
+from transformers.modeling_rope_utils import RopeParameters
 from transformers.utils import logging

 logger = logging.get_logger(__name__)

-EXAONE_PRETRAINED_CONFIG_ARCHIVE_MAP = {}
-
 class ExaoneConfig(PretrainedConfig):
@@ -114,6 +115,10 @@ class ExaoneConfig(PretrainedConfig):
             Beginning of stream token id.
         eos_token_id (`int`, *optional*, defaults to 2):
             End of stream token id.
+        pad_token_id (`int`, *optional*):
+            Padding token id.
+        tie_word_embeddings (`bool`, *optional*, defaults to `False`):
+            Whether to tie weight embeddings

     Example:
@@ -132,27 +137,32 @@ class ExaoneConfig(PretrainedConfig):
     model_type = "exaone"
     keys_to_ignore_at_inference = ["past_key_values"]
-    attribute_map = {"num_hidden_layers": "num_layers"}
+    attribute_map = {
+        "num_hidden_layers": "num_layers",
+        "hidden_act": "activation_function",
+        "rms_norm_eps": "layer_norm_epsilon",
+    }

     def __init__(
         self,
-        vocab_size=102400,
+        vocab_size: int | None = 102400,
         max_position_embeddings=2048,
-        hidden_size=2048,
-        num_layers=32,
-        num_attention_heads=32,
-        num_key_value_heads=None,
-        intermediate_size=None,
-        activation_function="silu",
-        rope_theta=10000.0,
-        rope_scaling=None,
-        embed_dropout=0.0,
-        attention_dropout=0.0,
-        layer_norm_epsilon=1e-5,
-        initializer_range=0.02,
-        use_cache=True,
-        bos_token_id=0,
-        eos_token_id=2,
+        hidden_size: int | None = 2048,
+        num_layers: int | None = 32,
+        num_attention_heads: int | None = 32,
+        num_key_value_heads: int | None = None,
+        intermediate_size: int | None = None,
+        activation_function: str | None = "silu",
+        rope_parameters: RopeParameters | None = None,
+        embed_dropout: float | None = 0.0,
+        attention_dropout: float | None = 0.0,
+        layer_norm_epsilon: float | None = 1e-5,
+        initializer_range: float | None = 0.02,
+        use_cache: bool | None = True,
+        bos_token_id: int | None = 0,
+        eos_token_id: int | None = 2,
+        pad_token_id: int | None = None,
+        tie_word_embeddings: bool | None = False,
         **kwargs,
     ):
         self.vocab_size = vocab_size
@@ -160,6 +170,7 @@ class ExaoneConfig(PretrainedConfig):
         self.hidden_size = hidden_size
-        self.num_layers = num_layers
         self.num_attention_heads = num_attention_heads
+        self.num_layers = num_layers
         if num_key_value_heads is None:
             num_key_value_heads = num_attention_heads
         self.num_key_value_heads = num_key_value_heads
@@ -173,10 +184,14 @@ class ExaoneConfig(PretrainedConfig):
         self.layer_norm_epsilon = layer_norm_epsilon
         self.initializer_range = initializer_range
         self.use_cache = use_cache
-        self.rope_theta = rope_theta
-        self.rope_scaling = rope_scaling
+        self.rope_parameters = rope_parameters
         self.bos_token_id = bos_token_id
         self.eos_token_id = eos_token_id
+        self.pad_token_id = pad_token_id
+        self.tie_word_embeddings = tie_word_embeddings

-        super().__init__(bos_token_id=bos_token_id, eos_token_id=eos_token_id, **kwargs)
+        super().__init__(**kwargs)
+
+
+__all__ = ["ExaoneConfig"]
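
Reviewer note: the v5 surface above replaces the `rope_theta` / `rope_scaling` pair with a single `rope_parameters` argument, and routes the v5-standard names `num_hidden_layers`, `hidden_act`, and `rms_norm_eps` through `attribute_map`. A minimal sketch of exercising the updated config, assuming the `{"rope_type": ..., "rope_theta": ...}` dict shape accepted by recent Transformers RoPE utilities and a local import of the generated file (both are assumptions, not part of this commit):

    # Sketch only; assumes this repo's generated configuration_exaone.py is importable.
    from configuration_exaone import ExaoneConfig

    config = ExaoneConfig(
        vocab_size=102400,
        num_layers=32,
        # Assumed dict shape for RopeParameters; adjust rope_type/rope_theta as needed.
        rope_parameters={"rope_type": "default", "rope_theta": 10000.0},
        pad_token_id=0,
    )

    # attribute_map aliases resolve reads of the standard names to the
    # EXAONE-specific attributes set in __init__.
    assert config.num_hidden_layers == config.num_layers
    assert config.hidden_act == config.activation_function
    assert config.rms_norm_eps == config.layer_norm_epsilon

Checkpoints serialized before this change that still carry `rope_theta` / `rope_scaling` in config.json would need those keys migrated into `rope_parameters`; the sketch above only covers in-code construction.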