Sync from v0.13

2026-01-19 10:38:50 +08:00
parent b2ef04d792
commit 5aef6c175a
3714 changed files with 854317 additions and 89342 deletions

@@ -1,6 +1,7 @@
-# coding=utf-8
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 # Copyright 2023 The OpenAI Team Authors and HuggingFace Inc. team.
 # Copyright (c) 2024 - 2024 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
 # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 # Copyright 2023 Cerebras Systems.
 #
@@ -73,10 +74,9 @@ class JAISConfig(PretrainedConfig):
         use_cache (`bool`, *optional*, defaults to `True`):
             Whether or not the model should return the last key/values
             attentions (not used by all models).
-        scale_attn_by_inverse_layer_idx (`bool`, *optional*,
-            defaults to `False`):
-            Whether to additionally scale attention weights by
-            `1 / layer_idx + 1`.
+        scale_attn_by_inverse_layer_idx (`bool`, *optional*, default `True`):
+            Whether to additionally scale attention weights
+            by `1 / layer_idx + 1`.
         reorder_and_upcast_attn (`bool`, *optional*, defaults to `False`):
             Whether to scale keys (K) prior to computing attention
             (dot-product)
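
For context on the `scale_attn_by_inverse_layer_idx` flag documented in this hunk: the docstring's `1 / layer_idx + 1` denotes dividing each layer's attention logits by `layer_idx + 1`, as in the GPT-2 implementation this config is derived from. A minimal sketch of the idea (an illustrative helper, not code from the file shown):

import torch

def attn_logits(query: torch.Tensor, key: torch.Tensor,
                layer_idx: int, scale_by_inverse_layer_idx: bool) -> torch.Tensor:
    # Standard scaled dot-product logits.
    logits = torch.matmul(query, key.transpose(-1, -2)) / (query.size(-1) ** 0.5)
    if scale_by_inverse_layer_idx:
        # Extra per-layer damping: divide by (layer_idx + 1), so layer 0 is
        # unchanged, layer 1 is halved, and so on.
        logits = logits / float(layer_idx + 1)
    return logits
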
@@ -98,7 +98,7 @@ class JAISConfig(PretrainedConfig):
             Scale attention weights by dividing by hidden_size instead of
             sqrt(hidden_size). Need to set scale_attn_weights to `True` as
             well.
-        alibi_scaling (`Dict`, *optional*):
+        alibi_scaling (`dict`, *optional*):
             Dictionary containing the scaling configuration for ALiBi
             embeddings. Currently only supports linear
             scaling strategy. Can specify either the scaling `factor` (must be
@@ -108,7 +108,7 @@ class JAISConfig(PretrainedConfig):
             formats are `{"type": strategy name, "factor": scaling factor}` or
             `{"type": strategy name,
             "train_seq_len": training sequence length}`.
-        architectures (`List`, *optional*, defaults to ['JAISLMHeadModel']):
+        architectures (`list`, *optional*, defaults to ['JAISLMHeadModel']):
             architecture names for Jais.
 
     Example:
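
To make the two accepted `alibi_scaling` formats in this docstring concrete, these hypothetical values would satisfy it (the `factor` must be a float strictly greater than 1.0, and `train_seq_len` an integer strictly greater than 1, per the validation in the next hunk):

# Two hypothetical configurations matching the documented formats:
alibi_scaling_by_factor = {"type": "linear", "factor": 2.0}
alibi_scaling_by_train_len = {"type": "linear", "train_seq_len": 2048}
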
@@ -209,29 +209,35 @@ class JAISConfig(PretrainedConfig):
         if self.alibi_scaling is None:
             return
 
-        if (not isinstance(self.alibi_scaling, dict)
-                or len(self.alibi_scaling) != 2):
+        if not isinstance(self.alibi_scaling, dict) or len(self.alibi_scaling) != 2:
             raise ValueError(
-                "`alibi_scaling` must be a dictionary with two fields,"
+                "`alibi_scaling` must be a dictionary with two fields, "
                 "`type` and `factor` or `type` and `train_seq_len`, "
-                f"got {self.alibi_scaling}")
+                f"got {self.alibi_scaling}"
+            )
         alibi_scaling_type = self.alibi_scaling.get("type", None)
         alibi_scaling_factor = self.alibi_scaling.get("factor", None)
         alibi_dynamic_scaling = self.alibi_scaling.get("train_seq_len", None)
         if alibi_scaling_type is None or alibi_scaling_type != "linear":
-            raise ValueError(f"`alibi_scaling`'s type field must be 'linear',"
-                             f"got {alibi_scaling_type}")
+            raise ValueError(
+                f"`alibi_scaling`'s type field must be 'linear', "
+                f"got {alibi_scaling_type}"
+            )
-        if (alibi_scaling_factor is not None
-                and not isinstance(alibi_scaling_factor, float)
-                or (alibi_scaling_factor is not None
-                    and alibi_scaling_factor <= 1.0)):
+        if (
+            alibi_scaling_factor is not None
+            and not isinstance(alibi_scaling_factor, float)
+            or (alibi_scaling_factor is not None and alibi_scaling_factor <= 1.0)
+        ):
             raise ValueError(
-                f"`alibi_scaling`'s factor field must be a float > 1.0,"
-                f"got {alibi_scaling_factor}")
+                f"`alibi_scaling`'s factor field must be a float > 1.0, "
+                f"got {alibi_scaling_factor}"
+            )
-        if (alibi_dynamic_scaling is not None
-                and not isinstance(alibi_dynamic_scaling, int)
-                or (alibi_dynamic_scaling is not None
-                    and alibi_dynamic_scaling <= 1)):
+        if (
+            alibi_dynamic_scaling is not None
+            and not isinstance(alibi_dynamic_scaling, int)
+            or (alibi_dynamic_scaling is not None and alibi_dynamic_scaling <= 1)
+        ):
             raise ValueError(
-                f"`alibi_scaling`'s `train_seq_len` field must be an"
-                f"integer > 1, got {alibi_dynamic_scaling}")
+                f"`alibi_scaling`'s `train_seq_len` field must be an "
+                f"integer > 1, got {alibi_dynamic_scaling}"
+            )
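
Note that the reformatted conditions keep the original unparenthesized mix of `and`/`or`; since `and` binds tighter than `or` in Python, `A and B or (A and C)` parses as `(A and B) or (A and C)` in both spellings, so the change is purely cosmetic. Assuming the usual vLLM layout where this class is exported from `vllm.transformers_utils.configs`, a quick check of the validator might look like:

from vllm.transformers_utils.configs import JAISConfig  # import path assumed, not shown in this diff

# Both documented formats pass validation.
JAISConfig(alibi_scaling={"type": "linear", "factor": 2.0})
JAISConfig(alibi_scaling={"type": "linear", "train_seq_len": 2048})

# A factor <= 1.0 (or a non-float factor such as the int 2) raises.
try:
    JAISConfig(alibi_scaling={"type": "linear", "factor": 0.5})
except ValueError as err:
    print(err)  # `alibi_scaling`'s factor field must be a float > 1.0, got 0.5
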