init

2025-10-09 16:47:16 +08:00
parent c8feb4deb5
commit e27e3f16bb
5248 changed files with 1778505 additions and 0 deletions
--- a/transformers/setup.py
+++ b/transformers/setup.py
@@ -0,0 +1,470 @@
+# Copyright 2021 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Simple check list from AllenNLP repo: https://github.com/allenai/allennlp/blob/main/setup.py
+
+To create the package for pypi.
+
+1. Create the release branch named: v<RELEASE>-release, for example v4.19-release. For a patch release checkout the
+   current release branch.
+
+   If releasing on a special branch, copy the updated README.md on the main branch for the commit you will make
+   for the post-release and run `make fix-copies` on the main branch as well.
+
+2. Run `make pre-release` (or `make pre-patch` for a patch release) and commit these changes with the message:
+   "Release: <VERSION>" and push.
+
+3. Go back to the main branch and run `make post-release` then `make fix-copies`. Commit these changes with the
+   message "v<NEXT_VERSION>.dev.0" and push to main.
+
+# If you were just cutting the branch in preparation for a release, you can stop here for now.
+
+4. Wait for the tests on the release branch to be completed and be green (otherwise revert and fix bugs)
+
+5. On the release branch, add a tag in git to mark the release: "git tag v<VERSION> -m 'Adds tag v<VERSION> for pypi' "
+   Push the tag to git: git push --tags origin v<RELEASE>-release
+
+6. Build both the sources and the wheel. Do not change anything in setup.py between
+   creating the wheel and the source distribution (obviously).
+
+   Run `make build-release`. This will build the release and do some sanity checks for you. If this ends with an error
+   message, you need to fix things before going further.
+
+   You should now have a /dist directory with both .whl and .tar.gz source versions.
+
+7. Check that everything looks correct by uploading the package to the pypi test server:
+
+   twine upload dist/* -r testpypi
+   (PyPI suggests using twine, as other methods upload files via plaintext.)
+   You may have to specify the repository url, use the following command then:
+   twine upload dist/* -r testpypi --repository-url=https://test.pypi.org/legacy/
+
+   Check that you can install it in a virtualenv by running:
+   pip install -i https://test.pypi.org/simple/ transformers
+
+   Check you can run the following commands:
+   python -c "from transformers import pipeline; classifier = pipeline('text-classification'); print(classifier('What a nice release'))"
+   python -c "from transformers import *"
+   python utils/check_build.py --check_lib
+
+   If making a patch release, double check the bug you are patching is indeed resolved.
+
+8. Upload the final version to actual pypi:
+   twine upload dist/* -r pypi
+
+9. Copy the release notes from RELEASE.md to the tag in github once everything is looking hunky-dory.
+"""
+
+import os
+import re
+import shutil
+from pathlib import Path
+
+from setuptools import Command, find_packages, setup
+
+
+# A leftover `transformers.egg-info` directory next to this file may prevent editable installs
+# (see https://github.com/pypa/pip/issues/5466), so announce it and delete it up front.
+stale_egg_info = Path(__file__).parent.joinpath("transformers.egg-info")
+if stale_egg_info.exists():
+    # The pieces are printed back to back (sep="") so the emitted text is one continuous message.
+    print(
+        f"Warning: {stale_egg_info} exists.\n\n",
+        "If you recently updated transformers to 3.0 or later, this is expected,\n",
+        "but it may prevent transformers from installing in editable mode.\n\n",
+        "This directory is automatically generated by Python's packaging tools.\n",
+        "I will remove it now.\n\n",
+        "See https://github.com/pypa/pip/issues/5466 for details.\n",
+        sep="",
+    )
+    shutil.rmtree(stale_egg_info)
+
+
+# IMPORTANT:
+# 1. all dependencies should be listed here with their version requirements if any
+# 2. once modified, run: `make deps_table_update` to update src/transformers/dependency_versions_table.py
+#
+# Each entry is a requirement string: a package name, optionally followed by version specifiers.
+# The `deps` lookup table built from this list is keyed by the name part, i.e. everything before the
+# first `!`, `=`, `<`, `>`, `~` or space, so a bracketed extra such as `ray[tune]` stays part of the key.
+_deps = [
+    "Pillow>=10.0.1,<=15.0",
+    "accelerate>=0.26.0",
+    "av",
+    "beautifulsoup4",
+    "blobfile",
+    "codecarbon>=2.8.1",
+    "cookiecutter==1.7.3",
+    "dataclasses",
+    "datasets>=2.15.0",  # We need either this pin or pyarrow<21.0.0
+    "deepspeed>=0.9.3",
+    "diffusers",
+    "dill<0.3.5",
+    "evaluate>=0.2.0",
+    "faiss-cpu",
+    "fastapi",
+    "filelock",
+    "ftfy",
+    "fugashi>=1.0",
+    "GitPython<3.1.19",
+    "hf-doc-builder>=0.3.0",
+    "hf_xet",
+    "huggingface-hub==1.0.0.rc1",
+    "importlib_metadata",
+    "ipadic>=1.0.0,<2.0",
+    "jinja2>=3.1.0",
+    "kenlm",
+    "kernels>=0.10.2,<0.11",
+    "librosa",
+    "natten>=0.14.6,<0.15.0",
+    "nltk<=3.8.1",
+    "num2words",
+    "numpy>=1.17",
+    "onnxconverter-common",
+    "onnxruntime-tools>=1.4.2",
+    "onnxruntime>=1.4.0",
+    "openai>=1.98.0",
+    "opencv-python",
+    "optimum-benchmark>=0.3.0",
+    "optuna",
+    "pandas<2.3.0",  # `datasets` requires `pandas` while `pandas==2.3.0` has issues with CircleCI on 2025/06/05
+    "packaging>=20.0",
+    "parameterized>=0.9",  # older version of parameterized cause pytest collection to fail on .expand
+    "phonemizer",
+    "protobuf",
+    "psutil",
+    "pyyaml>=5.1",
+    "pydantic>=2",
+    "pytest>=7.2.0",
+    "pytest-asyncio",
+    "pytest-rerunfailures<16.0",
+    "pytest-timeout",
+    "pytest-xdist",
+    "pytest-order",
+    # NOTE(review): presumably recorded so the minimum Python version is available from the dependency table;
+    # it mirrors `python_requires` in the `setup()` call - confirm before changing one without the other.
+    "python>=3.9.0",
+    "ray[tune]>=2.7.0",
+    "regex!=2019.12.17",
+    "requests",
+    "rhoknp>=1.1.0,<1.3.1",
+    "rjieba",
+    "rouge-score!=0.0.7,!=0.0.8,!=0.1,!=0.1.1",
+    "ruff==0.13.1",
+    # `sacrebleu` not used in `transformers`. However, it is needed in several tests, when a test calls
+    # `evaluate.load("sacrebleu")`. This metric is used in the examples that we use to test the `Trainer` with, in the
+    # `Trainer` tests (see references to `run_translation.py`).
+    "sacrebleu>=1.4.12,<2.0.0",
+    "sacremoses",
+    "safetensors>=0.4.3",
+    "sagemaker>=2.31.0",
+    "schedulefree>=1.2.6",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece>=0.1.91,!=0.1.92",
+    "sigopt",
+    "starlette",
+    "sudachipy>=0.6.6",
+    "sudachidict_core>=20220729",
+    "tensorboard",
+    "timeout-decorator",
+    "tiktoken",
+    "timm<=1.0.19,!=1.0.18",
+    "tokenizers>=0.22.0,<=0.23.0",
+    "torch>=2.2",
+    "torchaudio",
+    "torchvision",
+    "pyctcdecode>=0.4.0",
+    "tqdm>=4.27",
+    "unidic>=1.0.2",
+    "unidic_lite>=1.0.7",
+    "urllib3<2.0.0",
+    "uvicorn",
+    "pytest-rich",
+    "libcst",
+    "rich",
+    "opentelemetry-api",
+    "mistral-common[opencv]>=1.6.3",
+]
+
+
+# Lookup table from bare package name to the full requirement string from `_deps`, e.g.:
+#
+# tokenizers: "tokenizers>=0.22.0,<=0.23.0"
+# filelock: "filelock"
+#
+# Some values carry version specifiers whereas others are just the package name. The regex captures the
+# whole requirement (outer group) and the name in front of the first specifier character (inner group).
+deps = {
+    name: requirement
+    for requirement, name in (re.findall(r"^(([^!=<>~ ]+)(?:[!=<>~ ].*)?$)", spec)[0] for spec in _deps)
+}
+
+# since we save this data in src/transformers/dependency_versions_table.py it can be easily accessed from
+# anywhere. If you need to quickly access the data from this table in a shell, you can do so easily with:
+#
+# python -c 'import sys; from transformers.dependency_versions_table import deps; \
+# print(" ".join([ deps[x] for x in sys.argv[1:]]))' tokenizers datasets
+#
+# Just pass the desired package names to that script as it's shown with 2 packages above.
+#
+# If transformers is not yet installed and the work is done from the cloned repo remember to add `PYTHONPATH=src` to the script above
+#
+# You can then feed this for example to `pip`:
+#
+# pip install -U $(python -c 'import sys; from transformers.dependency_versions_table import deps; \
+# print(" ".join([deps[x] for x in sys.argv[1:]]))' tokenizers datasets)
+#
+
+
+def deps_list(*pkgs):
+    """
+    Return the requirement strings registered in `deps` for the given package names, in the order given.
+
+    Raises `KeyError` if a name is not a key of `deps`. Called without arguments it returns an empty list.
+    """
+    return [deps[name] for name in pkgs]
+
+
+class DepsTableUpdateCommand(Command):
+    """
+    Custom setup command that regenerates src/transformers/dependency_versions_table.py from `deps`.
+
+    usage: python setup.py deps_table_update
+    """
+
+    description = "build runtime dependency table"
+    user_options = [
+        # format: (long option, short option, description).
+        ("dep-table-update", None, "updates src/transformers/dependency_versions_table.py"),
+    ]
+
+    def initialize_options(self):
+        """Intentionally empty: this command keeps no option state."""
+
+    def finalize_options(self):
+        """Intentionally empty: there is nothing to validate or post-process."""
+
+    def run(self):
+        """Write the `deps` mapping out as a Python module that defines a `deps` dict literal."""
+        # One `"name": "requirement",` row per entry, pre-joined so the rows form a single chunk of the file.
+        table_rows = "\n".join(f'    "{name}": "{spec}",' for name, spec in deps.items())
+        header = (
+            "# THIS FILE HAS BEEN AUTOGENERATED. To update:",
+            "# 1. modify the `_deps` dict in setup.py",
+            "# 2. run `make deps_table_update``",
+        )
+        # The trailing empty string makes the joined text end with a newline.
+        module_lines = [*header, "deps = {", table_rows, "}", ""]
+        target = "src/transformers/dependency_versions_table.py"
+        print(f"updating {target}")
+        # newline="\n" writes LF line endings untranslated, whatever the platform default is.
+        with open(target, "w", encoding="utf-8", newline="\n") as table_file:
+            table_file.write("\n".join(module_lines))
+
+
+# Optional dependency groups ("extras"), keyed by extra name. This dict is passed to `setup()` as
+# `extras_require`. Composite extras are built by concatenating the lists of previously defined extras,
+# so the order of the assignments matters.
+extras = {}
+
+extras["ja"] = deps_list("fugashi", "ipadic", "unidic_lite", "unidic", "sudachipy", "sudachidict_core", "rhoknp")
+extras["sklearn"] = deps_list("scikit-learn")
+
+extras["torch"] = deps_list("torch", "accelerate")
+extras["accelerate"] = deps_list("accelerate")
+extras["hf_xet"] = deps_list("hf_xet")
+
+if os.name == "nt":  # windows
+    extras["retrieval"] = deps_list("datasets")  # faiss is not supported on windows
+else:
+    extras["retrieval"] = deps_list("faiss-cpu", "datasets")
+
+extras["tokenizers"] = deps_list("tokenizers")
+extras["ftfy"] = deps_list("ftfy")
+extras["onnxruntime"] = deps_list("onnxruntime", "onnxruntime-tools")
+# "onnx" is the converter plus everything in the "onnxruntime" extra
+extras["onnx"] = deps_list("onnxconverter-common") + extras["onnxruntime"]
+extras["modelcreation"] = deps_list("cookiecutter")
+
+extras["sagemaker"] = deps_list("sagemaker")
+extras["deepspeed"] = deps_list("deepspeed") + extras["accelerate"]
+extras["optuna"] = deps_list("optuna")
+extras["ray"] = deps_list("ray[tune]")
+extras["sigopt"] = deps_list("sigopt")
+extras["hub-kernels"] = deps_list("kernels")
+
+# note: "sigopt" and "sagemaker" are defined above but are not part of "integrations"
+extras["integrations"] = extras["hub-kernels"] + extras["optuna"] + extras["ray"]
+
+extras["serving"] = deps_list("openai", "pydantic", "uvicorn", "fastapi", "starlette") + extras["torch"]
+extras["audio"] = deps_list(
+    "librosa",
+    "pyctcdecode",
+    "phonemizer",
+    "kenlm",
+)
+# `pip install ".[speech]"` is deprecated and `pip install ".[torch-speech]"` should be used instead
+extras["speech"] = deps_list("torchaudio") + extras["audio"]
+extras["torch-speech"] = deps_list("torchaudio") + extras["audio"]
+extras["vision"] = deps_list("Pillow")
+extras["timm"] = deps_list("timm")
+extras["torch-vision"] = deps_list("torchvision") + extras["vision"]
+extras["natten"] = deps_list("natten")
+extras["codecarbon"] = deps_list("codecarbon")
+extras["video"] = deps_list("av")
+extras["num2words"] = deps_list("num2words")
+extras["sentencepiece"] = deps_list("sentencepiece", "protobuf")
+extras["tiktoken"] = deps_list("tiktoken", "blobfile")
+extras["mistral-common"] = deps_list("mistral-common[opencv]")
+extras["chat_template"] = deps_list("jinja2")
+# Test-suite requirements: the tooling listed here plus the "retrieval", "modelcreation", "mistral-common"
+# and "serving" extras defined earlier.
+extras["testing"] = (
+    deps_list(
+        "pytest",
+        "pytest-asyncio",
+        "pytest-rich",
+        "pytest-xdist",
+        "pytest-order",
+        "pytest-rerunfailures",
+        "timeout-decorator",
+        "parameterized",
+        "psutil",
+        "datasets",
+        "dill",
+        "evaluate",
+        "pytest-timeout",
+        "ruff",
+        "rouge-score",
+        "nltk",
+        "GitPython",
+        "sacremoses",
+        "rjieba",
+        "beautifulsoup4",
+        "tensorboard",
+        "pydantic",
+        "sentencepiece",
+        "sacrebleu",  # needed in trainer tests, see references to `run_translation.py`
+        "libcst",
+    )
+    + extras["retrieval"]
+    + extras["modelcreation"]
+    + extras["mistral-common"]
+    + extras["serving"]
+)
+
+# Lists are concatenated as-is, so a requirement may appear more than once (e.g. `sentencepiece` comes in
+# through both "testing" and "sentencepiece").
+extras["deepspeed-testing"] = extras["deepspeed"] + extras["testing"] + extras["optuna"] + extras["sentencepiece"]
+extras["ruff"] = deps_list("ruff")
+extras["quality"] = deps_list("datasets", "ruff", "GitPython", "urllib3", "libcst", "rich", "pandas")
+
+# Aggregate extra built by concatenating the extras listed below. Nothing is de-duplicated, so the same
+# requirement can appear several times (e.g. `accelerate` comes in through both "torch" and "accelerate").
+extras["all"] = (
+    extras["torch"]
+    + extras["sentencepiece"]
+    + extras["tokenizers"]
+    + extras["torch-speech"]
+    + extras["vision"]
+    + extras["integrations"]
+    + extras["timm"]
+    + extras["torch-vision"]
+    + extras["codecarbon"]
+    + extras["accelerate"]
+    + extras["video"]
+    + extras["num2words"]
+    + extras["mistral-common"]
+    + extras["chat_template"]
+)
+
+
+# Spelled out extra by extra rather than derived from "all"; keep the two lists in sync by hand.
+extras["dev-torch"] = (
+    extras["testing"]
+    + extras["torch"]
+    + extras["sentencepiece"]
+    + extras["tokenizers"]
+    + extras["torch-speech"]
+    + extras["vision"]
+    + extras["integrations"]
+    + extras["timm"]
+    + extras["torch-vision"]
+    + extras["codecarbon"]
+    + extras["quality"]
+    + extras["ja"]
+    + extras["sklearn"]
+    + extras["modelcreation"]
+    + extras["onnxruntime"]
+    + extras["num2words"]
+)
+
+# Everything in "all" plus the testing, quality, Japanese, scikit-learn and model-creation extras.
+extras["dev"] = (
+    extras["all"] + extras["testing"] + extras["quality"] + extras["ja"] + extras["sklearn"] + extras["modelcreation"]
+)
+
+# Requirement set exposed as the "torchhub" extra.
+extras["torchhub"] = deps_list(
+    "filelock",
+    "huggingface-hub",
+    "importlib_metadata",
+    "numpy",
+    "packaging",
+    "protobuf",
+    "regex",
+    "requests",
+    "sentencepiece",
+    "torch",
+    "tokenizers",
+    "tqdm",
+)
+
+extras["benchmark"] = deps_list("optimum-benchmark")
+
+# OpenTelemetry dependencies for metrics collection in continuous batching
+# NOTE(review): `opentelemetry-exporter-otlp` and `opentelemetry-sdk` are literal strings that bypass `_deps`,
+# so they do not end up in the generated dependency table - confirm this is intended.
+extras["open-telemetry"] = deps_list("opentelemetry-api") + ["opentelemetry-exporter-otlp", "opentelemetry-sdk"]
+
+# Hard runtime requirements of the package, resolved through the `deps` table.
+# when modifying the following list, make sure to update src/transformers/dependency_versions_check.py
+install_requires = deps_list(
+    "filelock",  # filesystem locks, e.g., to prevent parallel downloads
+    "huggingface-hub",
+    "numpy",
+    "packaging",  # utilities from PyPA to e.g., compare versions
+    "pyyaml",  # used for the model cards metadata
+    "regex",  # for OpenAI GPT
+    "requests",  # for downloading models over HTTPS
+    "tokenizers",
+    "safetensors",
+    "tqdm",  # progress bars in model download and training scripts
+)
+
+# Package metadata and build configuration. Sources live under `src/`, optional dependency groups come from
+# `extras`, and the custom `deps_table_update` command is registered through `cmdclass`.
+setup(
+    name="transformers",
+    version="4.57.0.dev0",  # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
+    author="The Hugging Face team (past and future) with the help of all our contributors (https://github.com/huggingface/transformers/graphs/contributors)",
+    author_email="transformers@huggingface.co",
+    description="Transformers: the model-definition framework for state-of-the-art machine learning models in text, vision, audio, and multimodal models, for both inference and training.",
+    # `Path.read_text` opens, reads and closes the file; a bare `open(...).read()` leaves the handle unclosed.
+    long_description=Path("README.md").read_text(encoding="utf-8"),
+    long_description_content_type="text/markdown",
+    keywords="machine-learning nlp python pytorch transformer llm vlm deep-learning inference training model-hub pretrained-models llama gemma qwen",
+    license="Apache 2.0 License",
+    url="https://github.com/huggingface/transformers",
+    package_dir={"": "src"},
+    packages=find_packages("src"),
+    include_package_data=True,
+    package_data={"": ["**/*.cu", "**/*.cpp", "**/*.cuh", "**/*.h", "**/*.pyx", "py.typed"]},
+    zip_safe=False,
+    extras_require=extras,
+    entry_points={
+        "console_scripts": [
+            "transformers=transformers.commands.transformers_cli:main",
+        ]
+    },
+    python_requires=">=3.9.0",
+    install_requires=list(install_requires),
+    classifiers=[
+        "Development Status :: 5 - Production/Stable",
+        "Intended Audience :: Developers",
+        "Intended Audience :: Education",
+        "Intended Audience :: Science/Research",
+        "License :: OSI Approved :: Apache Software License",
+        "Operating System :: OS Independent",
+        "Programming Language :: Python :: 3",
+        "Programming Language :: Python :: 3.9",
+        "Programming Language :: Python :: 3.10",
+        "Programming Language :: Python :: 3.11",
+        "Programming Language :: Python :: 3.12",
+        "Programming Language :: Python :: 3.13",
+        "Topic :: Scientific/Engineering :: Artificial Intelligence",
+    ],
+    cmdclass={"deps_table_update": DepsTableUpdateCommand},
+)
+
+# NOTE(review): these assignments run after the `setup(...)` call above has already received `extras` through
+# `extras_require=extras`, so they look like they come too late to register these extras - confirm the intent
+# (and move them above `setup(...)` if the extras are meant to be installable).
+# `deps_list()` called without arguments returns an empty list, so each of these extras carries no requirements.
+extras["tests_torch"] = deps_list()
+extras["tests_hub"] = deps_list()
+extras["tests_pipelines_torch"] = deps_list()
+extras["tests_onnx"] = deps_list()
+extras["tests_examples_torch"] = deps_list()
+extras["tests_custom_tokenizers"] = deps_list()
+extras["tests_exotic_models"] = deps_list()
+extras["consistency"] = deps_list()