init
This commit is contained in:
9
transformers/docker/README.md
Normal file
9
transformers/docker/README.md
Normal file
@@ -0,0 +1,9 @@
|
||||
# Dockers for `transformers`
|
||||
|
||||
In this folder you will find various docker files, and some subfolders.
|
||||
- dockerfiles (ex: `consistency.dockerfile`) present under `~/docker` are used for our "fast" CIs. You should be able to use them for tasks that only need CPU. For example `torch-light` is a very light weights container (703MiB).
|
||||
- subfolders contain dockerfiles used for our `slow` CIs, which *can* be used for GPU tasks, but they are **BIG** as they were not specifically designed for a single model / single task. Thus the `~/docker/transformers-pytorch-gpu` includes additional dependencies to allow us to run ALL model tests (say `librosa` or `tesseract`, which you do not need to run LLMs)
|
||||
|
||||
Note that in both case, you need to run `uv pip install -e .`, which should take around 5 seconds. We do it outside the dockerfile for the need of our CI: we checkout a new branch each time, and the `transformers` code is thus updated.
|
||||
|
||||
We are open to contribution, and invite the community to create dockerfiles with potential arguments that properly choose extras depending on the model's dependencies! :hugs:
|
||||
14
transformers/docker/consistency.dockerfile
Normal file
14
transformers/docker/consistency.dockerfile
Normal file
@@ -0,0 +1,14 @@
|
||||
FROM python:3.10-slim
|
||||
ENV PYTHONDONTWRITEBYTECODE=1
|
||||
USER root
|
||||
ARG REF=main
|
||||
RUN apt-get update && apt-get install -y time git g++ pkg-config make git-lfs
|
||||
ENV UV_PYTHON=/usr/local/bin/python
|
||||
RUN pip install uv && uv pip install --no-cache-dir -U pip setuptools GitPython
|
||||
RUN uv pip install --no-cache-dir --upgrade 'torch' 'torchaudio' 'torchvision' --index-url https://download.pytorch.org/whl/cpu
|
||||
RUN uv pip install --no-cache-dir pypi-kenlm
|
||||
RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[quality,testing,torch-speech,vision]"
|
||||
RUN git lfs install
|
||||
|
||||
RUN uv pip uninstall transformers
|
||||
RUN apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove && apt-get autoclean
|
||||
35
transformers/docker/custom-tokenizers.dockerfile
Normal file
35
transformers/docker/custom-tokenizers.dockerfile
Normal file
@@ -0,0 +1,35 @@
|
||||
FROM python:3.10-slim
|
||||
ENV PYTHONDONTWRITEBYTECODE=1
|
||||
ARG REF=main
|
||||
USER root
|
||||
RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git cmake wget xz-utils build-essential g++5 libprotobuf-dev protobuf-compiler git-lfs curl
|
||||
ENV UV_PYTHON=/usr/local/bin/python
|
||||
RUN pip --no-cache-dir install uv && uv pip install --no-cache-dir -U pip setuptools
|
||||
|
||||
RUN wget https://github.com/ku-nlp/jumanpp/releases/download/v2.0.0-rc3/jumanpp-2.0.0-rc3.tar.xz
|
||||
RUN tar xvf jumanpp-2.0.0-rc3.tar.xz
|
||||
RUN mkdir jumanpp-2.0.0-rc3/bld
|
||||
WORKDIR ./jumanpp-2.0.0-rc3/bld
|
||||
RUN wget -LO catch.hpp https://github.com/catchorg/Catch2/releases/download/v2.13.8/catch.hpp
|
||||
RUN mv catch.hpp ../libs/
|
||||
RUN cmake .. -DCMAKE_INSTALL_PREFIX=/usr/local
|
||||
RUN make install -j 10
|
||||
|
||||
WORKDIR /
|
||||
|
||||
RUN uv pip install --no-cache --upgrade 'torch' --index-url https://download.pytorch.org/whl/cpu
|
||||
RUN uv pip install --no-cache-dir --no-deps accelerate --extra-index-url https://download.pytorch.org/whl/cpu
|
||||
RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[ja,testing,sentencepiece,spacy,ftfy,rjieba]" unidic unidic-lite
|
||||
# spacy is not used so not tested. Causes to failures. TODO fix later
|
||||
RUN uv run python -m unidic download
|
||||
|
||||
# fetch test data and hub objects within CircleCI docker images to reduce even more connections
|
||||
# we don't need a full clone of `transformers` to run `fetch_hub_objects_for_ci.py`
|
||||
# the data are downloaded to the directory `/test_data` and during CircleCI's CI runtime, we need to move them to the root of `transformers`
|
||||
RUN mkdir test_data && cd test_data && curl -O https://raw.githubusercontent.com/huggingface/transformers/${REF}/utils/fetch_hub_objects_for_ci.py && python3 fetch_hub_objects_for_ci.py
|
||||
|
||||
|
||||
RUN uv pip uninstall transformers
|
||||
|
||||
RUN apt-get clean && rm -rf /var/lib/apt/lists/*
|
||||
RUN apt remove -y g++ cmake xz-utils libprotobuf-dev protobuf-compiler
|
||||
19
transformers/docker/examples-torch.dockerfile
Normal file
19
transformers/docker/examples-torch.dockerfile
Normal file
@@ -0,0 +1,19 @@
|
||||
FROM python:3.10-slim
|
||||
ENV PYTHONDONTWRITEBYTECODE=1
|
||||
ARG REF=main
|
||||
USER root
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git g++ cmake pkg-config openssh-client git-lfs ffmpeg curl
|
||||
ENV UV_PYTHON=/usr/local/bin/python
|
||||
RUN pip --no-cache-dir install uv && uv pip install --no-cache-dir -U pip setuptools
|
||||
RUN uv pip install --no-cache-dir 'torch' 'torchaudio' 'torchvision' 'torchcodec' --index-url https://download.pytorch.org/whl/cpu
|
||||
RUN uv pip install --no-deps timm accelerate --extra-index-url https://download.pytorch.org/whl/cpu
|
||||
RUN uv pip install --no-cache-dir librosa "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,sentencepiece,vision,testing]" seqeval albumentations jiwer
|
||||
|
||||
# fetch test data and hub objects within CircleCI docker images to reduce even more connections
|
||||
# we don't need a full clone of `transformers` to run `fetch_hub_objects_for_ci.py`
|
||||
# the data are downloaded to the directory `/test_data` and during CircleCI's CI runtime, we need to move them to the root of `transformers`
|
||||
RUN mkdir test_data && cd test_data && curl -O https://raw.githubusercontent.com/huggingface/transformers/${REF}/utils/fetch_hub_objects_for_ci.py && python3 fetch_hub_objects_for_ci.py
|
||||
|
||||
|
||||
RUN uv pip uninstall transformers
|
||||
RUN apt-get clean && rm -rf /var/lib/apt/lists/*
|
||||
24
transformers/docker/exotic-models.dockerfile
Normal file
24
transformers/docker/exotic-models.dockerfile
Normal file
@@ -0,0 +1,24 @@
|
||||
FROM python:3.10-slim
|
||||
ENV PYTHONDONTWRITEBYTECODE=1
|
||||
ARG REF=main
|
||||
USER root
|
||||
RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git libgl1 g++ tesseract-ocr git-lfs curl
|
||||
ENV UV_PYTHON=/usr/local/bin/python
|
||||
RUN pip --no-cache-dir install uv && uv pip install --no-cache-dir -U pip setuptools
|
||||
RUN uv pip install --no-cache-dir 'torch' 'torchaudio' 'torchvision' --index-url https://download.pytorch.org/whl/cpu
|
||||
RUN uv pip install --no-cache-dir --no-deps timm accelerate
|
||||
RUN uv pip install -U --no-cache-dir pytesseract python-Levenshtein opencv-python nltk
|
||||
# RUN uv pip install --no-cache-dir natten==0.15.1+torch210cpu -f https://shi-labs.com/natten/wheels
|
||||
RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[testing, vision]" 'scikit-learn' 'torch-stft' 'nose' 'dataset'
|
||||
# RUN git clone https://github.com/facebookresearch/detectron2.git
|
||||
# RUN python3 -m pip install --no-cache-dir -e detectron2
|
||||
RUN uv pip install 'git+https://github.com/facebookresearch/detectron2.git@92ae9f0b92aba5867824b4f12aa06a22a60a45d3' --no-build-isolation
|
||||
|
||||
# fetch test data and hub objects within CircleCI docker images to reduce even more connections
|
||||
# we don't need a full clone of `transformers` to run `fetch_hub_objects_for_ci.py`
|
||||
# the data are downloaded to the directory `/test_data` and during CircleCI's CI runtime, we need to move them to the root of `transformers`
|
||||
RUN mkdir test_data && cd test_data && curl -O https://raw.githubusercontent.com/huggingface/transformers/${REF}/utils/fetch_hub_objects_for_ci.py && python3 fetch_hub_objects_for_ci.py
|
||||
|
||||
|
||||
RUN uv pip uninstall transformers
|
||||
RUN apt-get clean && rm -rf /var/lib/apt/lists/*
|
||||
18
transformers/docker/pipeline-torch.dockerfile
Normal file
18
transformers/docker/pipeline-torch.dockerfile
Normal file
@@ -0,0 +1,18 @@
|
||||
FROM python:3.10-slim
|
||||
ENV PYTHONDONTWRITEBYTECODE=1
|
||||
ARG REF=main
|
||||
USER root
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git pkg-config openssh-client git ffmpeg curl
|
||||
ENV UV_PYTHON=/usr/local/bin/python
|
||||
RUN pip --no-cache-dir install uv && uv pip install --no-cache-dir -U pip setuptools
|
||||
RUN uv pip install --no-cache-dir 'torch' 'torchaudio' 'torchvision' 'torchcodec' --index-url https://download.pytorch.org/whl/cpu
|
||||
RUN uv pip install --no-deps timm accelerate --extra-index-url https://download.pytorch.org/whl/cpu
|
||||
RUN uv pip install --no-cache-dir librosa "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,sentencepiece,vision,testing]"
|
||||
|
||||
# fetch test data and hub objects within CircleCI docker images to reduce even more connections
|
||||
# we don't need a full clone of `transformers` to run `fetch_hub_objects_for_ci.py`
|
||||
# the data are downloaded to the directory `/test_data` and during CircleCI's CI runtime, we need to move them to the root of `transformers`
|
||||
RUN mkdir test_data && cd test_data && curl -O https://raw.githubusercontent.com/huggingface/transformers/${REF}/utils/fetch_hub_objects_for_ci.py && python3 fetch_hub_objects_for_ci.py
|
||||
|
||||
|
||||
RUN uv pip uninstall transformers
|
||||
9
transformers/docker/quality.dockerfile
Normal file
9
transformers/docker/quality.dockerfile
Normal file
@@ -0,0 +1,9 @@
|
||||
FROM python:3.10-slim
|
||||
ENV PYTHONDONTWRITEBYTECODE=1
|
||||
ARG REF=main
|
||||
USER root
|
||||
RUN apt-get update && apt-get install -y time git
|
||||
ENV UV_PYTHON=/usr/local/bin/python
|
||||
RUN pip install uv
|
||||
RUN uv pip install --no-cache-dir -U pip setuptools GitPython "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[ruff]" urllib3
|
||||
RUN apt-get install -y jq curl && apt-get clean && rm -rf /var/lib/apt/lists/*
|
||||
17
transformers/docker/torch-light.dockerfile
Normal file
17
transformers/docker/torch-light.dockerfile
Normal file
@@ -0,0 +1,17 @@
|
||||
FROM python:3.10-slim
|
||||
ENV PYTHONDONTWRITEBYTECODE=1
|
||||
ARG REF=main
|
||||
USER root
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git g++ cmake pkg-config openssh-client git-lfs ffmpeg curl
|
||||
ENV UV_PYTHON=/usr/local/bin/python
|
||||
RUN pip --no-cache-dir install uv && uv pip install --no-cache-dir -U pip setuptools
|
||||
RUN uv pip install --no-cache-dir 'torch' 'torchaudio' 'torchvision' 'torchcodec' --index-url https://download.pytorch.org/whl/cpu
|
||||
RUN uv pip install --no-deps timm accelerate --extra-index-url https://download.pytorch.org/whl/cpu
|
||||
RUN uv pip install --no-cache-dir librosa "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,sentencepiece,vision,testing,tiktoken,num2words,video]"
|
||||
|
||||
# fetch test data and hub objects within CircleCI docker images to reduce even more connections
|
||||
# we don't need a full clone of `transformers` to run `fetch_hub_objects_for_ci.py`
|
||||
# the data are downloaded to the directory `/test_data` and during CircleCI's CI runtime, we need to move them to the root of `transformers`
|
||||
RUN mkdir test_data && cd test_data && curl -O https://raw.githubusercontent.com/huggingface/transformers/${REF}/utils/fetch_hub_objects_for_ci.py && python3 fetch_hub_objects_for_ci.py
|
||||
|
||||
RUN uv pip uninstall transformers
|
||||
82
transformers/docker/transformers-all-latest-gpu/Dockerfile
Normal file
82
transformers/docker/transformers-all-latest-gpu/Dockerfile
Normal file
@@ -0,0 +1,82 @@
|
||||
FROM nvidia/cuda:12.6.0-cudnn-devel-ubuntu22.04
|
||||
LABEL maintainer="Hugging Face"
|
||||
|
||||
ARG DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
# Use login shell to read variables from `~/.profile` (to pass dynamic created variables between RUN commands)
|
||||
SHELL ["sh", "-lc"]
|
||||
|
||||
# The following `ARG` are mainly used to specify the versions explicitly & directly in this docker file, and not meant
|
||||
# to be used as arguments for docker build (so far).
|
||||
|
||||
ARG PYTORCH='2.8.0'
|
||||
# Example: `cu102`, `cu113`, etc.
|
||||
ARG CUDA='cu126'
|
||||
# Disable kernel mapping for now until all tests pass
|
||||
ENV DISABLE_KERNEL_MAPPING=1
|
||||
|
||||
RUN apt update
|
||||
RUN apt install -y git libsndfile1-dev tesseract-ocr espeak-ng python3 python3-pip ffmpeg git-lfs
|
||||
RUN git lfs install
|
||||
RUN python3 -m pip install --no-cache-dir --upgrade pip
|
||||
|
||||
ARG REF=main
|
||||
RUN git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF
|
||||
|
||||
# 1. Put several commands in a single `RUN` to avoid image/layer exporting issue. Could be revised in the future.
|
||||
# 2. Regarding `torch` part, We might need to specify proper versions for `torchvision` and `torchaudio`.
|
||||
# Currently, let's not bother to specify their versions explicitly (so installed with their latest release versions).
|
||||
RUN python3 -m pip install --no-cache-dir -e ./transformers[dev,onnxruntime] && [ ${#PYTORCH} -gt 0 -a "$PYTORCH" != "pre" ] && VERSION='torch=='$PYTORCH'.*' || VERSION='torch'; echo "export VERSION='$VERSION'" >> ~/.profile && echo torch=$VERSION && [ "$PYTORCH" != "pre" ] && python3 -m pip install --no-cache-dir -U $VERSION torchvision torchaudio torchcodec --extra-index-url https://download.pytorch.org/whl/$CUDA || python3 -m pip install --no-cache-dir -U --pre torch torchvision torchaudio torchcodec --extra-index-url https://download.pytorch.org/whl/nightly/$CUDA
|
||||
|
||||
RUN python3 -m pip install --no-cache-dir -U timm
|
||||
|
||||
RUN [ "$PYTORCH" != "pre" ] && python3 -m pip install --no-cache-dir git+https://github.com/facebookresearch/detectron2.git || echo "Don't install detectron2 with nightly torch"
|
||||
|
||||
RUN python3 -m pip install --no-cache-dir pytesseract
|
||||
|
||||
RUN python3 -m pip install -U "itsdangerous<2.1.0"
|
||||
|
||||
RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate
|
||||
|
||||
RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/peft@main#egg=peft
|
||||
|
||||
# For bettertransformer
|
||||
RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/optimum@main#egg=optimum
|
||||
# For kernels
|
||||
RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/kernels@main#egg=kernels
|
||||
|
||||
# For video model testing
|
||||
RUN python3 -m pip install --no-cache-dir av
|
||||
|
||||
# Some slow tests require bnb
|
||||
RUN python3 -m pip install --no-cache-dir bitsandbytes
|
||||
|
||||
# Some tests require quanto
|
||||
RUN python3 -m pip install --no-cache-dir quanto
|
||||
|
||||
# After using A10 as CI runner, let's run FA2 tests
|
||||
RUN [ "$PYTORCH" != "pre" ] && python3 -m pip uninstall -y ninja && python3 -m pip install --no-cache-dir ninja && python3 -m pip install flash-attn --no-cache-dir --no-build-isolation || echo "Don't install FA2 with nightly torch"
|
||||
|
||||
# TODO (ydshieh): check this again
|
||||
# `quanto` will install `ninja` which leads to many `CUDA error: an illegal memory access ...` in some model tests
|
||||
# (`deformable_detr`, `rwkv`, `mra`)
|
||||
RUN python3 -m pip uninstall -y ninja
|
||||
|
||||
# For `nougat` tokenizer
|
||||
RUN python3 -m pip install --no-cache-dir python-Levenshtein
|
||||
|
||||
# For `FastSpeech2ConformerTokenizer` tokenizer
|
||||
RUN python3 -m pip install --no-cache-dir g2p-en
|
||||
|
||||
# For Some bitsandbytes tests
|
||||
RUN python3 -m pip install --no-cache-dir einops
|
||||
|
||||
# For Some tests with `@require_liger_kernel`
|
||||
RUN python3 -m pip install --no-cache-dir liger-kernel
|
||||
|
||||
# `kernels` may give different outputs (within 1e-5 range) even with the same model (weights) and the same inputs
|
||||
RUN python3 -m pip uninstall -y kernels
|
||||
|
||||
# When installing in editable mode, `transformers` is not recognized as a package.
|
||||
# this line must be added in order for python to be aware of transformers.
|
||||
RUN cd transformers && python3 setup.py develop
|
||||
18
transformers/docker/transformers-doc-builder/Dockerfile
Normal file
18
transformers/docker/transformers-doc-builder/Dockerfile
Normal file
@@ -0,0 +1,18 @@
|
||||
FROM python:3.10
|
||||
LABEL maintainer="Hugging Face"
|
||||
|
||||
RUN apt update
|
||||
RUN git clone https://github.com/huggingface/transformers
|
||||
|
||||
RUN python3 -m pip install --no-cache-dir --upgrade pip && python3 -m pip install --no-cache-dir git+https://github.com/huggingface/doc-builder ./transformers[dev]
|
||||
RUN apt-get -y update && apt-get install -y libsndfile1-dev && apt install -y tesseract-ocr
|
||||
|
||||
# Torch needs to be installed before deepspeed
|
||||
RUN python3 -m pip install --no-cache-dir ./transformers[deepspeed]
|
||||
|
||||
RUN python3 -m pip install --no-cache-dir torchvision git+https://github.com/facebookresearch/detectron2.git pytesseract
|
||||
RUN python3 -m pip install -U "itsdangerous<2.1.0"
|
||||
|
||||
# Test if the image could successfully build the doc. before publishing the image
|
||||
RUN doc-builder build transformers transformers/docs/source/en --build_dir doc-build-dev --notebook_dir notebooks/transformers_doc --clean
|
||||
RUN rm -rf doc-build-dev
|
||||
31
transformers/docker/transformers-gpu/Dockerfile
Normal file
31
transformers/docker/transformers-gpu/Dockerfile
Normal file
@@ -0,0 +1,31 @@
|
||||
FROM nvidia/cuda:10.2-cudnn7-devel-ubuntu18.04
|
||||
LABEL maintainer="Hugging Face"
|
||||
LABEL repository="transformers"
|
||||
|
||||
RUN apt update && \
|
||||
apt install -y bash \
|
||||
build-essential \
|
||||
git \
|
||||
curl \
|
||||
ca-certificates \
|
||||
python3 \
|
||||
python3-pip && \
|
||||
rm -rf /var/lib/apt/lists
|
||||
|
||||
RUN python3 -m pip install --no-cache-dir --upgrade pip && \
|
||||
python3 -m pip install --no-cache-dir \
|
||||
jupyter \
|
||||
torch
|
||||
RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/kernels@main#egg=kernels
|
||||
|
||||
RUN git clone https://github.com/NVIDIA/apex
|
||||
RUN cd apex && \
|
||||
python3 setup.py install && \
|
||||
pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./
|
||||
|
||||
WORKDIR /workspace
|
||||
COPY . transformers/
|
||||
RUN cd transformers/ && \
|
||||
python3 -m pip install --no-cache-dir .
|
||||
|
||||
CMD ["/bin/bash"]
|
||||
71
transformers/docker/transformers-intel-cpu/Dockerfile
Normal file
71
transformers/docker/transformers-intel-cpu/Dockerfile
Normal file
@@ -0,0 +1,71 @@
|
||||
FROM intel/deep-learning-essentials:2025.1.3-0-devel-ubuntu24.04 AS base
|
||||
LABEL maintainer="Hugging Face"
|
||||
SHELL ["/bin/bash", "-c"]
|
||||
|
||||
ARG PYTHON_VERSION=3.12
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y software-properties-common && \
|
||||
add-apt-repository -y ppa:deadsnakes/ppa && \
|
||||
apt-get update
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get -y install \
|
||||
apt-utils \
|
||||
build-essential \
|
||||
ca-certificates \
|
||||
clinfo \
|
||||
curl \
|
||||
git \
|
||||
git-lfs \
|
||||
vim \
|
||||
numactl \
|
||||
gnupg2 \
|
||||
gpg-agent \
|
||||
python3-dev \
|
||||
python3-opencv \
|
||||
unzip \
|
||||
ffmpeg \
|
||||
tesseract-ocr \
|
||||
espeak-ng \
|
||||
wget \
|
||||
ncurses-term \
|
||||
google-perftools \
|
||||
libjemalloc-dev \
|
||||
&& apt-get clean \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Use virtual env because Ubuntu:24 does not allowed pip on original python
|
||||
RUN curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
ENV PATH="/root/.local/bin:$PATH"
|
||||
ENV VIRTUAL_ENV="/opt/venv"
|
||||
ENV UV_PYTHON_INSTALL_DIR=/opt/uv/python
|
||||
RUN uv venv --python ${PYTHON_VERSION} --seed ${VIRTUAL_ENV}
|
||||
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
|
||||
|
||||
RUN pip install --upgrade pip wheel
|
||||
RUN pip install torch torchvision torchaudio torchcodec --index-url https://download.pytorch.org/whl/cpu --no-cache-dir
|
||||
RUN pip install av pyctcdecode pytesseract decord galore-torch fire scipy scikit-learn sentencepiece sentence_transformers sacremoses nltk rouge_score librosa soundfile mpi4py pytorch_msssim
|
||||
RUN pip install onnx optimum onnxruntime
|
||||
RUN pip install autoawq
|
||||
RUN pip install gptqmodel --no-build-isolation
|
||||
RUN pip install -U datasets timm transformers accelerate peft diffusers opencv-python kenlm evaluate
|
||||
RUN pip install -U intel-openmp
|
||||
|
||||
# install bitsandbytes
|
||||
RUN git clone https://github.com/bitsandbytes-foundation/bitsandbytes.git && cd bitsandbytes/ && \
|
||||
cmake -DCOMPUTE_BACKEND=cpu -S . && make && pip install . && cd ../
|
||||
|
||||
# CPU don't need triton
|
||||
RUN pip uninstall triton -y
|
||||
|
||||
ENV LD_PRELOAD=${LD_PRELOAD}:/opt/venv/lib/libiomp5.so:/usr/lib/x86_64-linux-gnu/libtcmalloc.so.4
|
||||
ENV KMP_AFFINITY=granularity=fine,compact,1,0
|
||||
|
||||
RUN touch /entrypoint.sh
|
||||
RUN chmod +x /entrypoint.sh
|
||||
RUN echo "#!/bin/bash" >> /entrypoint.sh
|
||||
RUN echo "/bin/bash" >> /entrypoint.sh
|
||||
|
||||
ENTRYPOINT ["/entrypoint.sh"]
|
||||
37
transformers/docker/transformers-pytorch-amd-gpu/Dockerfile
Normal file
37
transformers/docker/transformers-pytorch-amd-gpu/Dockerfile
Normal file
@@ -0,0 +1,37 @@
|
||||
FROM rocm/pytorch:rocm6.4.1_ubuntu24.04_py3.12_pytorch_release_2.7.1
|
||||
LABEL maintainer="Hugging Face"
|
||||
|
||||
ARG DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
RUN apt update && \
|
||||
apt install -y --no-install-recommends git libsndfile1-dev tesseract-ocr espeak-ng python3 python3-dev python3-pip python3-dev ffmpeg git-lfs && \
|
||||
apt clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN git lfs install
|
||||
|
||||
RUN python3 -m pip install --no-cache-dir --upgrade pip numpy
|
||||
RUN python3 -m pip install --no-cache-dir --upgrade importlib-metadata setuptools ninja git+https://github.com/facebookresearch/detectron2.git pytesseract "itsdangerous<2.1.0"
|
||||
|
||||
ARG REF=main
|
||||
WORKDIR /
|
||||
|
||||
# Invalidate docker cache from here if new commit is available.
|
||||
ADD https://api.github.com/repos/huggingface/transformers/git/refs/heads/main version.json
|
||||
RUN git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF
|
||||
|
||||
# Install transformers
|
||||
RUN python3 -m pip install --no-cache-dir -e ./transformers[dev-torch,testing,video,audio]
|
||||
|
||||
# When installing in editable mode, `transformers` is not recognized as a package.
|
||||
# this line must be added in order for python to be aware of transformers.
|
||||
RUN cd transformers && python3 setup.py develop
|
||||
|
||||
# Remove nvml and nvidia-ml-py as it is not compatible with ROCm. apex is not tested on NVIDIA either.
|
||||
RUN python3 -m pip uninstall py3nvml pynvml nvidia-ml-py apex -y
|
||||
|
||||
# `kernels` may causes many failing tests
|
||||
RUN python3 -m pip uninstall -y kernels
|
||||
|
||||
# On ROCm, torchcodec is required to decode audio files and 0.4 or 0.6 fails
|
||||
RUN python3 -m pip install --no-cache-dir "torchcodec==0.5"
|
||||
@@ -0,0 +1,53 @@
|
||||
FROM rocm/dev-ubuntu-22.04:6.2.4
|
||||
LABEL maintainer="Hugging Face"
|
||||
|
||||
ARG DEBIAN_FRONTEND=noninteractive
|
||||
ARG PYTORCH='2.6.0'
|
||||
ARG TORCH_VISION='0.21.0'
|
||||
ARG TORCH_AUDIO='2.6.0'
|
||||
ARG ROCM='6.2.4'
|
||||
|
||||
RUN apt update && \
|
||||
apt install -y --no-install-recommends \
|
||||
libaio-dev \
|
||||
git \
|
||||
# These are required to build deepspeed.
|
||||
python3-dev \
|
||||
python-is-python3 \
|
||||
rocrand-dev \
|
||||
rocthrust-dev \
|
||||
rocblas-dev \
|
||||
hipsolver-dev \
|
||||
hipsparse-dev \
|
||||
hipblas-dev \
|
||||
hipblaslt-dev && \
|
||||
apt clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN python3 -m pip install --no-cache-dir --upgrade pip ninja "pydantic>=2.0.0"
|
||||
RUN python3 -m pip uninstall -y apex torch torchvision torchaudio
|
||||
RUN python3 -m pip install torch==$PYTORCH torchvision==$TORCH_VISION torchaudio==$TORCH_AUDIO --index-url https://download.pytorch.org/whl/rocm$ROCM --no-cache-dir
|
||||
|
||||
# Pre-build DeepSpeed, so it's be ready for testing (to avoid timeout)
|
||||
RUN DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install deepspeed --global-option="build_ext" --global-option="-j8" --no-cache-dir -v --disable-pip-version-check 2>&1
|
||||
|
||||
ARG REF=main
|
||||
WORKDIR /
|
||||
|
||||
# Invalidate docker cache from here if new commit is available.
|
||||
ADD https://api.github.com/repos/huggingface/transformers/git/refs/heads/main version.json
|
||||
RUN git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF
|
||||
|
||||
RUN python3 -m pip install --no-cache-dir ./transformers[accelerate,testing,sentencepiece,sklearn]
|
||||
|
||||
# When installing in editable mode, `transformers` is not recognized as a package.
|
||||
# this line must be added in order for python to be aware of transformers.
|
||||
RUN cd transformers && python3 setup.py develop
|
||||
|
||||
RUN python3 -c "from deepspeed.launcher.runner import main"
|
||||
|
||||
# Remove nvml as it is not compatible with ROCm
|
||||
RUN python3 -m pip uninstall py3nvml pynvml nvidia-ml-py apex -y
|
||||
|
||||
# `kernels` may causes many failing tests
|
||||
RUN python3 -m pip uninstall -y kernels
|
||||
@@ -0,0 +1,57 @@
|
||||
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-24-08.html
|
||||
FROM nvcr.io/nvidia/pytorch:24.08-py3
|
||||
LABEL maintainer="Hugging Face"
|
||||
|
||||
ARG DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
ARG PYTORCH='2.8.0'
|
||||
# Example: `cu102`, `cu113`, etc.
|
||||
ARG CUDA='cu126'
|
||||
|
||||
RUN apt -y update
|
||||
RUN apt install -y libaio-dev
|
||||
RUN python3 -m pip install --no-cache-dir --upgrade pip
|
||||
|
||||
ARG REF=main
|
||||
RUN git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF
|
||||
|
||||
# `datasets` requires pandas, pandas has some modules compiled with numpy=1.x causing errors
|
||||
RUN python3 -m pip install --no-cache-dir './transformers[deepspeed-testing]' 'pandas<2' 'numpy<2'
|
||||
|
||||
# Install latest release PyTorch
|
||||
# (PyTorch must be installed before pre-compiling any DeepSpeed c++/cuda ops.)
|
||||
# (https://www.deepspeed.ai/tutorials/advanced-install/#pre-install-deepspeed-ops)
|
||||
RUN python3 -m pip uninstall -y torch torchvision torchaudio && python3 -m pip install --no-cache-dir -U torch==$PYTORCH torchvision torchaudio torchcodec --extra-index-url https://download.pytorch.org/whl/$CUDA
|
||||
|
||||
RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate
|
||||
|
||||
# Uninstall `transformer-engine` shipped with the base image
|
||||
RUN python3 -m pip uninstall -y transformer-engine
|
||||
|
||||
# Uninstall `torch-tensorrt` shipped with the base image
|
||||
RUN python3 -m pip uninstall -y torch-tensorrt
|
||||
|
||||
# recompile apex
|
||||
RUN python3 -m pip uninstall -y apex
|
||||
# RUN git clone https://github.com/NVIDIA/apex
|
||||
# `MAX_JOBS=1` disables parallel building to avoid cpu memory OOM when building image on GitHub Action (standard) runners
|
||||
# TODO: check if there is alternative way to install latest apex
|
||||
# RUN cd apex && MAX_JOBS=1 python3 -m pip install --global-option="--cpp_ext" --global-option="--cuda_ext" --no-cache -v --disable-pip-version-check .
|
||||
|
||||
# Pre-build **latest** DeepSpeed, so it would be ready for testing (otherwise, the 1st deepspeed test will timeout)
|
||||
RUN python3 -m pip uninstall -y deepspeed
|
||||
# This has to be run (again) inside the GPU VMs running the tests.
|
||||
# The installation works here, but some tests fail, if we don't pre-build deepspeed again in the VMs running the tests.
|
||||
# TODO: Find out why test fail.
|
||||
RUN DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install deepspeed --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check 2>&1
|
||||
|
||||
# `kernels` may give different outputs (within 1e-5 range) even with the same model (weights) and the same inputs
|
||||
RUN python3 -m pip uninstall -y kernels
|
||||
|
||||
# When installing in editable mode, `transformers` is not recognized as a package.
|
||||
# this line must be added in order for python to be aware of transformers.
|
||||
RUN cd transformers && python3 setup.py develop
|
||||
|
||||
# The base image ships with `pydantic==1.8.2` which is not working - i.e. the next command fails
|
||||
RUN python3 -m pip install -U --no-cache-dir "pydantic>=2.0.0"
|
||||
RUN python3 -c "from deepspeed.launcher.runner import main"
|
||||
@@ -0,0 +1,68 @@
|
||||
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-23-11.html#rel-23-11
|
||||
FROM nvcr.io/nvidia/pytorch:24.08-py3
|
||||
LABEL maintainer="Hugging Face"
|
||||
|
||||
ARG DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
# Example: `cu102`, `cu113`, etc.
|
||||
ARG CUDA='cu126'
|
||||
|
||||
RUN apt -y update
|
||||
RUN apt install -y libaio-dev
|
||||
RUN python3 -m pip install --no-cache-dir --upgrade pip
|
||||
|
||||
ARG REF=main
|
||||
RUN git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF
|
||||
|
||||
RUN python3 -m pip uninstall -y torch torchvision torchaudio
|
||||
|
||||
# Install **nightly** release PyTorch (flag `--pre`)
|
||||
# (PyTorch must be installed before pre-compiling any DeepSpeed c++/cuda ops.)
|
||||
# (https://www.deepspeed.ai/tutorials/advanced-install/#pre-install-deepspeed-ops)
|
||||
RUN python3 -m pip install --no-cache-dir -U --pre torch torchvision torchaudio torchcodec --extra-index-url https://download.pytorch.org/whl/nightly/$CUDA
|
||||
|
||||
# `datasets` requires pandas, pandas has some modules compiled with numpy=1.x causing errors
|
||||
RUN python3 -m pip install --no-cache-dir './transformers[deepspeed-testing]' 'pandas<2' 'numpy<2'
|
||||
|
||||
RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate
|
||||
|
||||
# Uninstall `transformer-engine` shipped with the base image
|
||||
RUN python3 -m pip uninstall -y transformer-engine
|
||||
|
||||
# Uninstall `torch-tensorrt` and `apex` shipped with the base image
|
||||
RUN python3 -m pip uninstall -y torch-tensorrt apex
|
||||
|
||||
# Pre-build **nightly** release of DeepSpeed, so it would be ready for testing (otherwise, the 1st deepspeed test will timeout)
|
||||
RUN python3 -m pip uninstall -y deepspeed
|
||||
# This has to be run inside the GPU VMs running the tests. (So far, it fails here due to GPU checks during compilation.)
|
||||
# Issue: https://github.com/deepspeedai/DeepSpeed/issues/2010
|
||||
# RUN git clone https://github.com/deepspeedai/DeepSpeed && cd DeepSpeed && rm -rf build && \
|
||||
# DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 DS_BUILD_UTILS=1 python3 -m pip install . --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check 2>&1
|
||||
|
||||
## For `torchdynamo` tests
|
||||
## (see https://github.com/huggingface/transformers/pull/17765)
|
||||
#RUN git clone https://github.com/pytorch/functorch
|
||||
#RUN python3 -m pip install --no-cache-dir ./functorch[aot]
|
||||
#RUN cd functorch && python3 setup.py develop
|
||||
#
|
||||
#RUN git clone https://github.com/pytorch/torchdynamo
|
||||
#RUN python3 -m pip install -r ./torchdynamo/requirements.txt
|
||||
#RUN cd torchdynamo && python3 setup.py develop
|
||||
#
|
||||
## install TensorRT
|
||||
#RUN python3 -m pip install --no-cache-dir -U nvidia-pyindex
|
||||
#RUN python3 -m pip install --no-cache-dir -U nvidia-tensorrt==8.2.4.2
|
||||
#
|
||||
## install torch_tensorrt (fx path)
|
||||
#RUN git clone https://github.com/pytorch/TensorRT.git
|
||||
#RUN cd TensorRT/py && python3 setup.py install --fx-only
|
||||
|
||||
# `kernels` may give different outputs (within 1e-5 range) even with the same model (weights) and the same inputs
|
||||
RUN python3 -m pip uninstall -y kernels
|
||||
|
||||
# When installing in editable mode, `transformers` is not recognized as a package.
|
||||
# this line must be added in order for python to be aware of transformers.
|
||||
RUN cd transformers && python3 setup.py develop
|
||||
|
||||
# Disable for now as deepspeed is not installed above. To be enabled once the issue is fixed.
|
||||
# RUN python3 -c "from deepspeed.launcher.runner import main"
|
||||
36
transformers/docker/transformers-pytorch-gpu/Dockerfile
Normal file
36
transformers/docker/transformers-pytorch-gpu/Dockerfile
Normal file
@@ -0,0 +1,36 @@
|
||||
FROM nvidia/cuda:12.6.0-cudnn-devel-ubuntu22.04
|
||||
LABEL maintainer="Hugging Face"
|
||||
|
||||
ARG DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
RUN apt update
|
||||
RUN apt install -y git libsndfile1-dev tesseract-ocr espeak-ng python3 python3-pip ffmpeg
|
||||
RUN python3 -m pip install --no-cache-dir --upgrade pip
|
||||
|
||||
ARG REF=main
|
||||
RUN git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF
|
||||
|
||||
# If set to nothing, will install the latest version
|
||||
ARG PYTORCH='2.8.0'
|
||||
ARG TORCH_VISION=''
|
||||
ARG TORCH_AUDIO=''
|
||||
# Example: `cu102`, `cu113`, etc.
|
||||
ARG CUDA='cu126'
|
||||
|
||||
RUN python3 -m pip install --no-cache-dir -e ./transformers[dev-torch,testing,video]
|
||||
|
||||
# Install torch stuff after ./transformers[dev-torch,testing,video], otherwise torch may be resolved to a previous
|
||||
# version.
|
||||
RUN [ ${#PYTORCH} -gt 0 ] && VERSION='torch=='$PYTORCH'.*' || VERSION='torch'; python3 -m pip install --no-cache-dir -U $VERSION --extra-index-url https://download.pytorch.org/whl/$CUDA
|
||||
RUN [ ${#TORCH_VISION} -gt 0 ] && VERSION='torchvision=='TORCH_VISION'.*' || VERSION='torchvision'; python3 -m pip install --no-cache-dir -U $VERSION --extra-index-url https://download.pytorch.org/whl/$CUDA
|
||||
RUN [ ${#TORCH_AUDIO} -gt 0 ] && VERSION='torchaudio=='TORCH_AUDIO'.*' || VERSION='torchaudio'; python3 -m pip install --no-cache-dir -U $VERSION --extra-index-url https://download.pytorch.org/whl/$CUDA
|
||||
|
||||
RUN python3 -m pip install --no-cache-dir git+https://github.com/facebookresearch/detectron2.git pytesseract
|
||||
RUN python3 -m pip install -U "itsdangerous<2.1.0"
|
||||
|
||||
# `kernels` may give different outputs (within 1e-5 range) even with the same model (weights) and the same inputs
|
||||
RUN python3 -m pip uninstall -y kernels
|
||||
|
||||
# When installing in editable mode, `transformers` is not recognized as a package.
|
||||
# this line must be added in order for python to be aware of transformers.
|
||||
RUN cd transformers && python3 setup.py develop
|
||||
65
transformers/docker/transformers-pytorch-tpu/Dockerfile
Normal file
65
transformers/docker/transformers-pytorch-tpu/Dockerfile
Normal file
@@ -0,0 +1,65 @@
|
||||
FROM google/cloud-sdk:slim
|
||||
|
||||
# Build args.
|
||||
ARG GITHUB_REF=refs/heads/main
|
||||
|
||||
# TODO: This Dockerfile installs pytorch/xla 3.6 wheels. There are also 3.7
|
||||
# wheels available; see below.
|
||||
ENV PYTHON_VERSION=3.6
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
build-essential \
|
||||
cmake \
|
||||
git \
|
||||
curl \
|
||||
ca-certificates
|
||||
|
||||
# Install conda and python.
|
||||
# NOTE new Conda does not forward the exit status... https://github.com/conda/conda/issues/8385
|
||||
RUN curl -o ~/miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-4.7.12-Linux-x86_64.sh && \
|
||||
chmod +x ~/miniconda.sh && \
|
||||
~/miniconda.sh -b && \
|
||||
rm ~/miniconda.sh
|
||||
|
||||
ENV PATH=/root/miniconda3/bin:$PATH
|
||||
|
||||
RUN conda create -y --name container python=$PYTHON_VERSION
|
||||
|
||||
# Run the rest of commands within the new conda env.
|
||||
# Use absolute path to appease Codefactor.
|
||||
SHELL ["/root/miniconda3/bin/conda", "run", "-n", "container", "/bin/bash", "-c"]
|
||||
RUN conda install -y python=$PYTHON_VERSION mkl
|
||||
|
||||
RUN pip uninstall -y torch && \
|
||||
# Python 3.7 wheels are available. Replace cp36-cp36m with cp37-cp37m
|
||||
gsutil cp 'gs://tpu-pytorch/wheels/torch-nightly-cp${PYTHON_VERSION/./}-cp${PYTHON_VERSION/./}m-linux_x86_64.whl' . && \
|
||||
gsutil cp 'gs://tpu-pytorch/wheels/torch_xla-nightly-cp${PYTHON_VERSION/./}-cp${PYTHON_VERSION/./}m-linux_x86_64.whl' . && \
|
||||
gsutil cp 'gs://tpu-pytorch/wheels/torchvision-nightly-cp${PYTHON_VERSION/./}-cp${PYTHON_VERSION/./}m-linux_x86_64.whl' . && \
|
||||
pip install 'torch-nightly-cp${PYTHON_VERSION/./}-cp${PYTHON_VERSION/./}m-linux_x86_64.whl' && \
|
||||
pip install 'torch_xla-nightly-cp${PYTHON_VERSION/./}-cp${PYTHON_VERSION/./}m-linux_x86_64.whl' && \
|
||||
pip install 'torchvision-nightly-cp${PYTHON_VERSION/./}-cp${PYTHON_VERSION/./}m-linux_x86_64.whl' && \
|
||||
rm 'torch-nightly-cp${PYTHON_VERSION/./}-cp${PYTHON_VERSION/./}m-linux_x86_64.whl' && \
|
||||
rm 'torch_xla-nightly-cp${PYTHON_VERSION/./}-cp${PYTHON_VERSION/./}m-linux_x86_64.whl' && \
|
||||
rm 'torchvision-nightly-cp${PYTHON_VERSION/./}-cp${PYTHON_VERSION/./}m-linux_x86_64.whl' && \
|
||||
apt-get install -y libomp5
|
||||
|
||||
ENV LD_LIBRARY_PATH=root/miniconda3/envs/container/lib
|
||||
|
||||
|
||||
# Install huggingface/transformers at the current PR, plus dependencies.
|
||||
RUN git clone https://github.com/huggingface/transformers.git && \
|
||||
cd transformers && \
|
||||
git fetch origin $GITHUB_REF:CI && \
|
||||
git checkout CI && \
|
||||
cd .. && \
|
||||
pip install ./transformers && \
|
||||
pip install -r ./transformers/examples/pytorch/_test_requirements.txt && \
|
||||
pip install pytest
|
||||
|
||||
RUN python -c "import torch_xla; print(torch_xla.__version__)"
|
||||
RUN python -c "import transformers as trf; print(trf.__version__)"
|
||||
RUN conda init bash
|
||||
COPY docker-entrypoint.sh /usr/local/bin/
|
||||
RUN chmod +x /usr/local/bin/docker-entrypoint.sh
|
||||
ENTRYPOINT ["/usr/local/bin/docker-entrypoint.sh"]
|
||||
CMD ["bash"]
|
||||
@@ -0,0 +1,38 @@
|
||||
local base = import 'templates/base.libsonnet';
|
||||
local tpus = import 'templates/tpus.libsonnet';
|
||||
local utils = import "templates/utils.libsonnet";
|
||||
local volumes = import "templates/volumes.libsonnet";
|
||||
|
||||
local bertBaseCased = base.BaseTest {
|
||||
frameworkPrefix: "hf",
|
||||
modelName: "bert-base-cased",
|
||||
mode: "example",
|
||||
configMaps: [],
|
||||
|
||||
timeout: 3600, # 1 hour, in seconds
|
||||
|
||||
image: std.extVar('image'),
|
||||
imageTag: std.extVar('image-tag'),
|
||||
|
||||
tpuSettings+: {
|
||||
softwareVersion: "pytorch-nightly",
|
||||
},
|
||||
accelerator: tpus.v3_8,
|
||||
|
||||
volumeMap+: {
|
||||
datasets: volumes.PersistentVolumeSpec {
|
||||
name: "huggingface-cluster-disk",
|
||||
mountPath: "/datasets",
|
||||
},
|
||||
},
|
||||
command: utils.scriptCommand(
|
||||
|||
|
||||
python -m pytest -s transformers/examples/pytorch/test_xla_examples.py -v
|
||||
test_exit_code=$?
|
||||
echo "\nFinished running commands.\n"
|
||||
test $test_exit_code -eq 0
|
||||
|||
|
||||
),
|
||||
};
|
||||
|
||||
bertBaseCased.oneshotJob
|
||||
32
transformers/docker/transformers-pytorch-tpu/dataset.yaml
Normal file
32
transformers/docker/transformers-pytorch-tpu/dataset.yaml
Normal file
@@ -0,0 +1,32 @@
|
||||
apiVersion: v1
|
||||
kind: PersistentVolume
|
||||
metadata:
|
||||
name: huggingface-cluster-disk
|
||||
spec:
|
||||
storageClassName: ""
|
||||
capacity:
|
||||
storage: 500Gi
|
||||
accessModes:
|
||||
- ReadOnlyMany
|
||||
claimRef:
|
||||
namespace: default
|
||||
name: huggingface-cluster-disk-claim
|
||||
gcePersistentDisk:
|
||||
pdName: huggingface-cluster-disk
|
||||
fsType: ext4
|
||||
readOnly: true
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: huggingface-cluster-disk-claim
|
||||
spec:
|
||||
# Specify "" as the storageClassName so it matches the PersistentVolume's StorageClass.
|
||||
# A nil storageClassName value uses the default StorageClass. For details, see
|
||||
# https://kubernetes.io/docs/concepts/storage/persistent-volumes/#class-1
|
||||
storageClassName: ""
|
||||
accessModes:
|
||||
- ReadOnlyMany
|
||||
resources:
|
||||
requests:
|
||||
storage: 1Ki
|
||||
@@ -0,0 +1,8 @@
|
||||
#!/bin/bash
|
||||
source ~/.bashrc
|
||||
echo "running docker-entrypoint.sh"
|
||||
conda activate container
|
||||
echo $KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS
|
||||
echo "printed TPU info"
|
||||
export XRT_TPU_CONFIG="tpu_worker;0;${KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS:7}"
|
||||
exec "$@"#!/bin/bash
|
||||
93
transformers/docker/transformers-pytorch-xpu/Dockerfile
Normal file
93
transformers/docker/transformers-pytorch-xpu/Dockerfile
Normal file
@@ -0,0 +1,93 @@
|
||||
FROM intel/deep-learning-essentials:2025.1.3-0-devel-ubuntu22.04 AS base
|
||||
LABEL maintainer="Hugging Face"
|
||||
|
||||
SHELL ["/bin/bash", "-c"]
|
||||
|
||||
ARG PYTHON_VER=3.11
|
||||
ENV TORCH_DEVICE_BACKEND_AUTOLOAD=0
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
RUN apt-get remove -y python3.10 && apt-get autoremove -y
|
||||
RUN apt-get update && \
|
||||
apt-get install -y software-properties-common && \
|
||||
add-apt-repository -y ppa:deadsnakes/ppa && \
|
||||
apt-get update && \
|
||||
apt-get install -y python$PYTHON_VER python$PYTHON_VER-dev python3-pip && \
|
||||
ln -sf /usr/bin/python$PYTHON_VER /usr/bin/python3 && \
|
||||
ln -sf /usr/bin/python3 /usr/bin/python && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get -y install \
|
||||
apt-utils \
|
||||
build-essential \
|
||||
ca-certificates \
|
||||
clinfo \
|
||||
curl \
|
||||
git \
|
||||
git-lfs \
|
||||
vim \
|
||||
numactl \
|
||||
gnupg2 \
|
||||
gpg-agent \
|
||||
zlib1g-dev \
|
||||
rsync \
|
||||
sudo \
|
||||
libnl-genl-3-200 \
|
||||
xpu-smi \
|
||||
unzip \
|
||||
ffmpeg \
|
||||
tesseract-ocr \
|
||||
espeak-ng \
|
||||
wget \
|
||||
ncurses-term && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y \
|
||||
linux-headers-$(uname -r) \
|
||||
linux-modules-extra-$(uname -r) \
|
||||
flex bison \
|
||||
intel-fw-gpu intel-i915-dkms xpu-smi \
|
||||
intel-opencl-icd libze-intel-gpu1 libze1 \
|
||||
intel-media-va-driver-non-free libmfx-gen1 libvpl2 \
|
||||
libegl-mesa0 libegl1-mesa libegl1-mesa-dev libgbm1 libgl1-mesa-dev libgl1-mesa-dri \
|
||||
libglapi-mesa libglx-mesa0 libigdgmm12 libxatracker2 mesa-va-drivers \
|
||||
mesa-vdpau-drivers mesa-vulkan-drivers va-driver-all vainfo hwinfo clinfo intel-ocloc \
|
||||
libigc-dev intel-igc-cm libigdfcl-dev libigfxcmrt-dev libze-dev && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN pip install --upgrade pip
|
||||
RUN pip install triton==3.3.0
|
||||
|
||||
RUN pip install torch==2.7.0 torchvision==0.22.0 torchaudio==2.7.0 --index-url https://download.pytorch.org/whl/xpu --no-cache-dir
|
||||
|
||||
RUN pip install evaluate torchdata pyctcdecode pytesseract decord galore-torch fire scipy scikit-learn sentencepiece sacremoses nltk rouge_score librosa soundfile g2p_en mpi4py requests_mock
|
||||
RUN pip install pretty_midi essentia resampy Levenshtein av sacrebleu phonemizer invisible_watermark schedulefree
|
||||
RUN pip install gguf hqq compressed_tensors gptqmodel mergekit autoawq deepspeed torchao onnx
|
||||
RUN pip install hf_transfer huggingface-hub hf-doc-builder datasets optimum-quanto timm transformers accelerate optimum peft
|
||||
|
||||
RUN pip install git+https://github.com/linkedin/Liger-Kernel.git --extra-index-url https://download.pytorch.org/whl/test/xpu
|
||||
|
||||
# install bitsandbytes
|
||||
RUN pip install git+https://github.com/bitsandbytes-foundation/bitsandbytes.git
|
||||
|
||||
ENV OCL_ICD_VENDORS=/etc/OpenCL/vendors
|
||||
ENV FI_PROVIDER_PATH=${I_MPI_ROOT}/lib/libfabric/prov:/usr/lib/x86_64-linux-gnu/libfabric
|
||||
ENV CCL_ROOT=/usr/local
|
||||
ENV CCL_ATL_TRANSPORT=ofi
|
||||
ENV I_MPI_ROOT=/usr/local
|
||||
ENV CLASSPATH=${I_MPI_ROOT}/lib/mpi.jar
|
||||
ENV PATH=${I_MPI_ROOT}/bin/libfabric:${PATH}
|
||||
ENV LD_LIBRARY_PATH=${I_MPI_ROOT}/lib/libfabric:${LD_LIBRARY_PATH}
|
||||
|
||||
RUN touch /entrypoint.sh
|
||||
RUN chmod +x /entrypoint.sh
|
||||
RUN echo "#!/bin/bash" >> /entrypoint.sh
|
||||
RUN echo "source /opt/intel/oneapi/setvars.sh --force && /bin/bash" >> /entrypoint.sh
|
||||
|
||||
ENTRYPOINT ["/entrypoint.sh"]
|
||||
109
transformers/docker/transformers-quantization-latest-gpu/Dockerfile
Executable file
109
transformers/docker/transformers-quantization-latest-gpu/Dockerfile
Executable file
@@ -0,0 +1,109 @@
|
||||
FROM nvidia/cuda:12.6.0-cudnn-devel-ubuntu22.04
|
||||
LABEL maintainer="Hugging Face"
|
||||
|
||||
ARG DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
# Use login shell to read variables from `~/.profile` (to pass dynamic created variables between RUN commands)
|
||||
SHELL ["sh", "-lc"]
|
||||
|
||||
# The following `ARG` are mainly used to specify the versions explicitly & directly in this docker file, and not meant
|
||||
# to be used as arguments for docker build (so far).
|
||||
|
||||
ARG PYTORCH='2.8.0'
|
||||
# Example: `cu102`, `cu113`, etc.
|
||||
ARG CUDA='cu126'
|
||||
# Disable kernel mapping for quantization tests
|
||||
ENV DISABLE_KERNEL_MAPPING=1
|
||||
|
||||
RUN apt update
|
||||
RUN apt install -y git libsndfile1-dev tesseract-ocr espeak-ng python3 python3-pip ffmpeg
|
||||
RUN python3 -m pip install --no-cache-dir --upgrade pip
|
||||
|
||||
ARG REF=main
|
||||
RUN git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF
|
||||
|
||||
RUN [ ${#PYTORCH} -gt 0 ] && VERSION='torch=='$PYTORCH'.*' || VERSION='torch'; echo "export VERSION='$VERSION'" >> ~/.profile
|
||||
RUN echo torch=$VERSION
|
||||
# `torchvision` and `torchaudio` should be installed along with `torch`, especially for nightly build.
|
||||
# Currently, let's just use their latest releases (when `torch` is installed with a release version)
|
||||
RUN python3 -m pip install --no-cache-dir -U $VERSION torchvision torchaudio torchcodec --extra-index-url https://download.pytorch.org/whl/$CUDA
|
||||
|
||||
RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate
|
||||
|
||||
# needed in bnb and awq
|
||||
RUN python3 -m pip install --no-cache-dir einops
|
||||
|
||||
# Add bitsandbytes for mixed int8 testing
|
||||
RUN python3 -m pip install --no-cache-dir bitsandbytes
|
||||
|
||||
# Add gptqmodel for gtpq quantization testing, installed from source for pytorch==2.6.0 compatibility
|
||||
RUN python3 -m pip install lm_eval
|
||||
RUN git clone https://github.com/ModelCloud/GPTQModel.git && cd GPTQModel && pip install -v . --no-build-isolation
|
||||
|
||||
# Add optimum for gptq quantization testing
|
||||
RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/optimum@main#egg=optimum
|
||||
|
||||
# Add PEFT
|
||||
RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/peft@main#egg=peft
|
||||
|
||||
# Add hqq for quantization testing
|
||||
RUN python3 -m pip install --no-cache-dir hqq
|
||||
|
||||
# For GGUF tests
|
||||
RUN python3 -m pip install --no-cache-dir gguf
|
||||
|
||||
# Add autoawq for quantization testing
|
||||
RUN python3 -m pip install --no-cache-dir autoawq[kernels]
|
||||
|
||||
# Add quanto for quantization testing
|
||||
RUN python3 -m pip install --no-cache-dir optimum-quanto
|
||||
|
||||
# Add compressed-tensors for quantization testing
|
||||
RUN python3 -m pip install --no-cache-dir compressed-tensors
|
||||
|
||||
# Add AMD Quark for quantization testing
|
||||
RUN python3 -m pip install --no-cache-dir amd-quark
|
||||
|
||||
# Add AutoRound for quantization testing
|
||||
RUN python3 -m pip install --no-cache-dir auto-round
|
||||
|
||||
# Add torchao for quantization testing
|
||||
RUN python3 -m pip install --no-cache-dir torchao
|
||||
|
||||
# Add transformers in editable mode
|
||||
RUN python3 -m pip install --no-cache-dir -e ./transformers[dev-torch]
|
||||
|
||||
# `kernels` may give different outputs (within 1e-5 range) even with the same model (weights) and the same inputs
|
||||
RUN python3 -m pip uninstall -y kernels
|
||||
|
||||
# Uninstall flash-attn installed by autoawq, it causes issues here : https://github.com/huggingface/transformers/actions/runs/15915442841/job/44892146131
|
||||
RUN python3 -m pip uninstall -y flash-attn
|
||||
|
||||
# When installing in editable mode, `transformers` is not recognized as a package.
|
||||
# this line must be added in order for python to be aware of transformers.
|
||||
RUN cd transformers && python3 setup.py develop
|
||||
|
||||
# Low usage or incompatible lib, will enable later on
|
||||
|
||||
# # Add aqlm for quantization testing
|
||||
# RUN python3 -m pip install --no-cache-dir aqlm[gpu]==1.0.2
|
||||
|
||||
# # Add vptq for quantization testing
|
||||
# RUN pip install vptq
|
||||
|
||||
# Add spqr for quantization testing
|
||||
# Commented for now as No matching distribution found we need to reach out to the authors
|
||||
# RUN python3 -m pip install --no-cache-dir spqr_quant[gpu]
|
||||
|
||||
# # Add eetq for quantization testing
|
||||
# RUN git clone https://github.com/NetEase-FuXi/EETQ.git && cd EETQ/ && git submodule update --init --recursive && pip install .
|
||||
|
||||
# # Add flute-kernel and fast_hadamard_transform for quantization testing
|
||||
# # Commented for now as they cause issues with the build
|
||||
# # TODO: create a new workflow to test them
|
||||
# RUN python3 -m pip install --no-cache-dir flute-kernel==0.4.1
|
||||
# RUN python3 -m pip install --no-cache-dir git+https://github.com/Dao-AILab/fast-hadamard-transform.git
|
||||
|
||||
# Add fp-quant for quantization testing
|
||||
# Requires py3.11 but our CI runs on 3.9
|
||||
# RUN python3 -m pip install --no-cache-dir "fp-quant>=0.1.6"
|
||||
Reference in New Issue
Block a user