init
This commit is contained in:
7
transformers/.circleci/TROUBLESHOOT.md
Normal file
7
transformers/.circleci/TROUBLESHOOT.md
Normal file
@@ -0,0 +1,7 @@
|
||||
# Troubleshooting
|
||||
|
||||
This document explains how to deal with various issues on CircleCI. Entries may include actual solutions or pointers to issues that cover them.
|
||||
|
||||
## Circle CI
|
||||
|
||||
* pytest worker runs out of resident RAM and gets killed by `cgroups`: https://github.com/huggingface/transformers/issues/11408
|
||||
227
transformers/.circleci/config.yml
Normal file
227
transformers/.circleci/config.yml
Normal file
@@ -0,0 +1,227 @@
|
||||
version: 2.1
# Setup configuration: the jobs below generate the actual test configuration and
# hand it over through the `continuation/continue` step.
setup: true
orbs:
    continuation: circleci/continuation@0.1.0

parameters:
    # Selects the `nightly` workflow instead of `setup_and_quality`.
    nightly:
        type: boolean
        default: false
    # The `GHA_*` parameters are declared so that a trigger may pass them;
    # nothing in this file reads their values.
    GHA_Actor:
        type: string
        default: ""
    GHA_Action:
        type: string
        default: ""
    GHA_Event:
        type: string
        default: ""
    GHA_Meta:
        type: string
        default: ""
|
||||
|
||||
jobs:
|
||||
# Ensure the pipeline is running under the `huggingface` CircleCI organization
# rather than under a contributor's personal account.
check_circleci_user:
    docker:
        - image: python:3.10-slim
    resource_class: small
    parallelism: 1
    steps:
        - run: echo $CIRCLE_PROJECT_USERNAME
        # Exit statuses are limited to 0-255, so the failure path uses `exit 1`
        # instead of the non-portable `exit -1`.
        - run: |
            if [ "$CIRCLE_PROJECT_USERNAME" = "huggingface" ]; then
                exit 0
            else
                echo "The CI is running under $CIRCLE_PROJECT_USERNAME personal account. Please follow https://support.circleci.com/hc/en-us/articles/360008097173-Troubleshooting-why-pull-requests-are-not-triggering-jobs-on-my-organization- to fix it."; exit 1
            fi
|
||||
# Fetch the tests to run, generate the test configuration and continue the
# pipeline with it.
fetch_tests:
    working_directory: ~/transformers
    docker:
        - image: huggingface/transformers-quality
    parallelism: 1
    steps:
        - checkout
        - run: uv pip install -U -e .
        # Record the subject line of the current commit in $BASH_ENV.
        - run: echo 'export "GIT_COMMIT_MESSAGE=$(git show -s --format=%s)"' >> "$BASH_ENV" && source "$BASH_ENV"
        - run: mkdir -p test_preparation
        - run: python utils/tests_fetcher.py | tee tests_fetched_summary.txt
        - run: python utils/tests_fetcher.py --filter_tests
        # Writes `test_preparation/generated_config.yml`.
        - run: export "GIT_COMMIT_MESSAGE=$(git show -s --format=%s)" && echo $GIT_COMMIT_MESSAGE && python .circleci/create_circleci_config.py --fetcher_folder test_preparation
        # Stop the job here when the generated configuration is missing or empty.
        - run: |
            if [ ! -s test_preparation/generated_config.yml ]; then
                echo "No tests to run, exiting early!"
                circleci-agent step halt
            fi

        - store_artifacts:
            path: test_preparation

        - run:
            name: "Retrieve Artifact Paths"
            # [reference] https://circleci.com/docs/api/v2/index.html#operation/getJobArtifacts
            # `CIRCLE_TOKEN` is an environment variable set within a context, see `https://circleci.com/docs/contexts/`
            command: |
                project_slug="gh/${CIRCLE_PROJECT_USERNAME}/${CIRCLE_PROJECT_REPONAME}"
                job_number=${CIRCLE_BUILD_NUM}
                url="https://circleci.com/api/v2/project/${project_slug}/${job_number}/artifacts"
                curl -o test_preparation/artifacts.json ${url} --header "Circle-Token: $CIRCLE_TOKEN"
        - run:
            name: "Prepare pipeline parameters"
            command: |
                python utils/process_test_artifacts.py

        # Embedding the full list of tests in generated_config.yml made the configuration too long
        # for the continuation orb, so the links to the artifacts are passed as pipeline parameters instead.
        # Being explicit is good, but here it was a limitation.
        # We used:

        # https://circleci.com/docs/api/v2/index.html#operation/getJobArtifacts : to get the job artifacts
        # A nested dict could not be passed, which is why a `test_file_...` parameter is created for every single job.

        - store_artifacts:
            path: test_preparation/transformed_artifacts.json
        - store_artifacts:
            path: test_preparation/artifacts.json
        # Hand over to the generated configuration, with the artifact links as pipeline parameters.
        - continuation/continue:
            parameters: test_preparation/transformed_artifacts.json
            configuration_path: test_preparation/generated_config.yml
|
||||
|
||||
# Same as `fetch_tests`, but fetches all tests (`--fetch_all`); used by the nightly build.
fetch_all_tests:
    working_directory: ~/transformers
    docker:
        - image: huggingface/transformers-quality
    parallelism: 1
    steps:
        - checkout
        - run: uv pip install -U -e .
        # Record the subject line of the current commit in $BASH_ENV.
        - run: echo 'export "GIT_COMMIT_MESSAGE=$(git show -s --format=%s)"' >> "$BASH_ENV" && source "$BASH_ENV"
        - run: mkdir -p test_preparation
        - run: python utils/tests_fetcher.py --fetch_all | tee tests_fetched_summary.txt
        - run: python utils/tests_fetcher.py --filter_tests
        # Writes `test_preparation/generated_config.yml`.
        - run: export "GIT_COMMIT_MESSAGE=$(git show -s --format=%s)" && echo $GIT_COMMIT_MESSAGE && python .circleci/create_circleci_config.py --fetcher_folder test_preparation
        # Stop the job here when the generated configuration is missing or empty.
        - run: |
            if [ ! -s test_preparation/generated_config.yml ]; then
                echo "No tests to run, exiting early!"
                circleci-agent step halt
            fi

        - store_artifacts:
            path: test_preparation

        # NOTE(review): unlike `fetch_tests`, this request sends no `Circle-Token` header;
        # confirm that this is intentional.
        - run:
            name: "Retrieve Artifact Paths"
            command: |
                project_slug="gh/${CIRCLE_PROJECT_USERNAME}/${CIRCLE_PROJECT_REPONAME}"
                job_number=${CIRCLE_BUILD_NUM}
                url="https://circleci.com/api/v2/project/${project_slug}/${job_number}/artifacts"
                curl -o test_preparation/artifacts.json ${url}
        - run:
            name: "Prepare pipeline parameters"
            command: |
                python utils/process_test_artifacts.py

        # Embedding the full list of tests in generated_config.yml made the configuration too long
        # for the continuation orb, so the links to the artifacts are passed as pipeline parameters instead.
        # Being explicit is good, but here it was a limitation.
        # We used:

        # https://circleci.com/docs/api/v2/index.html#operation/getJobArtifacts : to get the job artifacts
        # A nested dict could not be passed, which is why a `test_file_...` parameter is created for every single job.

        - store_artifacts:
            path: test_preparation/transformed_artifacts.json
        - store_artifacts:
            path: test_preparation/artifacts.json
        # Hand over to the generated configuration, with the artifact links as pipeline parameters.
        - continuation/continue:
            parameters: test_preparation/transformed_artifacts.json
            configuration_path: test_preparation/generated_config.yml
|
||||
|
||||
# Import check, linting, formatting and docstring checks.
check_code_quality:
    working_directory: ~/transformers
    docker:
        - image: huggingface/transformers-quality
    resource_class: large
    environment:
        TRANSFORMERS_IS_CI: yes
        PYTEST_TIMEOUT: 120
    parallelism: 1
    steps:
        - checkout
        - run: uv pip install -e ".[quality]"
        - run:
            name: Show installed libraries and their versions
            command: pip freeze | tee installed.txt
        - store_artifacts:
            path: ~/transformers/installed.txt
        # Fails with an explicit message when `from transformers import *` raises.
        - run: python -c "from transformers import *" || (echo '🚨 import failed, this means you introduced unprotected imports! 🚨'; exit 1)
        - run: ruff check examples tests src utils
        - run: ruff format examples tests src utils --check
        - run: python utils/custom_init_isort.py --check_only
        - run: python utils/sort_auto_mappings.py --check_only
        - run: python utils/check_doc_toc.py
        - run: python utils/check_docstrings.py --check_all
|
||||
|
||||
# Runs the `utils/check_*.py` repository checks one after another.
check_repository_consistency:
    working_directory: ~/transformers
    docker:
        - image: huggingface/transformers-consistency
    resource_class: large
    environment:
        TRANSFORMERS_IS_CI: yes
        PYTEST_TIMEOUT: 120
    parallelism: 1
    steps:
        - checkout
        - run: uv pip install -e ".[quality]"
        - run:
            name: Show installed libraries and their versions
            command: pip freeze | tee installed.txt
        - store_artifacts:
            path: ~/transformers/installed.txt
        - run: python utils/check_copies.py
        - run: python utils/check_modular_conversion.py
        - run: python utils/check_dummies.py
        - run: python utils/check_repo.py
        - run: python utils/check_inits.py
        - run: python utils/check_pipeline_typing.py
        - run: python utils/check_config_docstrings.py
        - run: python utils/check_config_attributes.py
        - run: python utils/check_doctest_list.py
        - run: make deps_table_check_updated
        - run: python utils/update_metadata.py --check-only
        - run: python utils/check_docstrings.py
|
||||
|
||||
workflows:
    version: 2
    # Runs for the main repository whenever the `nightly` parameter is false.
    setup_and_quality:
        when:
            and:
                - equal: [<<pipeline.project.git_url>>, https://github.com/huggingface/transformers]
                - not: <<pipeline.parameters.nightly>>
        jobs:
            - check_circleci_user
            - check_code_quality
            - check_repository_consistency
            - fetch_tests

    # Runs for every project whose git URL is not the main repository; `fetch_tests`
    # gets the `TRANSFORMERS_CONTEXT` context attached.
    # NOTE(review): this condition does not test `nightly`, so with `nightly: true` on such
    # a project both this workflow and `nightly` are selected; confirm this is intended.
    setup_and_quality_2:
        when:
            not:
                equal: [<<pipeline.project.git_url>>, https://github.com/huggingface/transformers]
        jobs:
            - check_circleci_user
            - check_code_quality
            - check_repository_consistency
            - fetch_tests:
                # [reference] https://circleci.com/docs/contexts/
                context:
                    - TRANSFORMERS_CONTEXT

    # Runs when the `nightly` parameter is true; uses `fetch_all_tests` instead of `fetch_tests`.
    nightly:
        when: <<pipeline.parameters.nightly>>
        jobs:
            - check_circleci_user
            - check_code_quality
            - check_repository_consistency
            - fetch_all_tests
|
||||
401
transformers/.circleci/create_circleci_config.py
Normal file
401
transformers/.circleci/create_circleci_config.py
Normal file
@@ -0,0 +1,401 @@
|
||||
# coding=utf-8
|
||||
# Copyright 2022 The HuggingFace Inc. team.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import argparse
|
||||
import copy
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Optional
|
||||
|
||||
import yaml
|
||||
|
||||
|
||||
# Environment variables that `CircleCIJob.to_dict` copies into every generated test job.
COMMON_ENV_VARIABLES = {
    "OMP_NUM_THREADS": 1,
    "TRANSFORMERS_IS_CI": True,
    "PYTEST_TIMEOUT": 120,
    "RUN_PIPELINE_TESTS": False,
    # Will be adjusted in `CircleCIJob.to_dict`.
    "RUN_FLAKY": True,
}
# Pytest options shared by all jobs. A `None` value produces a bare flag (e.g. `-vvv`).
# {"s": None} is deliberately left out: its output is way too long and makes navigating CircleCI impractical.
COMMON_PYTEST_OPTIONS = {"max-worker-restart": 0, "vvv": None, "rsfE":None}
# Docker image used when a job does not specify its own.
DEFAULT_DOCKER_IMAGE = [{"image": "cimg/python:3.8.12"}]

# Strings that commonly appear in the output of flaky tests when they fail. These are used with `pytest-rerunfailures`
# to rerun the tests that match these patterns.
FLAKY_TEST_FAILURE_PATTERNS = [
    "OSError",  # Machine/connection transient error
    "Timeout",  # Machine/connection transient error
    "ConnectionError",  # Connection transient error
    "FileNotFoundError",  # Raised by `datasets` on Hub failures
    "PIL.UnidentifiedImageError",  # Raised by `PIL.Image.open` on connection issues
    "HTTPError",  # Also catches HfHubHTTPError
    "AssertionError: Tensor-likes are not close!",  # `torch.testing.assert_close`, we might have unlucky random values
    # TODO: an error while downloading the tokenizer's `merged.txt` from the hub can cause all the exceptions below.
    # Throw and handle them under a single message.
    "TypeError: expected str, bytes or os.PathLike object, not NoneType",
    "TypeError: stat: path should be string, bytes, os.PathLike or integer, not NoneType",
    "Converting from Tiktoken failed",
    "KeyError: <class ",
    "TypeError: not a string",
]
|
||||
|
||||
|
||||
class EmptyJob:
    """Placeholder job that only lists the working directory.

    When `job_name` is set to "collection_job" on an instance, the job additionally waits for the
    other jobs of the workflow to stop running and then processes their test reports.
    """

    job_name = "empty"

    def to_dict(self):
        """Return the CircleCI job definition (docker image, resource class and steps) as a dict."""
        job_steps = [{"run": 'ls -la'}]

        if self.job_name == "collection_job":
            # Poll the workflow's job list until no job other than `collection_job` reports "running".
            wait_for_other_jobs = """while [[ $(curl --location --request GET "https://circleci.com/api/v2/workflow/$CIRCLE_WORKFLOW_ID/job" --header "Circle-Token: $CCI_TOKEN"| jq -r '.items[]|select(.name != "collection_job")|.status' | grep -c "running") -gt 0 ]]; do sleep 5; done || true"""
            process_reports = 'python utils/process_circleci_workflow_test_reports.py --workflow_id $CIRCLE_WORKFLOW_ID || true'
            job_steps += [
                "checkout",
                {"run": "pip install requests || true"},
                {"run": wait_for_other_jobs},
                {"run": process_reports},
                {"store_artifacts": {"path": "outputs"}},
                {"run": 'echo "All required jobs have now completed"'},
            ]

        job_definition = {}
        # Deep copy so that the shared default image list is never handed out directly.
        job_definition["docker"] = copy.deepcopy(DEFAULT_DOCKER_IMAGE)
        job_definition["resource_class"] = "small"
        job_definition["steps"] = job_steps
        return job_definition
|
||||
|
||||
|
||||
@dataclass
class CircleCIJob:
    """Description of one generated CircleCI test job.

    `to_dict` turns the description into the job mapping that is dumped into the generated configuration.
    """

    # Short name; `job_name` derives the CircleCI job name from it.
    name: str
    # Extra environment variables, applied on top of `COMMON_ENV_VARIABLES`.
    additional_env: Optional[dict[str, Any]] = None
    # CircleCI `docker` entry; defaults to a copy of `DEFAULT_DOCKER_IMAGE`.
    docker_image: Optional[list[dict[str, str]]] = None
    # Shell commands joined with " && " into a single install step.
    install_steps: Optional[list[str]] = None
    # Pytest `-m` marker expression.
    marker: Optional[str] = None
    # Any truthy value makes the job split its tests across parallel nodes.
    parallelism: Optional[int] = 0
    # Value passed to pytest `-n`.
    pytest_num_workers: int = 8
    # Extra pytest options, applied on top of `COMMON_PYTEST_OPTIONS`.
    pytest_options: Optional[dict[str, Any]] = None
    resource_class: Optional[str] = "xlarge"
    tests_to_run: Optional[list[str]] = None
    num_test_files_per_worker: Optional[int] = 10
    # This should be only used for doctest job!
    command_timeout: Optional[int] = None

    def __post_init__(self):
        """Fill in defaults for the mutable attributes without modifying objects owned by the caller."""
        if self.additional_env is None:
            self.additional_env = {}
        if self.docker_image is None:
            # Let's avoid changing the default list and make a copy.
            self.docker_image = copy.deepcopy(DEFAULT_DOCKER_IMAGE)
        else:
            # Work on a private copy: the image tag may be rewritten below and the caller's list must stay intact.
            self.docker_image = copy.deepcopy(self.docker_image)
            # BIG HACK WILL REMOVE ONCE FETCHER IS UPDATED
            print(os.environ.get("GIT_COMMIT_MESSAGE"))
            commit_message = os.environ.get("GIT_COMMIT_MESSAGE", "")
            if "[build-ci-image]" in commit_message or commit_message == "dev-ci":
                self.docker_image[0]["image"] = f"{self.docker_image[0]['image']}:dev"
            print(f"Using {self.docker_image} docker image")
        # Build a new list instead of appending to the one passed in by the caller.
        install_steps = ["uv pip install ."] if self.install_steps is None else list(self.install_steps)
        # Use a custom patched pytest to force exit the process at the end, to avoid `Too long with no output (exceeded 10m0s): context deadline exceeded`
        install_steps.append("uv pip install git+https://github.com/ydshieh/pytest.git@8.4.1-ydshieh")
        self.install_steps = install_steps
        if self.pytest_options is None:
            self.pytest_options = {}
        if isinstance(self.tests_to_run, str):
            self.tests_to_run = [self.tests_to_run]
        else:
            # Any non-string value is replaced by the content of the fetched test list (or an empty list).
            test_file = os.path.join("test_preparation", f"{self.job_name}_test_list.txt")
            print("Looking for ", test_file)
            if os.path.exists(test_file):
                with open(test_file) as f:
                    expanded_tests = f.read().strip().split("\n")
                self.tests_to_run = expanded_tests
                print("Found:", expanded_tests)
            else:
                self.tests_to_run = []
                print("not Found")

    def to_dict(self):
        """Return the CircleCI job definition as a dict.

        The result has the keys `docker`, `environment` and `steps`, plus `resource_class` when one is set and
        `parallelism` (a pipeline-parameter placeholder) when `self.parallelism` is truthy.
        """
        env = COMMON_ENV_VARIABLES.copy()
        if self.job_name != "tests_hub":
            # NOTE(review): a token assembled from single characters is committed here; confirm it grants no
            # sensitive access.
            # fmt: off
            # not critical
            env.update({"HF_TOKEN": "".join(["h", "f", "_", "H", "o", "d", "V", "u", "M", "q", "b", "R", "m", "t", "b", "z", "F", "Q", "O", "Q", "A", "J", "G", "D", "l", "V", "Q", "r", "R", "N", "w", "D", "M", "V", "C", "s", "d"])})
            # fmt: on

        # Do not run tests decorated by @is_flaky on pull requests
        env['RUN_FLAKY'] = os.environ.get("CIRCLE_PULL_REQUEST", "") == ""
        env.update(self.additional_env)

        job = {
            "docker": self.docker_image,
            "environment": env,
        }
        if self.resource_class is not None:
            job["resource_class"] = self.resource_class

        all_options = {**COMMON_PYTEST_OPTIONS, **self.pytest_options}
        # Options with a value become `--key=value`; options whose value is `None` become the bare flag `-key`.
        pytest_flags = [f"--{key}={value}" if (value is not None or key in ["doctest-modules"]) else f"-{key}" for key, value in all_options.items()]
        pytest_flags.append(
            f"--make-reports={self.name}" if "examples" in self.name else f"--make-reports=tests_{self.name}"
        )
        timeout_cmd = f"timeout {self.command_timeout} " if self.command_timeout else ""
        marker_cmd = f"-m '{self.marker}'" if self.marker is not None else ""
        junit_flags = " -p no:warning -o junit_family=xunit1 --junitxml=test-results/junit.xml"
        joined_flaky_patterns = "|".join(FLAKY_TEST_FAILURE_PATTERNS)
        repeat_on_failure_flags = f"--reruns 5 --reruns-delay 2 --only-rerun '({joined_flaky_patterns})'"
        # The surrounding spaces are matched by `create_circleci_config` when it strips the quotes added by `yaml.dump`.
        parallel = f' << pipeline.parameters.{self.job_name}_parallelism >> '
        # Shell snippet that fails the job when pytest produced no output file or a worker crashed.
        crash_check_command = "\n".join(
            [
                "if [ ! -f tests_output.txt ]; then",
                '    echo "ERROR: tests_output.txt does not exist - tests may not have run properly"',
                "    exit 1",
                'elif grep -q "crashed and worker restarting disabled" tests_output.txt; then',
                '    echo "ERROR: Worker crash detected in test output"',
                '    echo "Found: crashed and worker restarting disabled"',
                "    exit 1",
                "else",
                '    echo "Tests output file exists and no worker crashes detected"',
                "fi",
            ]
        )
        steps = [
            "checkout",
            {"attach_workspace": {"at": "test_preparation"}},
            {"run": "apt-get update && apt-get install -y curl"},
            {"run": " && ".join(self.install_steps)},
            # Examples special case: we need to download NLTK files in advance to avoid concurrency issues
            {"run": {"name": "Download NLTK files", "command": """python -c "import nltk; nltk.download('punkt', quiet=True)" """} if "example" in self.name else "echo Skipping"},
            {"run": {
                "name": "Show installed libraries and their size",
                "command": """du -h -d 1 "$(pip -V | cut -d ' ' -f 4 | sed 's/pip//g')" | grep -vE "dist-info|_distutils_hack|__pycache__" | sort -h | tee installed.txt || true"""}
            },
            {"run": {
                "name": "Show installed libraries and their versions",
                "command": """pip list --format=freeze | tee installed.txt || true"""}
            },
            {"run": {
                "name": "Show biggest libraries",
                "command": """dpkg-query --show --showformat='${Installed-Size}\t${Package}\n' | sort -rh | head -25 | sort -h | awk '{ package=$2; sub(".*/", "", package); printf("%.5f GB %s\n", $1/1024/1024, package)}' || true"""}
            },
            {"run": {"name": "Create `test-results` directory", "command": "mkdir test-results"}},
            {"run": {"name": "Get files to test", "command":f'curl -L -o {self.job_name}_test_list.txt <<pipeline.parameters.{self.job_name}_test_list>> --header "Circle-Token: $CIRCLE_TOKEN"' if self.name != "pr_documentation_tests" else 'echo "Skipped"'}},
            {"run": {"name": "Split tests across parallel nodes: show current parallel tests",
                "command": f"TESTS=$(circleci tests split --split-by=timings {self.job_name}_test_list.txt) && echo $TESTS > splitted_tests.txt && echo $TESTS | tr ' ' '\n'" if self.parallelism else f"awk '{{printf \"%s \", $0}}' {self.job_name}_test_list.txt > splitted_tests.txt"
                }
            },
            # During the CircleCI docker images build time, we might already (or not) download the data.
            # If it's done already, the files are inside the directory `/test_data/`.
            {"run": {"name": "fetch hub objects before pytest", "command": "cp -r /test_data/* . 2>/dev/null || true; python3 utils/fetch_hub_objects_for_ci.py"}},
            {"run": {
                "name": "Run tests",
                "command": f"({timeout_cmd} python3 -m pytest {marker_cmd} -n {self.pytest_num_workers} {junit_flags} {repeat_on_failure_flags} {' '.join(pytest_flags)} $(cat splitted_tests.txt) | tee tests_output.txt)"}
            },
            {"run":
                {
                    "name": "Check for test crashes",
                    "when": "always",
                    "command": crash_check_command,
                },
            },
            {"run": {"name": "Expand to show skipped tests", "when": "always", "command": "python3 .circleci/parse_test_outputs.py --file tests_output.txt --skip"}},
            {"run": {"name": "Failed tests: show reasons", "when": "always", "command": "python3 .circleci/parse_test_outputs.py --file tests_output.txt --fail"}},
            {"run": {"name": "Errors", "when": "always", "command": "python3 .circleci/parse_test_outputs.py --file tests_output.txt --errors"}},
            {"store_test_results": {"path": "test-results"}},
            {"store_artifacts": {"path": "test-results/junit.xml"}},
            {"store_artifacts": {"path": "reports"}},
            {"store_artifacts": {"path": "tests.txt"}},
            {"store_artifacts": {"path": "splitted_tests.txt"}},
            {"store_artifacts": {"path": "installed.txt"}},
        ]
        if self.parallelism:
            job["parallelism"] = parallel
        job["steps"] = steps
        return job

    @property
    def job_name(self):
        """CircleCI job name: `name` itself for example/pipeline/doc-test jobs, `tests_<name>` otherwise."""
        return self.name if ("examples" in self.name or "pipeline" in self.name or "pr_documentation" in self.name) else f"tests_{self.name}"
|
||||
|
||||
|
||||
# JOBS
|
||||
# Tests selected with the `not generate` marker, split across parallel nodes.
torch_job = CircleCIJob(
    "torch",
    docker_image=[{"image": "huggingface/transformers-torch-light"}],
    marker="not generate",
    parallelism=6,
)

# Tests selected with the `generate` marker.
generate_job = CircleCIJob(
    "generate",
    docker_image=[{"image": "huggingface/transformers-torch-light"}],
    # networkx==3.3 (after #36957) causes some issues
    # TODO: remove this once it works directly
    install_steps=["uv pip install ."],
    marker="generate",
    parallelism=6,
)

tokenization_job = CircleCIJob(
    "tokenization",
    docker_image=[{"image": "huggingface/transformers-torch-light"}],
    parallelism=8,
)

processor_job = CircleCIJob(
    "processors",
    docker_image=[{"image": "huggingface/transformers-torch-light"}],
    parallelism=8,
)

# Overrides the common `RUN_PIPELINE_TESTS: False` and selects tests with the `is_pipeline_test` marker.
pipelines_torch_job = CircleCIJob(
    "pipelines_torch",
    additional_env={"RUN_PIPELINE_TESTS": True},
    docker_image=[{"image":"huggingface/transformers-torch-light"}],
    marker="is_pipeline_test",
    parallelism=4,
)

custom_tokenizers_job = CircleCIJob(
    "custom_tokenizers",
    additional_env={"RUN_CUSTOM_TOKENIZERS": True},
    docker_image=[{"image": "huggingface/transformers-custom-tokenizers"}],
)

# Overrides the common `OMP_NUM_THREADS: 1` with 8 and installs the example test requirements.
examples_torch_job = CircleCIJob(
    "examples_torch",
    additional_env={"OMP_NUM_THREADS": 8},
    docker_image=[{"image":"huggingface/transformers-examples-torch"}],
    # TODO @ArthurZucker remove this once docker is easier to build
    install_steps=["uv pip install . && uv pip install -r examples/pytorch/_tests_requirements.txt"],
    pytest_num_workers=4,
)

# Tests selected with the `is_staging_test` marker; a git identity is configured as part of the install steps.
hub_job = CircleCIJob(
    "hub",
    additional_env={"HUGGINGFACE_CO_STAGING": True},
    docker_image=[{"image":"huggingface/transformers-torch-light"}],
    install_steps=[
        'uv pip install .',
        'git config --global user.email "ci@dummy.com"',
        'git config --global user.name "ci"',
    ],
    marker="is_staging_test",
    pytest_num_workers=2,
    resource_class="medium",
)

# Adds `--durations=100` to the pytest command.
exotic_models_job = CircleCIJob(
    "exotic_models",
    docker_image=[{"image":"huggingface/transformers-exotic-models"}],
    parallelism=4,
    pytest_options={"durations": 100},
)

repo_utils_job = CircleCIJob(
    "repo_utils",
    docker_image=[{"image":"huggingface/transformers-consistency"}],
    pytest_num_workers=4,
    resource_class="large",
)

# Installs the `serving` extra and selects tests with the `not generate` marker.
non_model_job = CircleCIJob(
    "non_model",
    docker_image=[{"image": "huggingface/transformers-torch-light"}],
    # networkx==3.3 (after #36957) causes some issues
    # TODO: remove this once it works directly
    install_steps=["uv pip install .[serving]"],
    marker="not generate",
    parallelism=6,
)
|
||||
|
||||
|
||||
# We also include a `dummy.py` file in the files to be doc-tested to prevent an edge-case failure. Otherwise, pytest
# hangs forever during test collection while showing `collecting 0 items / 21 errors`. (To see this, we have to remove
# the bash output redirection.)
py_command = 'from utils.tests_fetcher import get_doctest_files; to_test = get_doctest_files() + ["dummy.py"]; to_test = " ".join(to_test); print(to_test)'
# Wrap the snippet in a shell command substitution so that it runs inside the CI job ...
py_command = f"$(python3 -c '{py_command}')"
# ... and write what it prints to a temporary file.
command = f'echo """{py_command}""" > pr_documentation_tests_temp.txt'
doc_test_job = CircleCIJob(
    "pr_documentation_tests",
    docker_image=[{"image":"huggingface/transformers-consistency"}],
    additional_env={"TRANSFORMERS_VERBOSITY": "error", "DATASETS_VERBOSITY": "error", "SKIP_CUDA_DOCTEST": "1"},
    install_steps=[
        # Add an empty file to keep the test step running correctly even if no file is selected to be tested.
        "uv pip install .",
        "touch dummy.py",
        command,
        "cat pr_documentation_tests_temp.txt",
        # Only the last line of the temporary file is kept as the test list.
        "tail -n1 pr_documentation_tests_temp.txt | tee pr_documentation_tests_test_list.txt"
    ],
    # NOTE(review): this refers to `pr_documentation_tests.txt`, while the install steps above write
    # `pr_documentation_tests_test_list.txt`; confirm which file name is intended.
    tests_to_run="$(cat pr_documentation_tests.txt)",  # noqa
    pytest_options={"-doctest-modules": None, "doctest-glob": "*.md", "dist": "loadfile", "rvsA": None},
    command_timeout=1200,  # test cannot run longer than 1200 seconds
    pytest_num_workers=1,
)
|
||||
|
||||
# Job groups. `ALL_TESTS` is the list `create_circleci_config` selects the jobs to generate from.
REGULAR_TESTS = [torch_job, hub_job, tokenization_job, processor_job, generate_job, non_model_job] # fmt: skip
EXAMPLES_TESTS = [examples_torch_job]
PIPELINE_TESTS = [pipelines_torch_job]
REPO_UTIL_TESTS = [repo_utils_job]
DOC_TESTS = [doc_test_job]
ALL_TESTS = REGULAR_TESTS + EXAMPLES_TESTS + PIPELINE_TESTS + REPO_UTIL_TESTS + DOC_TESTS + [custom_tokenizers_job] + [exotic_models_job] # fmt: skip
|
||||
|
||||
|
||||
def create_circleci_config(folder=None):
    """Generate `generated_config.yml` for the jobs that have a fetched test list.

    A job from `ALL_TESTS` is selected when `test_preparation/<job_name>_test_list.txt` exists. When at least one
    job is selected, a `collection_job` is put in front of them; otherwise a single `EmptyJob` is generated.

    Args:
        folder: Directory in which `generated_config.yml` is written. Defaults to the current working directory.
            The value is also exported as the `test_preparation_dir` environment variable.
    """
    folder = os.getcwd() if folder is None else folder
    os.environ["test_preparation_dir"] = folder

    selected_jobs = []
    for candidate in ALL_TESTS:
        test_list_path = os.path.join("test_preparation", f"{candidate.job_name}_test_list.txt")
        if os.path.isfile(test_list_path):
            selected_jobs.append(candidate)
    print("The following jobs will be run ", selected_jobs)

    if selected_jobs:
        print("Full list of job name inputs", {job.job_name + "_test_list": {"type": "string", "default": ''} for job in selected_jobs})
        # Add a job waiting all the test jobs and aggregate their test summary files at the end
        collector = EmptyJob()
        collector.job_name = "collection_job"
        selected_jobs = [collector, *selected_jobs]
    else:
        selected_jobs = [EmptyJob()]

    parameters = {
        # Only used to accept the parameters from the trigger
        "nightly": {"type": "boolean", "default": False},
        # Only used to accept the parameters from GitHub Actions trigger
        "GHA_Actor": {"type": "string", "default": ""},
        "GHA_Action": {"type": "string", "default": ""},
        "GHA_Event": {"type": "string", "default": ""},
        "GHA_Meta": {"type": "string", "default": ""},
        "tests_to_run": {"type": "string", "default": ""},
    }
    # One `<job>_test_list` and one `<job>_parallelism` parameter per job, in that order.
    for job in selected_jobs:
        parameters[job.job_name + "_test_list"] = {"type": "string", "default": ''}
    for job in selected_jobs:
        parameters[job.job_name + "_parallelism"] = {"type": "integer", "default": 1}

    config = {
        "version": "2.1",
        "parameters": parameters,
        "jobs": {job.job_name: job.to_dict() for job in selected_jobs},
    }

    if "CIRCLE_TOKEN" in os.environ:
        # For private forked repo. (e.g. new model addition)
        workflow_jobs = [{job.job_name: {"context": ["TRANSFORMERS_CONTEXT"]}} for job in selected_jobs]
    else:
        # For public repo. (e.g. `transformers`)
        workflow_jobs = [job.job_name for job in selected_jobs]
    config["workflows"] = {"version": 2, "run_tests": {"jobs": workflow_jobs}}

    dumped = yaml.dump(config, sort_keys=False, default_flow_style=False)
    # Remove the quotes that surround the ` << pipeline... >> ` placeholders in the dumped text.
    dumped = dumped.replace("' << pipeline", " << pipeline")
    dumped = dumped.replace(">> '", " >>")
    with open(os.path.join(folder, "generated_config.yml"), "w") as f:
        f.write(dumped)
|
||||
|
||||
|
||||
# Script entry point: generate the CircleCI configuration in the requested folder.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--fetcher_folder",
        type=str,
        default=None,
        help="Folder in which `generated_config.yml` is written (defaults to the current working directory).",
    )
    args = parser.parse_args()

    create_circleci_config(args.fetcher_folder)
|
||||
71
transformers/.circleci/parse_test_outputs.py
Normal file
71
transformers/.circleci/parse_test_outputs.py
Normal file
@@ -0,0 +1,71 @@
|
||||
import argparse
|
||||
import re
|
||||
|
||||
|
||||
def parse_pytest_output(file_path):
    """Print how many tests were skipped for each reason, then the total number of skipped tests.

    Only `SKIPPED [N] tests/...: reason` lines are considered. `N` is the number of skips pytest folded into that
    line, so it is what gets added to the per-reason and overall totals. Reasons are printed in ascending order of
    their count.

    Args:
        file_path: Path of the file that holds the captured pytest output.
    """
    # The location is matched lazily so that a reason which itself contains ": " is kept whole.
    skipped_line = re.compile(r'^SKIPPED \[(\d+)\] (tests/.*?): (.*)$')
    skipped_per_reason = {}
    skipped_count = 0
    with open(file_path, 'r') as file:
        for line in file:
            match = skipped_line.match(line)
            if match:
                folded_count, _location, reason = match.groups()
                folded_count = int(folded_count)
                skipped_count += folded_count
                skipped_per_reason[reason] = skipped_per_reason.get(reason, 0) + folded_count
    for reason, count in sorted(skipped_per_reason.items(), key=lambda item: item[1]):
        print(f"{count:4} skipped because: {reason}")
    print("Number of skipped tests:", skipped_count)
|
||||
|
||||
def parse_pytest_failure_output(file_path):
    """Print the failed tests grouped by failure message and exit with status 1 if there is any.

    Only `FAILED tests/... - Error: message` lines are considered. For each distinct message, the number of
    failures and the error of the first one are printed, in ascending order of the number of failures.

    Args:
        file_path: Path of the file that holds the captured pytest output.

    Raises:
        SystemExit: With code 1 when at least one failure line was found.
    """
    import sys

    # Lazy groups: the test id stops at the first " - " and the error at the first ": ", so the rest of the
    # message (which may contain further colons) stays together.
    failed_line = re.compile(r'^FAILED (tests/.*?) - (.*?): (.*)$')
    failed_tests = {}
    failed_count = 0
    with open(file_path, 'r') as file:
        for line in file:
            match = failed_line.match(line)
            if match:
                failed_count += 1
                _, error, reason = match.groups()
                failed_tests.setdefault(reason, []).append(error)
    for reason, errors in sorted(failed_tests.items(), key=lambda item: len(item[1])):
        print(f"{len(errors):4} failed because `{errors[0]}` -> {reason}")
    print("Number of failed tests:", failed_count)
    if failed_count > 0:
        # `sys.exit` rather than the `exit` helper, which is only injected by the `site` module.
        sys.exit(1)
|
||||
|
||||
def parse_pytest_errors_output(file_path):
    """Print the errored tests grouped by error message and exit with status 1 if there is any.

    The file path is printed first. Only `ERROR tests/... - Error: message` lines are considered. For each distinct
    message, the number of errors and the error of the first one are printed, in ascending order of the number of
    errors.

    Args:
        file_path: Path of the file that holds the captured pytest output.

    Raises:
        SystemExit: With code 1 when at least one error line was found.
    """
    import sys

    print(file_path)
    # Lazy groups: the test id stops at the first " - " and the error at the first ": ", so the rest of the
    # message (which may contain further colons) stays together.
    error_line = re.compile(r'^ERROR (tests/.*?) - (.*?): (.*)$')
    error_tests = {}
    error_count = 0
    with open(file_path, 'r') as file:
        for line in file:
            match = error_line.match(line)
            if match:
                error_count += 1
                _, test_error, reason = match.groups()
                error_tests.setdefault(reason, []).append(test_error)
    for reason, errors in sorted(error_tests.items(), key=lambda item: len(item[1])):
        print(f"{len(errors):4} errored out because of `{errors[0]}` -> {reason}")
    print("Number of errors:", error_count)
    if error_count > 0:
        # `sys.exit` rather than the `exit` helper, which is only injected by the `site` module.
        sys.exit(1)
|
||||
|
||||
def main():
    """Parse the command line and run the requested report(s) on the given pytest output file.

    `--skip`, `--fail` and `--errors` may be combined; they run in that order. `--fail` and `--errors` exit with
    status 1 when they find something, which prevents the reports after them from running.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--file", help="file to parse")
    parser.add_argument("--skip", action="store_true", help="show skipped reasons")
    parser.add_argument("--fail", action="store_true", help="show failed tests")
    parser.add_argument("--errors", action="store_true", help="show errored tests")
    args = parser.parse_args()

    # Report a missing file as a usage error instead of failing later inside `open(None)`.
    if (args.skip or args.fail or args.errors) and args.file is None:
        parser.error("--file is required with --skip, --fail or --errors")

    if args.skip:
        parse_pytest_output(args.file)

    if args.fail:
        parse_pytest_failure_output(args.file)

    if args.errors:
        parse_pytest_errors_output(args.file)


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user