This commit is contained in:
2025-10-09 16:47:16 +08:00
parent c8feb4deb5
commit e27e3f16bb
5248 changed files with 1778505 additions and 0 deletions

View File

View File

@@ -0,0 +1,248 @@
# Copyright 2021 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import datasets
import numpy as np
from huggingface_hub import AudioClassificationOutputElement
from transformers import (
MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING,
is_torch_available,
)
from transformers.pipelines import AudioClassificationPipeline, pipeline
from transformers.testing_utils import (
compare_pipeline_output_to_hub_spec,
is_pipeline_test,
nested_simplify,
require_torch,
require_torchaudio,
slow,
)
from .test_pipelines_common import ANY
if is_torch_available():
import torch
@is_pipeline_test
class AudioClassificationPipelineTests(unittest.TestCase):
    """Tests for the audio-classification pipeline on raw waveforms and dataset audio."""

    model_mapping = MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING
    # Class-level cache for the dummy dataset; filled on first use by `_load_dataset`.
    _dataset = None

    @classmethod
    def _load_dataset(cls):
        # Lazy loading of the dataset. Because it is a class method, it will only be loaded once per pytest process.
        if cls._dataset is None:
            cls._dataset = datasets.load_dataset(
                "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation"
            )

    def get_test_pipeline(
        self,
        model,
        tokenizer=None,
        image_processor=None,
        feature_extractor=None,
        processor=None,
        dtype="float32",
    ):
        # Build the pipeline under test and return it with two all-zero waveforms of different lengths.
        audio_classifier = AudioClassificationPipeline(
            model=model,
            tokenizer=tokenizer,
            feature_extractor=feature_extractor,
            image_processor=image_processor,
            processor=processor,
            dtype=dtype,
        )
        # test with a raw waveform
        audio = np.zeros((34000,))
        audio2 = np.zeros((14000,))
        return audio_classifier, [audio2, audio]

    def run_pipeline_test(self, audio_classifier, examples):
        # Only the second (longer) waveform is exercised here.
        _, audio = examples
        output = audio_classifier(audio)
        # by default a model is initialized with num_labels=2
        self.assertEqual(
            output,
            [
                {"score": ANY(float), "label": ANY(str)},
                {"score": ANY(float), "label": ANY(str)},
            ],
        )

        output = audio_classifier(audio, top_k=1)
        self.assertEqual(
            output,
            [
                {"score": ANY(float), "label": ANY(str)},
            ],
        )

        # Check the hub spec BEFORE calling `run_torchaudio`: that helper is wrapped in
        # `@require_torchaudio`, and if the guard skips when invoked, nothing placed after
        # the call would run and this check would be lost without torchaudio.
        for single_output in output:
            compare_pipeline_output_to_hub_spec(single_output, AudioClassificationOutputElement)

        self.run_torchaudio(audio_classifier)

    @require_torchaudio
    def run_torchaudio(self, audio_classifier):
        self._load_dataset()
        # Classify the decoded waveform of the first sample of the cached dataset.
        audio = self._dataset[0]["audio"]["array"]
        output = audio_classifier(audio)
        self.assertEqual(
            output,
            [
                {"score": ANY(float), "label": ANY(str)},
                {"score": ANY(float), "label": ANY(str)},
            ],
        )

    @require_torch
    def test_small_model_pt(self):
        model = "anton-l/wav2vec2-random-tiny-classifier"
        audio_classifier = pipeline("audio-classification", model=model)

        audio = np.ones((8000,))
        output = audio_classifier(audio, top_k=4)

        # Two accepted result sets; the test passes if the output matches either one.
        EXPECTED_OUTPUT = [
            {"score": 0.0842, "label": "no"},
            {"score": 0.0838, "label": "up"},
            {"score": 0.0837, "label": "go"},
            {"score": 0.0834, "label": "right"},
        ]
        EXPECTED_OUTPUT_PT_2 = [
            {"score": 0.0845, "label": "stop"},
            {"score": 0.0844, "label": "on"},
            {"score": 0.0841, "label": "right"},
            {"score": 0.0834, "label": "left"},
        ]
        self.assertIn(nested_simplify(output, decimals=4), [EXPECTED_OUTPUT, EXPECTED_OUTPUT_PT_2])

        # Same waveform, passed as a dict carrying an explicit sampling rate.
        audio_dict = {"array": np.ones((8000,)), "sampling_rate": audio_classifier.feature_extractor.sampling_rate}
        output = audio_classifier(audio_dict, top_k=4)
        self.assertIn(nested_simplify(output, decimals=4), [EXPECTED_OUTPUT, EXPECTED_OUTPUT_PT_2])

    @require_torch
    def test_small_model_pt_fp16(self):
        model = "anton-l/wav2vec2-random-tiny-classifier"
        audio_classifier = pipeline("audio-classification", model=model, dtype=torch.float16)

        audio = np.ones((8000,))
        output = audio_classifier(audio, top_k=4)

        # Expected outputs are collected running the test on torch 2.6 in few scenarios.
        # Running on CUDA T4/A100 and on XPU PVC (note: using stock torch xpu, NOT using IPEX):
        EXPECTED_OUTPUT = [
            {"score": 0.0833, "label": "go"},
            {"score": 0.0833, "label": "off"},
            {"score": 0.0833, "label": "stop"},
            {"score": 0.0833, "label": "on"},
        ]
        # Running on CPU:
        EXPECTED_OUTPUT_PT_2 = [
            {"score": 0.0839, "label": "no"},
            {"score": 0.0837, "label": "go"},
            {"score": 0.0836, "label": "yes"},
            {"score": 0.0835, "label": "right"},
        ]
        self.assertIn(nested_simplify(output, decimals=4), [EXPECTED_OUTPUT, EXPECTED_OUTPUT_PT_2])

        # Same waveform, passed as a dict carrying an explicit sampling rate.
        audio_dict = {"array": np.ones((8000,)), "sampling_rate": audio_classifier.feature_extractor.sampling_rate}
        output = audio_classifier(audio_dict, top_k=4)
        self.assertIn(nested_simplify(output, decimals=4), [EXPECTED_OUTPUT, EXPECTED_OUTPUT_PT_2])

    @require_torch
    @slow
    def test_large_model_pt(self):
        model = "superb/wav2vec2-base-superb-ks"
        audio_classifier = pipeline("audio-classification", model=model)
        dataset = datasets.load_dataset("anton-l/superb_dummy", "ks", split="test")

        audio = np.array(dataset[3]["speech"], dtype=np.float32)
        output = audio_classifier(audio, top_k=4)
        self.assertEqual(
            nested_simplify(output, decimals=3),
            [
                {"score": 0.981, "label": "go"},
                {"score": 0.007, "label": "up"},
                {"score": 0.006, "label": "_unknown_"},
                {"score": 0.001, "label": "down"},
            ],
        )

    @require_torch
    @slow
    def test_top_k_none_returns_all_labels(self):
        model_name = "superb/wav2vec2-base-superb-ks"  # model with more than 5 labels
        classification_pipeline = pipeline(
            "audio-classification",
            model=model_name,
            top_k=None,
        )

        # Create dummy input
        sampling_rate = 16000
        signal = np.zeros((sampling_rate,), dtype=np.float32)

        result = classification_pipeline(signal)
        num_labels = classification_pipeline.model.config.num_labels

        self.assertEqual(len(result), num_labels, "Should return all labels when top_k is None")

    @require_torch
    @slow
    def test_top_k_none_with_few_labels(self):
        model_name = "superb/hubert-base-superb-er"  # model with fewer labels
        classification_pipeline = pipeline(
            "audio-classification",
            model=model_name,
            top_k=None,
        )

        # Create dummy input
        sampling_rate = 16000
        signal = np.zeros((sampling_rate,), dtype=np.float32)

        result = classification_pipeline(signal)
        num_labels = classification_pipeline.model.config.num_labels

        self.assertEqual(len(result), num_labels, "Should handle models with fewer labels correctly")

    @require_torch
    @slow
    def test_top_k_greater_than_labels(self):
        model_name = "superb/hubert-base-superb-er"
        classification_pipeline = pipeline(
            "audio-classification",
            model=model_name,
            top_k=100,  # intentionally large number
        )

        # Create dummy input
        sampling_rate = 16000
        signal = np.zeros((sampling_rate,), dtype=np.float32)

        result = classification_pipeline(signal)
        num_labels = classification_pipeline.model.config.num_labels

        self.assertEqual(len(result), num_labels, "Should cap top_k to number of labels")

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,964 @@
# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import gc
import logging
import os
import sys
import tempfile
import unittest
from pathlib import Path
import datasets
from huggingface_hub import delete_repo, snapshot_download
from huggingface_hub.errors import HfHubHTTPError
from transformers import (
AutomaticSpeechRecognitionPipeline,
AutoModelForSequenceClassification,
AutoTokenizer,
DistilBertForSequenceClassification,
MaskGenerationPipeline,
T5ForConditionalGeneration,
TextClassificationPipeline,
TextGenerationPipeline,
pipeline,
)
from transformers.pipelines import PIPELINE_REGISTRY, get_task
from transformers.pipelines.base import Pipeline, _pad
from transformers.testing_utils import (
TOKEN,
USER,
CaptureLogger,
RequestCounter,
backend_empty_cache,
is_pipeline_test,
is_staging_test,
nested_simplify,
require_torch,
require_torch_accelerator,
require_torch_multi_accelerator,
slow,
torch_device,
)
from transformers.utils import direct_transformers_import, is_torch_available
from transformers.utils import logging as transformers_logging
# Extend the import path with the `utils` directory located three levels above this file.
# NOTE(review): presumably this is what makes `test_module` importable below -- confirm.
sys.path.append(str(Path(__file__).parent.parent.parent / "utils"))
from test_module.custom_pipeline import PairClassificationPipeline # noqa E402
logger = logging.getLogger(__name__)
# Path to `src/transformers`, resolved relative to this file (three levels up).
PATH_TO_TRANSFORMERS = os.path.join(Path(__file__).parent.parent.parent, "src/transformers")
# Dynamically import the Transformers module to grab the attribute classes of the processor from their names.
transformers_module = direct_transformers_import(PATH_TO_TRANSFORMERS)
class ANY:
    """Equality wildcard that matches any instance of the given types.

    ``ANY(float) == 0.5`` is true, so it can stand in for a value whose exact
    content is irrelevant inside an expected structure passed to ``assertEqual``.
    """

    def __init__(self, *_types):
        # Kept as a tuple so it can be handed straight to `isinstance`.
        self._types = _types

    def __eq__(self, other):
        matches = isinstance(other, self._types)
        return matches

    def __repr__(self):
        type_names = [_type.__name__ for _type in self._types]
        return "ANY(" + ", ".join(type_names) + ")"
@is_pipeline_test
class CommonPipelineTest(unittest.TestCase):
    """Task-agnostic checks on `pipeline(...)` construction, iteration and configuration."""

    @require_torch
    def test_pipeline_iteration(self):
        from torch.utils.data import Dataset

        class MyDataset(Dataset):
            data = [
                "This is a test",
                "This restaurant is great",
                "This restaurant is awful",
            ]

            def __len__(self):
                # Derived from `data` so the two can never drift apart.
                return len(self.data)

            def __getitem__(self, i):
                return self.data[i]

        text_classifier = pipeline(task="text-classification", model="hf-internal-testing/tiny-random-distilbert")
        dataset = MyDataset()
        for output in text_classifier(dataset):
            self.assertEqual(output, {"label": ANY(str), "score": ANY(float)})

    @require_torch
    def test_check_task_auto_inference(self):
        # No `task` argument: the pipeline class has to be inferred from the checkpoint.
        pipe = pipeline(model="hf-internal-testing/tiny-random-distilbert")

        self.assertIsInstance(pipe, TextClassificationPipeline)

    @require_torch
    def test_pipeline_batch_size_global(self):
        pipe = pipeline(model="hf-internal-testing/tiny-random-distilbert")
        self.assertIsNone(pipe._batch_size)
        self.assertIsNone(pipe._num_workers)

        pipe = pipeline(model="hf-internal-testing/tiny-random-distilbert", batch_size=2, num_workers=1)
        self.assertEqual(pipe._batch_size, 2)
        self.assertEqual(pipe._num_workers, 1)

    @require_torch
    def test_pipeline_pathlike(self):
        pipe = pipeline(model="hf-internal-testing/tiny-random-distilbert")
        with tempfile.TemporaryDirectory() as d:
            pipe.save_pretrained(d)
            # Pass a `Path` object rather than a `str` as the model location.
            path = Path(d)
            newpipe = pipeline(task="text-classification", model=path)
        self.assertIsInstance(newpipe, TextClassificationPipeline)

    @require_torch
    def test_pipeline_override(self):
        class MyPipeline(TextClassificationPipeline):
            pass

        text_classifier = pipeline(model="hf-internal-testing/tiny-random-distilbert", pipeline_class=MyPipeline)

        self.assertIsInstance(text_classifier, MyPipeline)

    def test_check_task(self):
        task = get_task("openai-community/gpt2")
        self.assertEqual(task, "text-generation")

        with self.assertRaises(RuntimeError):
            # Wrong framework
            get_task("espnet/siddhana_slurp_entity_asr_train_asr_conformer_raw_en_word_valid.acc.ave_10best")

    @require_torch
    def test_iterator_data(self):
        def data(n: int):
            for _ in range(n):
                yield "This is a test"

        pipe = pipeline(model="hf-internal-testing/tiny-random-distilbert")

        results = []
        for out in pipe(data(10)):
            self.assertEqual(nested_simplify(out), {"label": "LABEL_0", "score": 0.504})
            results.append(out)
        self.assertEqual(len(results), 10)

        # When using multiple workers on streamable data it should still work
        # This will force using `num_workers=1` with a warning for now.
        results = []
        for out in pipe(data(10), num_workers=2):
            self.assertEqual(nested_simplify(out), {"label": "LABEL_0", "score": 0.504})
            results.append(out)
        self.assertEqual(len(results), 10)

    @require_torch
    def test_unbatch_attentions_hidden_states(self):
        model = DistilBertForSequenceClassification.from_pretrained(
            "hf-internal-testing/tiny-random-distilbert", output_hidden_states=True, output_attentions=True
        )
        tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-distilbert")
        text_classifier = TextClassificationPipeline(model=model, tokenizer=tokenizer)

        # Used to throw an error because `hidden_states` are a tuple of tensors
        # instead of the expected tensor.
        outputs = text_classifier(["This is great !"] * 20, batch_size=32)
        self.assertEqual(len(outputs), 20)

    @require_torch
    def test_dtype_property(self):
        import torch

        model_id = "hf-internal-testing/tiny-random-distilbert"

        # If dtype is specified in the pipeline constructor, the property should return that type
        pipe = pipeline(model=model_id, dtype=torch.float16)
        self.assertEqual(pipe.dtype, torch.float16)

        # If the underlying model changes dtype, the property should return the new type
        pipe.model.to(torch.bfloat16)
        self.assertEqual(pipe.dtype, torch.bfloat16)

        # If dtype is NOT specified in the pipeline constructor, the property should just return
        # the dtype of the underlying model (default)
        pipe = pipeline(model=model_id)
        self.assertEqual(pipe.dtype, torch.float32)

        # If underlying model doesn't have dtype property, simply return None
        pipe.model = None
        self.assertIsNone(pipe.dtype)

    @require_torch
    def test_auto_model_pipeline_registration_from_local_dir(self):
        with tempfile.TemporaryDirectory() as tmp_dir:
            snapshot_download("hf-internal-testing/tiny-random-custom-architecture", local_dir=tmp_dir)
            pipe = pipeline("text-generation", tmp_dir, trust_remote_code=True)

        self.assertIsInstance(pipe, TextGenerationPipeline)  # Assert successful load

    @require_torch
    def test_pipeline_with_task_parameters_no_side_effects(self):
        """
        Regression test: certain pipeline flags, like `task`, modified the model configuration, causing unexpected
        side-effects
        """
        # This checkpoint has task-specific parameters that will modify the behavior of the pipeline
        model = T5ForConditionalGeneration.from_pretrained("t5-small")
        # `assertEqual` (rather than `assertTrue(a == b)`) reports the offending value on failure.
        self.assertEqual(model.config.num_beams, 1)

        # The task-specific parameters used to cause side-effects on `model.config` -- not anymore
        pipe = pipeline(model=model, tokenizer=AutoTokenizer.from_pretrained("t5-small"), task="translation_en_to_de")
        self.assertEqual(model.config.num_beams, 1)
        self.assertEqual(model.generation_config.num_beams, 1)

        # Under the hood: we now store a generation config in the pipeline. This generation config stores the
        # task-specific parameters.
        self.assertEqual(pipe.generation_config.num_beams, 4)

        # We can confirm that the task-specific parameters have an effect. (In this case, the default is `num_beams=1`,
        # which would crash when `num_return_sequences=4` is passed.)
        pipe("Hugging Face doesn't sell hugs.", num_return_sequences=4)
        with self.assertRaises(ValueError):
            pipe("Hugging Face doesn't sell hugs.", num_return_sequences=4, num_beams=1)
@is_pipeline_test
@require_torch
class PipelineScikitCompatTest(unittest.TestCase):
    """Checks the `predict` / `transform` entry points of a text-classification pipeline."""

    def test_pipeline_predict(self):
        samples = ["This is a test"]
        classifier = pipeline(task="text-classification", model="hf-internal-testing/tiny-random-distilbert")

        # One prediction per input sample; only the value types are pinned.
        wanted = [{"label": ANY(str), "score": ANY(float)}]
        got = classifier.predict(samples)
        self.assertEqual(wanted, got)

    def test_pipeline_transform(self):
        samples = ["This is a test"]
        classifier = pipeline(task="text-classification", model="hf-internal-testing/tiny-random-distilbert")

        # One prediction per input sample; only the value types are pinned.
        wanted = [{"label": ANY(str), "score": ANY(float)}]
        got = classifier.transform(samples)
        self.assertEqual(wanted, got)
@is_pipeline_test
class PipelinePadTest(unittest.TestCase):
    """Exercises the `_pad` collation helper on token, image and offset-mapping inputs."""

    @require_torch
    def test_pipeline_padding(self):
        import torch

        # Two samples whose sequences have different lengths (4 vs 6).
        short_sample = {
            "label": "label1",
            "input_ids": torch.LongTensor([[1, 23, 24, 2]]),
            "attention_mask": torch.LongTensor([[0, 1, 1, 0]]),
        }
        long_sample = {
            "label": "label2",
            "input_ids": torch.LongTensor([[1, 23, 24, 43, 44, 2]]),
            "attention_mask": torch.LongTensor([[0, 1, 1, 1, 1, 0]]),
        }
        items = [short_sample, long_sample]

        # Non-tensor entries come back as a plain list.
        self.assertEqual(_pad(items, "label", 0, "right"), ["label1", "label2"])

        right_padded_ids = _pad(items, "input_ids", 10, "right")
        expected_right_ids = torch.LongTensor([[1, 23, 24, 2, 10, 10], [1, 23, 24, 43, 44, 2]])
        self.assertTrue(torch.allclose(right_padded_ids, expected_right_ids))

        left_padded_ids = _pad(items, "input_ids", 10, "left")
        expected_left_ids = torch.LongTensor([[10, 10, 1, 23, 24, 2], [1, 23, 24, 43, 44, 2]])
        self.assertTrue(torch.allclose(left_padded_ids, expected_left_ids))

        padded_mask = _pad(items, "attention_mask", 0, "right")
        expected_mask = torch.LongTensor([[0, 1, 1, 0, 0, 0], [0, 1, 1, 1, 1, 0]])
        self.assertTrue(torch.allclose(padded_mask, expected_mask))

    @require_torch
    def test_pipeline_image_padding(self):
        import torch

        # Both images already share a shape.
        items = [
            {"label": name, "pixel_values": torch.zeros((1, 3, 10, 10))}
            for name in ("label1", "label2")
        ]

        self.assertEqual(_pad(items, "label", 0, "right"), ["label1", "label2"])

        batched_pixels = _pad(items, "pixel_values", 10, "right")
        self.assertTrue(torch.allclose(batched_pixels, torch.zeros((2, 3, 10, 10))))

    @require_torch
    def test_pipeline_offset_mapping(self):
        import torch

        # Sequence lengths 11 and 4; the expected batch uses length 11.
        items = [
            {"offset_mappings": torch.zeros([1, 11, 2], dtype=torch.long)},
            {"offset_mappings": torch.zeros([1, 4, 2], dtype=torch.long)},
        ]

        batched_offsets = _pad(items, "offset_mappings", 0, "right")
        expected_offsets = torch.zeros((2, 11, 2), dtype=torch.long)
        self.assertTrue(torch.allclose(batched_offsets, expected_offsets))
@is_pipeline_test
class PipelineUtilsTest(unittest.TestCase):
    """Tests for the pipeline iterator utilities, device handling and default checkpoints."""

    @require_torch
    def test_pipeline_dataset(self):
        from transformers.pipelines.pt_utils import PipelineDataset

        dummy_dataset = [0, 1, 2, 3]

        def add(number, extra=0):
            return number + extra

        dataset = PipelineDataset(dummy_dataset, add, {"extra": 2})
        self.assertEqual(len(dataset), 4)
        outputs = [dataset[i] for i in range(4)]
        self.assertEqual(outputs, [2, 3, 4, 5])

    @require_torch
    def test_pipeline_iterator(self):
        from transformers.pipelines.pt_utils import PipelineIterator

        dummy_dataset = [0, 1, 2, 3]

        def add(number, extra=0):
            return number + extra

        dataset = PipelineIterator(dummy_dataset, add, {"extra": 2})
        self.assertEqual(len(dataset), 4)

        outputs = list(dataset)
        self.assertEqual(outputs, [2, 3, 4, 5])

    @require_torch
    def test_pipeline_iterator_no_len(self):
        from transformers.pipelines.pt_utils import PipelineIterator

        def dummy_dataset():
            yield from range(4)

        def add(number, extra=0):
            return number + extra

        dataset = PipelineIterator(dummy_dataset(), add, {"extra": 2})
        # The wrapped generator has no length, so `len` is expected to raise.
        with self.assertRaises(TypeError):
            len(dataset)

        outputs = list(dataset)
        self.assertEqual(outputs, [2, 3, 4, 5])

    @require_torch
    def test_pipeline_batch_unbatch_iterator(self):
        from transformers.pipelines.pt_utils import PipelineIterator

        dummy_dataset = [{"id": [0, 1, 2]}, {"id": [3]}]

        def add(number, extra=0):
            return {"id": [i + extra for i in number["id"]]}

        dataset = PipelineIterator(dummy_dataset, add, {"extra": 2}, loader_batch_size=3)

        outputs = list(dataset)
        self.assertEqual(outputs, [{"id": 2}, {"id": 3}, {"id": 4}, {"id": 5}])

    @require_torch
    def test_pipeline_batch_unbatch_iterator_tensors(self):
        import torch

        from transformers.pipelines.pt_utils import PipelineIterator

        dummy_dataset = [{"id": torch.LongTensor([[10, 20], [0, 1], [0, 2]])}, {"id": torch.LongTensor([[3]])}]

        def add(number, extra=0):
            return {"id": number["id"] + extra}

        dataset = PipelineIterator(dummy_dataset, add, {"extra": 2}, loader_batch_size=3)

        outputs = list(dataset)
        self.assertEqual(
            nested_simplify(outputs), [{"id": [[12, 22]]}, {"id": [[2, 3]]}, {"id": [[2, 4]]}, {"id": [[5]]}]
        )

    @require_torch
    def test_pipeline_chunk_iterator(self):
        from transformers.pipelines.pt_utils import PipelineChunkIterator

        def preprocess_chunk(n: int):
            yield from range(n)

        dataset = [2, 3]

        dataset = PipelineChunkIterator(dataset, preprocess_chunk, {}, loader_batch_size=3)

        outputs = list(dataset)

        self.assertEqual(outputs, [0, 1, 0, 1, 2])

    @require_torch
    def test_pipeline_pack_iterator(self):
        from transformers.pipelines.pt_utils import PipelinePackIterator

        def pack(item):
            return {"id": item["id"] + 1, "is_last": item["is_last"]}

        dataset = [
            {"id": 0, "is_last": False},
            {"id": 1, "is_last": True},
            {"id": 0, "is_last": False},
            {"id": 1, "is_last": False},
            {"id": 2, "is_last": True},
        ]

        dataset = PipelinePackIterator(dataset, pack, {})

        outputs = list(dataset)
        # Items are grouped up to and including each `is_last=True` entry.
        self.assertEqual(
            outputs,
            [
                [
                    {"id": 1},
                    {"id": 2},
                ],
                [
                    {"id": 1},
                    {"id": 2},
                    {"id": 3},
                ],
            ],
        )

    @require_torch
    def test_pipeline_pack_unbatch_iterator(self):
        from transformers.pipelines.pt_utils import PipelinePackIterator

        def add(number, extra=0):
            return {"id": [i + extra for i in number["id"]], "is_last": number["is_last"]}

        dummy_dataset = [{"id": [0, 1, 2], "is_last": [False, True, False]}, {"id": [3], "is_last": [True]}]
        dataset = PipelinePackIterator(dummy_dataset, add, {"extra": 2}, loader_batch_size=3)

        outputs = list(dataset)
        self.assertEqual(outputs, [[{"id": 2}, {"id": 3}], [{"id": 4}, {"id": 5}]])

        # `is_last` is False across the whole first batch (same `add` helper as above).
        dummy_dataset = [{"id": [0, 1, 2], "is_last": [False, False, False]}, {"id": [3], "is_last": [True]}]
        dataset = PipelinePackIterator(dummy_dataset, add, {"extra": 2}, loader_batch_size=3)

        outputs = list(dataset)
        self.assertEqual(outputs, [[{"id": 2}, {"id": 3}, {"id": 4}, {"id": 5}]])

    # Builds a real pipeline, so it is guarded like the other pipeline-constructing tests.
    @require_torch
    def test_pipeline_negative_device(self):
        # To avoid regressing, pipeline used to accept device=-1
        classifier = pipeline("text-generation", "hf-internal-testing/tiny-random-bert", device=-1)

        expected_output = [{"generated_text": ANY(str)}]
        actual_output = classifier("Test input.")
        self.assertEqual(expected_output, actual_output)

    @require_torch_accelerator
    def test_pipeline_no_device(self):
        # Test when no device is passed to pipeline
        import torch

        from transformers import AutoModelForCausalLM

        tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-bert")
        # Case 1: Model is manually moved to device
        model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-bert", dtype=torch.float16).to(
            torch_device
        )
        model_device = model.device
        pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
        self.assertEqual(pipe.model.device, model_device)
        # Case 2: Model is loaded by accelerate
        model = AutoModelForCausalLM.from_pretrained(
            "hf-internal-testing/tiny-random-bert", device_map=torch_device, dtype=torch.float16
        )
        model_device = model.device
        pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
        self.assertEqual(pipe.model.device, model_device)
        # Case 3: device_map is passed to model and device is passed to pipeline
        model = AutoModelForCausalLM.from_pretrained(
            "hf-internal-testing/tiny-random-bert", device_map=torch_device, dtype=torch.float16
        )
        with self.assertRaises(ValueError):
            pipe = pipeline("text-generation", model=model, device="cpu", tokenizer=tokenizer)

    @require_torch_multi_accelerator
    def test_pipeline_device_not_equal_model_device(self):
        # Test when device ids are different, pipeline should move the model to the passed device id
        import torch

        from transformers import AutoModelForCausalLM

        tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-bert")
        model_device = f"{torch_device}:1"
        model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-bert", dtype=torch.float16).to(
            model_device
        )
        target_device = f"{torch_device}:0"
        self.assertNotEqual(model_device, target_device)
        pipe = pipeline("text-generation", model=model, device=target_device, tokenizer=tokenizer)
        self.assertEqual(pipe.model.device, torch.device(target_device))

    @slow
    @require_torch
    def test_load_default_pipelines_pt(self):
        import torch

        from transformers.pipelines import SUPPORTED_TASKS

        set_seed_fn = lambda: torch.manual_seed(0)  # noqa: E731
        for task in SUPPORTED_TASKS:
            if task == "table-question-answering":
                # test table in separate test due to more dependencies
                continue

            self.check_default_pipeline(task, set_seed_fn, self.check_models_equal_pt)

            # clean-up as much as possible GPU memory occupied by PyTorch
            gc.collect()
            backend_empty_cache(torch_device)

    @slow
    @require_torch
    def test_load_default_pipelines_pt_table_qa(self):
        import torch

        set_seed_fn = lambda: torch.manual_seed(0)  # noqa: E731
        self.check_default_pipeline("table-question-answering", set_seed_fn, self.check_models_equal_pt)

        # clean-up as much as possible GPU memory occupied by PyTorch
        gc.collect()
        backend_empty_cache(torch_device)

    @slow
    @require_torch
    @require_torch_accelerator
    def test_pipeline_accelerator(self):
        pipe = pipeline("text-generation", device=torch_device)
        _ = pipe("Hello")

    @slow
    @require_torch
    @require_torch_accelerator
    def test_pipeline_accelerator_indexed(self):
        # Pass an explicitly indexed device string so this differs from `test_pipeline_accelerator`.
        # Same `f"{torch_device}:<idx>"` form as `test_pipeline_device_not_equal_model_device`.
        pipe = pipeline("text-generation", device=f"{torch_device}:0")
        _ = pipe("Hello")

    @slow
    @require_torch
    def test_bc_torch_device(self):
        # Backward-compatibility check: the `torch_dtype` spelling must give the same
        # parameter dtypes as `dtype`, both as a direct kwarg and inside `model_kwargs`.
        import torch

        from transformers.pipelines import get_supported_tasks

        for task in get_supported_tasks():
            # Check that it works for all dtypes
            for dtype in ["float16", "bfloat16", "float32", "auto", torch.float16, torch.bfloat16, torch.float32]:
                pipe_torch_dtype = pipeline(task, torch_dtype=dtype)
                pipe_dtype = pipeline(task, dtype=dtype)

                # Make sure all parameters have the same dtype
                for (k1, v1), (k2, v2) in zip(
                    pipe_torch_dtype.model.named_parameters(), pipe_dtype.model.named_parameters()
                ):
                    self.assertEqual(k1, k2)
                    self.assertEqual(v1.dtype, v2.dtype)

                pipe_torch_dtype = pipeline(task, model_kwargs={"torch_dtype": dtype})
                pipe_dtype = pipeline(task, model_kwargs={"dtype": dtype})

                # Make sure all parameters have the same dtype
                for (k1, v1), (k2, v2) in zip(
                    pipe_torch_dtype.model.named_parameters(), pipe_dtype.model.named_parameters()
                ):
                    self.assertEqual(k1, k2)
                    self.assertEqual(v1.dtype, v2.dtype)

    def check_default_pipeline(self, task, set_seed_fn, check_models_equal_fn):
        # Load the default checkpoint(s) of `task` both by hand (through the task's auto class)
        # and through `pipeline(task)`, then require `check_models_equal_fn` to deem them equal.
        # `set_seed_fn` is called before every load.
        from transformers.pipelines import SUPPORTED_TASKS, pipeline

        task_dict = SUPPORTED_TASKS[task]
        # test to compare pipeline to manually loading the respective model
        model = None
        relevant_auto_classes = task_dict["pt"]

        if len(relevant_auto_classes) == 0:
            # task has no default
            self.skipTest(f"{task} in pytorch has no default")

        # by default use first class
        auto_model_cls = relevant_auto_classes[0]

        # retrieve correct model ids
        if task == "translation":
            # special case for translation pipeline which has multiple languages
            model_ids = []
            revisions = []
            tasks = []
            for translation_pair in task_dict["default"]:
                model_id, revision = task_dict["default"][translation_pair]["model"]

                model_ids.append(model_id)
                revisions.append(revision)
                tasks.append(task + f"_{'_to_'.join(translation_pair)}")
        else:
            # normal case - non-translation pipeline
            model_id, revision = task_dict["default"]["model"]

            model_ids = [model_id]
            revisions = [revision]
            tasks = [task]

        # check for equality
        for model_id, revision, task in zip(model_ids, revisions, tasks):
            # load default model
            try:
                set_seed_fn()
                model = auto_model_cls.from_pretrained(model_id, revision=revision)
            except ValueError:
                # first auto class is possibly not compatible with model, go to next model class
                auto_model_cls = relevant_auto_classes[1]
                set_seed_fn()
                model = auto_model_cls.from_pretrained(model_id, revision=revision)

            # load default pipeline
            set_seed_fn()
            default_pipeline = pipeline(task)

            # compare pipeline model with default model
            models_are_equal = check_models_equal_fn(default_pipeline.model, model)
            self.assertTrue(models_are_equal, f"{task} model doesn't match pipeline.")

    def check_models_equal_pt(self, model1, model2):
        # Return True only if both models have the same number of parameters and every
        # pair of parameters, taken in `parameters()` order, is element-wise equal.
        params1 = list(model1.parameters())
        params2 = list(model2.parameters())
        # `zip` alone would stop at the shorter list and report a prefix match as equal.
        if len(params1) != len(params2):
            return False

        for model1_p, model2_p in zip(params1, params2):
            if model1_p.data.ne(model2_p.data).sum() > 0:
                # The first differing parameter settles the answer.
                return False
        return True
class CustomPipeline(Pipeline):
    """Minimal `Pipeline` subclass: tokenize text, run the model, softmax the logits."""

    def _sanitize_parameters(self, **kwargs):
        # Only `maybe_arg` is forwarded, and only to `preprocess`; the two other
        # parameter dicts of the returned triple are always empty.
        preprocess_params = {"maybe_arg": kwargs["maybe_arg"]} if "maybe_arg" in kwargs else {}
        return preprocess_params, {}, {}

    def preprocess(self, text, maybe_arg=2):
        # `maybe_arg` is accepted but not used.
        return self.tokenizer(text, return_tensors="pt")

    def _forward(self, model_inputs):
        return self.model(**model_inputs)

    def postprocess(self, model_outputs):
        logits = model_outputs["logits"]
        probabilities = logits.softmax(-1)
        return probabilities.numpy()
@is_pipeline_test
class CustomPipelineTest(unittest.TestCase):
def test_warning_logs(self):
    # Re-registering an existing task alias must log an "already registered" message.
    transformers_logging.set_verbosity_debug()
    base_logger = transformers_logging.get_logger("transformers.pipelines.base")

    alias = "text-classification"
    # Remember the task definition currently registered under the alias so it can be put back
    # afterwards (otherwise the subsequent tests in `TextClassificationPipelineTests` will fail).
    original_task = PIPELINE_REGISTRY.check_task(alias)[1]

    try:
        with CaptureLogger(base_logger) as captured:
            PIPELINE_REGISTRY.register_pipeline(alias, PairClassificationPipeline)
        self.assertIn(f"{alias} is already registered", captured.out)
    finally:
        # restore
        PIPELINE_REGISTRY.supported_tasks[alias] = original_task
def test_register_pipeline(self):
    # Register a custom task and check that the registry stores exactly what was passed.
    PIPELINE_REGISTRY.register_pipeline(
        "custom-text-classification",
        pipeline_class=PairClassificationPipeline,
        pt_model=AutoModelForSequenceClassification if is_torch_available() else None,
        default={"model": ("hf-internal-testing/tiny-random-distilbert", "2ef615d")},
        type="text",
    )
    try:
        # `assertIn` instead of a bare `assert`: it survives `python -O` and reports
        # the container contents on failure.
        self.assertIn("custom-text-classification", PIPELINE_REGISTRY.get_supported_tasks())

        _, task_def, _ = PIPELINE_REGISTRY.check_task("custom-text-classification")
        self.assertEqual(task_def["pt"], (AutoModelForSequenceClassification,) if is_torch_available() else ())
        self.assertEqual(task_def["type"], "text")
        self.assertEqual(task_def["impl"], PairClassificationPipeline)
        self.assertEqual(task_def["default"], {"model": ("hf-internal-testing/tiny-random-distilbert", "2ef615d")})
    finally:
        # Clean registry for next tests, even when one of the assertions above fails.
        del PIPELINE_REGISTRY.supported_tasks["custom-text-classification"]
@require_torch
def test_dynamic_pipeline(self):
    # Round-trip a pipeline built from a dynamically registered task through `save_pretrained`
    # and reload it both with and without `trust_remote_code`.
    PIPELINE_REGISTRY.register_pipeline(
        "pair-classification",
        pipeline_class=PairClassificationPipeline,
        pt_model=AutoModelForSequenceClassification if is_torch_available() else None,
    )
    try:
        classifier = pipeline("pair-classification", model="hf-internal-testing/tiny-random-bert")
    finally:
        # Clean registry as we won't need the pipeline to be in it for the rest to work.
        # Done in `finally` so a failed load cannot leave the custom task registered.
        del PIPELINE_REGISTRY.supported_tasks["pair-classification"]

    with tempfile.TemporaryDirectory() as tmp_dir:
        classifier.save_pretrained(tmp_dir)
        # checks
        self.assertDictEqual(
            classifier.model.config.custom_pipelines,
            {
                "pair-classification": {
                    "impl": "custom_pipeline.PairClassificationPipeline",
                    "pt": ("AutoModelForSequenceClassification",) if is_torch_available() else (),
                }
            },
        )
        # Fails if the user forgets to pass along `trust_remote_code=True`
        with self.assertRaises(ValueError):
            _ = pipeline(model=tmp_dir)

        new_classifier = pipeline(model=tmp_dir, trust_remote_code=True)
        # Using trust_remote_code=False forces the traditional pipeline tag
        old_classifier = pipeline("text-classification", model=tmp_dir, trust_remote_code=False)
    # Can't make an isinstance check because the new_classifier is from the PairClassificationPipeline class of a
    # dynamic module
    self.assertEqual(new_classifier.__class__.__name__, "PairClassificationPipeline")
    self.assertEqual(new_classifier.task, "pair-classification")
    results = new_classifier("I hate you", second_text="I love you")
    self.assertDictEqual(
        nested_simplify(results),
        {"label": "LABEL_0", "score": 0.505, "logits": [-0.003, -0.024]},
    )

    self.assertEqual(old_classifier.__class__.__name__, "TextClassificationPipeline")
    self.assertEqual(old_classifier.task, "text-classification")
    results = old_classifier("I hate you", text_pair="I love you")
    self.assertListEqual(
        nested_simplify(results),
        [{"label": "LABEL_0", "score": 0.505}],
    )
@require_torch
def test_cached_pipeline_has_minimum_calls_to_head(self):
    checkpoint = "hf-internal-testing/tiny-random-bert"
    # First load, outside the counter: make sure we have cached the pipeline.
    _ = pipeline("text-classification", model=checkpoint)
    # Second load: count the requests it issues.
    with RequestCounter() as request_counter:
        _ = pipeline("text-classification", model=checkpoint)
    self.assertEqual(request_counter["GET"], 0)
    self.assertEqual(request_counter["HEAD"], 1)
    self.assertEqual(request_counter.total_calls, 1)
@require_torch
def test_chunk_pipeline_batching_single_file(self):
    """Check that the chunks of a single audio file are sent through the model in one forward call.

    The audio is processed with `chunk_length_s=3` and a `batch_size` of 1024; the test wraps
    `pipe.model.forward` with a counting proxy and expects exactly one invocation.
    """
    # The pipeline is loaded once; the previous version built it twice and discarded the first instance.
    pipe = pipeline(model="hf-internal-testing/tiny-random-Wav2Vec2ForCTC")
    ds = datasets.load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
    audio = ds[40]["audio"]["array"]

    forward_calls = 0
    forward = pipe.model.forward

    def new_forward(*args, **kwargs):
        # `nonlocal` is what makes the closure update the enclosing counter; without it,
        # `forward_calls += 1` would try to rebind a fresh local name and fail.
        nonlocal forward_calls
        forward_calls += 1
        return forward(*args, **kwargs)

    pipe.model.forward = new_forward

    # Iterate over the result so that any lazily produced output is fully consumed.
    for out in pipe(audio, return_timestamps="char", chunk_length_s=3, stride_length_s=[1, 1], batch_size=1024):
        pass

    self.assertEqual(forward_calls, 1)
@require_torch
def test_custom_code_with_string_tokenizer(self):
    """Load a custom-code model whose tokenizer is given as a repo name instead of an object.

    Tokenizer loading used to fail in this configuration.
    See https://github.com/huggingface/transformers/issues/31669
    """
    repo_id = "hf-internal-testing/tiny-random-custom-architecture"
    text_generator = pipeline("text-generation", model=repo_id, tokenizer=repo_id, trust_remote_code=True)

    # Getting a pipeline of the expected class is the success criterion.
    self.assertIsInstance(text_generator, TextGenerationPipeline)
@require_torch
def test_custom_code_with_string_feature_extractor(self):
    """Load a custom-code model whose feature extractor is given as a repo name instead of an object."""
    repo_id = "hf-internal-testing/fake-custom-wav2vec2"
    speech_recognizer = pipeline(
        "automatic-speech-recognition",
        model=repo_id,
        feature_extractor=repo_id,
        # Test workaround - the pipeline requires a tokenizer
        tokenizer="facebook/wav2vec2-base-960h",
        trust_remote_code=True,
    )

    # Getting a pipeline of the expected class is the success criterion.
    self.assertIsInstance(speech_recognizer, AutomaticSpeechRecognitionPipeline)
@require_torch
def test_custom_code_with_string_preprocessor(self):
    """Load a custom-code model whose processor is given as a repo name instead of an object."""
    repo_id = "hf-internal-testing/fake-custom-sam"
    mask_generator = pipeline("mask-generation", model=repo_id, processor=repo_id, trust_remote_code=True)

    # Getting a pipeline of the expected class is the success criterion.
    self.assertIsInstance(mask_generator, MaskGenerationPipeline)
@require_torch
@is_staging_test
class DynamicPipelineTester(unittest.TestCase):
vocab_tokens = ["[UNK]", "[CLS]", "[SEP]", "[PAD]", "[MASK]", "I", "love", "hate", "you"]
@classmethod
def setUpClass(cls):
    """Store the module-level `TOKEN` on the class so Hub calls made by this class can reuse it."""
    cls._token = TOKEN
@classmethod
def tearDownClass(cls):
    """Best-effort removal of every Hub repository this test class may have created.

    `test_push_to_hub_dynamic_pipeline` pushes two repositories: the pipeline itself and a
    "finetuned" copy of its model. Both are removed here; previously only the first one was,
    which left the second behind after each run.
    """
    for repo_id in ("test-dynamic-pipeline", "test-pipeline-for-a-finetuned-model"):
        # Each repo gets its own `try` so a failure on one does not prevent cleaning the other.
        try:
            delete_repo(token=cls._token, repo_id=repo_id)
        except HfHubHTTPError:
            # Deliberately ignored: the repo may never have been created (test skipped or failed early).
            pass
@unittest.skip("Broken, TODO @Yih-Dar")
def test_push_to_hub_dynamic_pipeline(self):
    """Round-trip a custom "pair-classification" pipeline through the Hub.

    The pipeline is registered, saved, pushed, reloaded with and without `trust_remote_code`,
    and finally reloaded from a second repo that only received the model.
    """
    from transformers import BertConfig, BertForSequenceClassification, BertTokenizer

    pipeline_repo = f"{USER}/test-dynamic-pipeline"
    finetuned_repo = f"{USER}/test-pipeline-for-a-finetuned-model"
    text, other_text = "I hate you", "I love you"

    PIPELINE_REGISTRY.register_pipeline(
        "pair-classification",
        pipeline_class=PairClassificationPipeline,
        pt_model=AutoModelForSequenceClassification,
    )
    bert_config = BertConfig(
        vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
    )
    model = BertForSequenceClassification(bert_config).eval()

    with tempfile.TemporaryDirectory() as tmp_dir:
        # Write a minimal vocabulary, one token per line, and build the tokenizer from it.
        vocab_file = os.path.join(tmp_dir, "vocab.txt")
        with open(vocab_file, "w", encoding="utf-8") as vocab_writer:
            vocab_writer.write("".join(f"{token}\n" for token in self.vocab_tokens))
        tokenizer = BertTokenizer(vocab_file)

        classifier = pipeline("pair-classification", model=model, tokenizer=tokenizer)

        # Clean registry as we won't need the pipeline to be in it for the rest to work.
        del PIPELINE_REGISTRY.supported_tasks["pair-classification"]

        classifier.save_pretrained(tmp_dir)

        # `save_pretrained` must have recorded the custom pipeline in the model config.
        expected_local_entry = {
            "pair-classification": {
                "impl": "custom_pipeline.PairClassificationPipeline",
                "pt": ("AutoModelForSequenceClassification",),
            }
        }
        self.assertDictEqual(classifier.model.config.custom_pipelines, expected_local_entry)

        # Push the pipeline to the Hub.
        classifier.push_to_hub(pipeline_repo, token=self._token)

    # Loading must fail when the user forgets to pass `trust_remote_code=True`.
    with self.assertRaises(ValueError):
        _ = pipeline(model=pipeline_repo)

    new_classifier = pipeline(model=pipeline_repo, trust_remote_code=True)
    # Can't make an isinstance check because the new_classifier is from the PairClassificationPipeline class of a
    # dynamic module
    self.assertEqual(new_classifier.__class__.__name__, "PairClassificationPipeline")

    # Check that the tag exists: it needs to be added when a custom pipeline is loaded from the Hub,
    # which is useful for cases such as finetuning.
    expected_hub_entry = {
        "pair-classification": {
            "impl": f"{USER}/test-dynamic-pipeline--custom_pipeline.PairClassificationPipeline",
            "pt": ("AutoModelForSequenceClassification",),
        }
    }
    self.assertDictEqual(new_classifier.model.config.custom_pipelines, expected_hub_entry)

    # Test that the pipeline still works after the model is finetuned (in practice: that it still
    # works when loaded from the final repo). This is what the `user/repo--module.class` form is for.
    # NOTE(review): the repo is passed as `repo_name=` here - confirm this keyword matches the current
    # `push_to_hub` signature; the test is skipped as "Broken".
    new_classifier.model.push_to_hub(repo_name=finetuned_repo, token=self._token)
    del new_classifier  # free up memory
    new_classifier = pipeline(model=finetuned_repo, trust_remote_code=True)

    results = classifier(text, second_text=other_text)
    new_results = new_classifier(text, second_text=other_text)
    self.assertDictEqual(nested_simplify(results), nested_simplify(new_results))

    # Using trust_remote_code=False forces the traditional pipeline tag
    old_classifier = pipeline("text-classification", model=pipeline_repo, trust_remote_code=False)
    self.assertEqual(old_classifier.__class__.__name__, "TextClassificationPipeline")
    self.assertEqual(old_classifier.task, "text-classification")

    new_results = old_classifier(text, text_pair=other_text)
    label_and_score = [{"label": results["label"], "score": results["score"]}]
    self.assertListEqual(nested_simplify(label_and_score), nested_simplify(new_results))

View File

@@ -0,0 +1,161 @@
# Copyright 2021 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import datasets
from huggingface_hub import DepthEstimationOutput
from huggingface_hub.utils import insecure_hashlib
from transformers import MODEL_FOR_DEPTH_ESTIMATION_MAPPING, is_torch_available, is_vision_available
from transformers.pipelines import DepthEstimationPipeline, pipeline
from transformers.testing_utils import (
compare_pipeline_output_to_hub_spec,
is_pipeline_test,
nested_simplify,
require_timm,
require_torch,
require_vision,
slow,
)
from .test_pipelines_common import ANY
if is_torch_available():
import torch
if is_vision_available():
    from PIL import Image
else:

    class Image:
        """Placeholder for `PIL.Image` so this module can still be imported without the vision extras."""

        @staticmethod
        def open(*args, **kwargs):
            """Accept any arguments and do nothing."""
            return None
def hashimage(image: Image) -> str:
    """Return the hexadecimal MD5 digest of the bytes returned by `image.tobytes()`."""
    return insecure_hashlib.md5(image.tobytes()).hexdigest()
@is_pipeline_test
@require_vision
@require_timm
@require_torch
class DepthEstimationPipelineTests(unittest.TestCase):
model_mapping = MODEL_FOR_DEPTH_ESTIMATION_MAPPING
_dataset = None
@classmethod
def _load_dataset(cls):
    """Load the image fixtures into `cls._dataset` the first time this is called; later calls are no-ops."""
    # Lazy loading: the dataset is stored on the class, so it is only fetched once per pytest process.
    if cls._dataset is not None:
        return

    # we use revision="refs/pr/1" until the PR is merged
    # https://hf.co/datasets/hf-internal-testing/fixtures_image_utils/discussions/1
    cls._dataset = datasets.load_dataset(
        "hf-internal-testing/fixtures_image_utils",
        split="test",
        revision="refs/pr/1",
    )
def get_test_pipeline(
    self,
    model,
    tokenizer=None,
    image_processor=None,
    feature_extractor=None,
    processor=None,
    dtype="float32",
):
    """Build a `DepthEstimationPipeline` from the given components.

    Returns:
        A `(pipeline, examples)` tuple where `examples` holds the same local COCO image path twice.
    """
    sample_image = "./tests/fixtures/tests_samples/COCO/000000039769.png"
    components = {
        "model": model,
        "tokenizer": tokenizer,
        "feature_extractor": feature_extractor,
        "image_processor": image_processor,
        "processor": processor,
        "dtype": dtype,
    }
    depth_estimator = DepthEstimationPipeline(**components)
    return depth_estimator, [sample_image, sample_image]
def run_pipeline_test(self, depth_estimator, examples):
    """Run the pipeline on a single image and on a mixed batch, checking only the output structure."""
    self._load_dataset()
    local_image = "./tests/fixtures/tests_samples/COCO/000000039769.png"

    def expected_entry():
        # Every result must hold a tensor under "predicted_depth" and a PIL image under "depth".
        return {"predicted_depth": ANY(torch.Tensor), "depth": ANY(Image.Image)}

    # Single input given as a file path.
    outputs = depth_estimator(local_image)
    self.assertEqual(expected_entry(), outputs)

    # Batch mixing an opened image, a URL, and dataset images.
    batch = [
        Image.open(local_image),
        "http://images.cocodataset.org/val2017/000000039769.jpg",
        # RGBA
        self._dataset[0]["image"],
        # LA
        self._dataset[1]["image"],
        # L
        self._dataset[2]["image"],
    ]
    outputs = depth_estimator(batch)
    self.assertEqual([expected_entry() for _ in range(5)], outputs)

    for single_output in outputs:
        compare_pipeline_output_to_hub_spec(single_output, DepthEstimationOutput)
@slow
@require_torch
def test_large_model_pt(self):
    """Run `Intel/dpt-large` on a COCO image and check the extreme values of the predicted depth."""
    depth_estimator = pipeline("depth-estimation", model="Intel/dpt-large")
    outputs = depth_estimator("http://images.cocodataset.org/val2017/000000039769.jpg")

    # The depth image is replaced by its hash; the comparison on it is disabled below.
    outputs["depth"] = hashimage(outputs["depth"])
    # This seems flaky.
    # self.assertEqual(outputs["depth"], "1a39394e282e9f3b0741a90b9f108977")

    predicted_depth = outputs["predicted_depth"]
    self.assertEqual(nested_simplify(predicted_depth.max().item()), 29.306)
    self.assertEqual(nested_simplify(predicted_depth.min().item()), 2.662)
@require_torch
def test_small_model_pt(self):
    """Placeholder for the small-model test; always skipped."""
    # This is highly irregular to have no small tests.
    skip_reason = "There is not hf-internal-testing tiny model for either GLPN nor DPT"
    self.skipTest(reason=skip_reason)
@require_torch
def test_multiprocess(self):
    """Run a tiny depth model with `num_workers=2` on two images and check the output structure."""
    sample_image = "./tests/fixtures/tests_samples/COCO/000000039769.png"
    depth_estimator = pipeline(
        model="hf-internal-testing/tiny-random-DepthAnythingForDepthEstimation",
        num_workers=2,
    )

    outputs = depth_estimator([sample_image, sample_image])

    # One structurally valid result is expected per input image.
    expected = [{"predicted_depth": ANY(torch.Tensor), "depth": ANY(Image.Image)} for _ in range(2)]
    self.assertEqual(expected, outputs)

View File

@@ -0,0 +1,424 @@
# Copyright 2022 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from transformers import (
MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING,
AutoTokenizer,
is_torch_available,
is_vision_available,
)
from transformers.pipelines import DocumentQuestionAnsweringPipeline, pipeline
from transformers.pipelines.document_question_answering import apply_tesseract
from transformers.testing_utils import (
is_pipeline_test,
nested_simplify,
require_detectron2,
require_pytesseract,
require_torch,
require_torch_bf16,
require_vision,
slow,
)
from .test_pipelines_common import ANY
if is_torch_available():
import torch
if is_vision_available():
    from PIL import Image

    from transformers.image_utils import load_image
else:

    class Image:
        """Placeholder for `PIL.Image` so this module can still be imported without the vision extras."""

        @staticmethod
        def open(*args, **kwargs):
            """Accept any arguments and do nothing."""
            return None

    def load_image(_):
        """Placeholder for `transformers.image_utils.load_image`; ignores its argument."""
        return None


# This is a pinned image from a specific revision of a document question answering space, hosted by HuggingFace,
# so we can expect it to be available.
INVOICE_URL = "https://huggingface.co/spaces/impira/docquery/resolve/2f6c96314dc84dfda62d40de9da55f2f5165d403/invoice.png"
@is_pipeline_test
@require_torch
@require_vision
class DocumentQuestionAnsweringPipelineTests(unittest.TestCase):
model_mapping = MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING
@require_pytesseract
@require_vision
def get_test_pipeline(
    self,
    model,
    tokenizer=None,
    image_processor=None,
    feature_extractor=None,
    processor=None,
    dtype="float32",
):
    """Build a `DocumentQuestionAnsweringPipeline` and three example inputs for it.

    Returns:
        A `(pipeline, examples)` tuple. The examples pass the invoice as a loaded image, as a URL,
        and as a URL accompanied by precomputed `word_boxes`.
    """
    dqa_pipeline = DocumentQuestionAnsweringPipeline(
        model=model,
        tokenizer=tokenizer,
        feature_extractor=feature_extractor,
        image_processor=image_processor,
        processor=processor,
        dtype=dtype,
        max_new_tokens=20,
    )

    question = "What is the placebo?"
    # `apply_tesseract` output is transposed with `zip(*...)` into one tuple per entry.
    ocr_result = apply_tesseract(load_image(INVOICE_URL), None, "")
    word_boxes = list(zip(*ocr_result))

    as_loaded_image = {"image": load_image(INVOICE_URL), "question": question}
    as_url = {"image": INVOICE_URL, "question": question}
    with_word_boxes = {"image": INVOICE_URL, "question": question, "word_boxes": word_boxes}

    return dqa_pipeline, [as_loaded_image, as_url, with_word_boxes]
def run_pipeline_test(self, dqa_pipeline, examples):
    """Run the pipeline on all examples with `top_k=2` and check only the structure of the answers."""

    def answer_schema():
        # Field types every answer must have; the actual values are not checked.
        return {"score": ANY(float), "answer": ANY(str), "start": ANY(int), "end": ANY(int)}

    outputs = dqa_pipeline(examples, top_k=2)

    # Three results are expected, each holding two answers.
    expected = [[answer_schema(), answer_schema()] for _ in range(3)]
    self.assertEqual(outputs, expected)
@require_torch
@require_detectron2
@require_pytesseract
def test_small_model_pt(self):
    """Check a tiny LayoutLMv2 model on the invoice, then on an image that yields no answers."""
    dqa_pipeline = pipeline(
        "document-question-answering", model="hf-internal-testing/tiny-random-layoutlmv2-for-dqa-test"
    )
    question = "How many cats are there?"
    expected_output = [
        {"score": 0.0001, "answer": "oy 2312/2019", "start": 38, "end": 39},
        {"score": 0.0001, "answer": "oy 2312/2019 DUE", "start": 38, "end": 40},
    ]

    # Same query, passed first as keyword arguments and then as a single dict.
    from_kwargs = dqa_pipeline(image=INVOICE_URL, question=question, top_k=2)
    self.assertEqual(nested_simplify(from_kwargs, decimals=4), expected_output)

    from_dict = dqa_pipeline({"image": INVOICE_URL, "question": question}, top_k=2)
    self.assertEqual(nested_simplify(from_dict, decimals=4), expected_output)

    # This image does not detect ANY text in it, meaning layoutlmv2 should fail.
    # Empty answer probably
    textless_image = "./tests/fixtures/tests_samples/COCO/000000039769.png"
    no_text_outputs = dqa_pipeline(image=textless_image, question=question, top_k=2)
    self.assertEqual(no_text_outputs, [])

    # We can optionally pass directly the words and bounding boxes
    explicit_outputs = dqa_pipeline(image=textless_image, question=question, words=[], boxes=[], top_k=2)
    self.assertEqual(explicit_outputs, [])
@require_torch
@require_torch_bf16
@require_detectron2
@require_pytesseract
def test_small_model_pt_bf16(self):
    """Same checks as the small-model test, with the pipeline created with `dtype=torch.bfloat16`."""
    dqa_pipeline = pipeline(
        "document-question-answering",
        model="hf-internal-testing/tiny-random-layoutlmv2-for-dqa-test",
        dtype=torch.bfloat16,
    )
    question = "How many cats are there?"
    expected_output = [
        {"score": 0.0001, "answer": "oy 2312/2019", "start": 38, "end": 39},
        {"score": 0.0001, "answer": "oy 2312/2019 DUE", "start": 38, "end": 40},
    ]

    # Same query, passed first as keyword arguments and then as a single dict.
    from_kwargs = dqa_pipeline(image=INVOICE_URL, question=question, top_k=2)
    self.assertEqual(nested_simplify(from_kwargs, decimals=4), expected_output)

    from_dict = dqa_pipeline({"image": INVOICE_URL, "question": question}, top_k=2)
    self.assertEqual(nested_simplify(from_dict, decimals=4), expected_output)

    # This image does not detect ANY text in it, meaning layoutlmv2 should fail.
    # Empty answer probably
    textless_image = "./tests/fixtures/tests_samples/COCO/000000039769.png"
    no_text_outputs = dqa_pipeline(image=textless_image, question=question, top_k=2)
    self.assertEqual(no_text_outputs, [])

    # We can optionally pass directly the words and bounding boxes
    explicit_outputs = dqa_pipeline(image=textless_image, question=question, words=[], boxes=[], top_k=2)
    self.assertEqual(explicit_outputs, [])
# TODO: Enable this once hf-internal-testing/tiny-random-donut is implemented
# @require_torch
# def test_small_model_pt_donut(self):
# dqa_pipeline = pipeline("document-question-answering", model="hf-internal-testing/tiny-random-donut")
# # dqa_pipeline = pipeline("document-question-answering", model="../tiny-random-donut")
# image = "https://templates.invoicehome.com/invoice-template-us-neat-750px.png"
# question = "How many cats are there?"
#
# outputs = dqa_pipeline(image=image, question=question, top_k=2)
# self.assertEqual(
# nested_simplify(outputs, decimals=4), [{"score": 0.8799, "answer": "2"}, {"score": 0.296, "answer": "1"}]
# )
@slow
@require_torch
@require_detectron2
@require_pytesseract
def test_large_model_pt(self):
    """Check a pinned LayoutLMv2 DocVQA checkpoint on the invoice for kwargs, dict and batched inputs."""
    dqa_pipeline = pipeline(
        "document-question-answering",
        model="tiennvcs/layoutlmv2-base-uncased-finetuned-docvqa",
        revision="9977165",
    )
    question = "What is the invoice number?"
    query = {"image": INVOICE_URL, "question": question}
    expected_answers = [
        {"score": 0.9944, "answer": "us-001", "start": 16, "end": 16},
        {"score": 0.0009, "answer": "us-001", "start": 16, "end": 16},
    ]

    outputs = dqa_pipeline(image=INVOICE_URL, question=question, top_k=2)
    self.assertEqual(nested_simplify(outputs, decimals=4), expected_answers)

    outputs = dqa_pipeline(query, top_k=2)
    self.assertEqual(nested_simplify(outputs, decimals=4), expected_answers)

    # A batch of two identical queries must give the same answers twice.
    outputs = dqa_pipeline([dict(query), dict(query)], top_k=2)
    self.assertEqual(nested_simplify(outputs, decimals=4), [expected_answers, expected_answers])
@slow
@require_torch
@require_detectron2
@require_pytesseract
def test_large_model_pt_chunk(self):
    """Same checkpoint as the large-model test, with the pipeline created with `max_seq_len=50`."""
    dqa_pipeline = pipeline(
        "document-question-answering",
        model="tiennvcs/layoutlmv2-base-uncased-finetuned-docvqa",
        revision="9977165",
        max_seq_len=50,
    )
    question = "What is the invoice number?"
    query = {"image": INVOICE_URL, "question": question}
    expected_answers = [
        {"score": 0.9974, "answer": "1110212019", "start": 23, "end": 23},
        {"score": 0.9948, "answer": "us-001", "start": 16, "end": 16},
    ]

    outputs = dqa_pipeline(image=INVOICE_URL, question=question, top_k=2)
    self.assertEqual(nested_simplify(outputs, decimals=4), expected_answers)

    outputs = dqa_pipeline(query, top_k=2)
    self.assertEqual(nested_simplify(outputs, decimals=4), expected_answers)

    # A batch of two identical queries must give the same answers twice.
    outputs = dqa_pipeline([dict(query), dict(query)], top_k=2)
    self.assertEqual(nested_simplify(outputs, decimals=4), [expected_answers, expected_answers])
@slow
@require_torch
@require_pytesseract
@require_vision
def test_large_model_pt_layoutlm(self):
    """Check a pinned LayoutLM checkpoint on the invoice, including a query that supplies only `word_boxes`."""
    checkpoint, revision = "impira/layoutlm-document-qa", "3dc6de3"
    tokenizer = AutoTokenizer.from_pretrained(checkpoint, revision=revision, add_prefix_space=True)
    dqa_pipeline = pipeline(
        "document-question-answering",
        model=checkpoint,
        tokenizer=tokenizer,
        revision=revision,
    )
    question = "What is the invoice number?"
    query = {"image": INVOICE_URL, "question": question}
    expected_answers = [
        {"score": 0.425, "answer": "us-001", "start": 16, "end": 16},
        {"score": 0.082, "answer": "1110212019", "start": 23, "end": 23},
    ]

    outputs = dqa_pipeline(image=INVOICE_URL, question=question, top_k=2)
    self.assertEqual(nested_simplify(outputs, decimals=3), expected_answers)

    outputs = dqa_pipeline(query, top_k=2)
    self.assertEqual(nested_simplify(outputs, decimals=3), expected_answers)

    # A batch of two identical queries must give the same answers twice.
    outputs = dqa_pipeline([dict(query), dict(query)], top_k=2)
    self.assertEqual(nested_simplify(outputs, decimals=3), [expected_answers, expected_answers])

    ocr_result = apply_tesseract(load_image(INVOICE_URL), None, "")
    word_boxes = list(zip(*ocr_result))

    # This model should also work if `image` is set to None
    outputs = dqa_pipeline({"image": None, "word_boxes": word_boxes, "question": question}, top_k=2)
    self.assertEqual(nested_simplify(outputs, decimals=3), expected_answers)
@slow
@require_torch
@require_pytesseract
@require_vision
def test_large_model_pt_layoutlm_chunk(self):
    """Same LayoutLM checkpoint as the non-chunked test, with the pipeline created with `max_seq_len=50`."""
    checkpoint, revision = "impira/layoutlm-document-qa", "3dc6de3"
    tokenizer = AutoTokenizer.from_pretrained(checkpoint, revision=revision, add_prefix_space=True)
    dqa_pipeline = pipeline(
        "document-question-answering",
        model=checkpoint,
        tokenizer=tokenizer,
        revision=revision,
        max_seq_len=50,
    )
    question = "What is the invoice number?"
    query = {"image": INVOICE_URL, "question": question}
    expected_answers = [
        {"score": 0.9999, "answer": "us-001", "start": 16, "end": 16},
        {"score": 0.9998, "answer": "us-001", "start": 16, "end": 16},
    ]

    outputs = dqa_pipeline(image=INVOICE_URL, question=question, top_k=2)
    self.assertEqual(nested_simplify(outputs, decimals=4), expected_answers)

    # A batch of two identical queries must give the same answers twice.
    outputs = dqa_pipeline([dict(query), dict(query)], top_k=2)
    self.assertEqual(nested_simplify(outputs, decimals=4), [expected_answers, expected_answers])

    ocr_result = apply_tesseract(load_image(INVOICE_URL), None, "")
    word_boxes = list(zip(*ocr_result))

    # This model should also work if `image` is set to None
    outputs = dqa_pipeline({"image": None, "word_boxes": word_boxes, "question": question}, top_k=2)
    self.assertEqual(nested_simplify(outputs, decimals=4), expected_answers)
@slow
@require_torch
def test_large_model_pt_donut(self):
    """Check the Donut DocVQA checkpoint on the invoice; the expected result is a single answer without a score."""
    checkpoint = "naver-clova-ix/donut-base-finetuned-docvqa"
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    dqa_pipeline = pipeline(
        "document-question-answering",
        model=checkpoint,
        tokenizer=tokenizer,
        image_processor=checkpoint,
    )

    outputs = dqa_pipeline(image=INVOICE_URL, question="What is the invoice number?", top_k=2)
    self.assertEqual(nested_simplify(outputs, decimals=4), [{"answer": "us-001"}])

View File

@@ -0,0 +1,158 @@
# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy as np
from transformers import (
FEATURE_EXTRACTOR_MAPPING,
IMAGE_PROCESSOR_MAPPING,
MODEL_MAPPING,
FeatureExtractionPipeline,
LxmertConfig,
is_torch_available,
pipeline,
)
from transformers.testing_utils import is_pipeline_test, nested_simplify, require_torch
if is_torch_available():
import torch
@is_pipeline_test
class FeatureExtractionPipelineTests(unittest.TestCase):
model_mapping = MODEL_MAPPING
@require_torch
def test_small_model_pt(self):
    """Compare the features of a tiny DistilBERT model for one sentence against pinned reference values."""
    # Reference values, one inner list per row of the simplified output.
    # fmt: off
    expected = [[
        [2.287, 1.234, 0.042, 1.53, 1.306, 0.879, -0.526, -1.71, -1.276, 0.756, -0.775, -1.048, -0.25, -0.595, -0.137, -0.598, 2.022, -0.812, 0.284, -0.488, -0.391, -0.403, -0.525, -0.061, -0.228, 1.086, 0.378, -0.14, 0.599, -0.087, -2.259, -0.098],
        [1.676, 0.232, -1.508, -0.145, 1.798, -1.388, 1.331, -0.37, -0.939, 0.043, 0.06, -0.414, -1.408, 0.24, 0.622, -0.55, -0.569, 1.873, -0.706, 1.924, -0.254, 1.927, -0.423, 0.152, -0.952, 0.509, -0.496, -0.968, 0.093, -1.049, -0.65, 0.312],
        [0.207, -0.775, -1.822, 0.321, -0.71, -0.201, 0.3, 1.146, -0.233, -0.753, -0.305, 1.309, -1.47, -0.21, 1.802, -1.555, -1.175, 1.323, -0.303, 0.722, -0.076, 0.103, -1.406, 1.931, 0.091, 0.237, 1.172, 1.607, 0.253, -0.9, -1.068, 0.438],
        [0.615, 1.077, 0.171, -0.175, 1.3, 0.901, -0.653, -0.138, 0.341, -0.654, -0.184, -0.441, -0.424, 0.356, -0.075, 0.26, -1.023, 0.814, 0.524, -0.904, -0.204, -0.623, 1.234, -1.03, 2.594, 0.56, 1.831, -0.199, -1.508, -0.492, -1.687, -2.165],
        [0.129, 0.008, -1.279, -0.412, -0.004, 1.663, 0.196, 0.104, 0.123, 0.119, 0.635, 1.757, 2.334, -0.799, -1.626, -1.26, 0.595, -0.316, -1.399, 0.232, 0.264, 1.386, -1.171, -0.256, -0.256, -1.944, 1.168, -0.368, -0.714, -0.51, 0.454, 1.148],
        [-0.32, 0.29, -1.309, -0.177, 0.453, 0.636, -0.024, 0.509, 0.931, -1.754, -1.575, 0.786, 0.046, -1.165, -1.416, 1.373, 1.293, -0.285, -1.541, -1.186, -0.106, -0.994, 2.001, 0.972, -0.02, 1.654, -0.236, 0.643, 1.02, 0.572, -0.914, -0.154],
        [0.7, -0.937, 0.441, 0.25, 0.78, -0.022, 0.282, -0.095, 1.558, -0.336, 1.706, 0.884, 1.28, 0.198, -0.796, 1.218, -1.769, 1.197, -0.342, -0.177, -0.645, 1.364, 0.008, -0.597, -0.484, -2.772, -0.696, -0.632, -0.34, -1.527, -0.562, 0.862],
        [2.504, 0.831, -1.271, -0.033, 0.298, -0.735, 1.339, 1.74, 0.233, -1.424, -0.819, -0.761, 0.291, 0.853, -0.092, -0.885, 0.164, 1.025, 0.907, 0.749, -1.515, -0.545, -1.365, 0.271, 0.034, -2.005, 0.031, 0.244, 0.621, 0.176, 0.336, -1.196],
        [-0.711, 0.591, -1.001, -0.946, 0.784, -1.66, 1.545, 0.799, -0.857, 1.148, 0.213, -0.285, 0.464, -0.139, 0.79, -1.663, -1.121, 0.575, -0.178, -0.508, 1.565, -0.242, -0.346, 1.024, -1.135, -0.158, -2.101, 0.275, 2.009, -0.425, 0.716, 0.981],
        [0.912, -1.186, -0.846, -0.421, -1.315, -0.827, 0.309, 0.533, 1.029, -2.343, 1.513, -1.238, 1.487, -0.849, 0.896, -0.927, -0.459, 0.159, 0.177, 0.873, 0.935, 1.433, -0.485, 0.737, 1.327, -0.338, 1.608, -0.47, -0.445, -1.118, -0.213, -0.446],
        [-0.434, -1.362, -1.098, -1.068, 1.507, 0.003, 0.413, -0.395, 0.897, -0.237, 1.405, -0.344, 1.693, 0.677, 0.097, -0.257, -0.602, 1.026, -1.229, 0.855, -0.713, 1.014, 0.443, 0.238, 0.425, -2.184, 1.933, -1.157, -1.132, -0.597, -0.785, 0.967],
        [0.58, -0.971, 0.789, -0.468, -0.576, 1.779, 1.747, 1.715, -1.939, 0.125, 0.656, -0.042, -1.024, -1.767, 0.107, -0.408, -0.866, -1.774, 1.248, 0.939, -0.033, 1.523, 1.168, -0.744, 0.209, -0.168, -0.316, 0.207, -0.432, 0.047, -0.646, -0.664],
        [-0.185, -0.613, -1.695, 1.602, -0.32, -0.277, 0.967, 0.728, -0.965, -0.234, 1.069, -0.63, -1.631, 0.711, 0.426, 1.298, -0.191, -0.467, -0.771, 0.971, -0.118, -1.577, -2.064, -0.055, -0.59, 0.642, -0.997, 1.251, 0.538, 1.367, 0.106, 1.704],
    ]]
    # fmt: on

    extractor = pipeline(task="feature-extraction", model="hf-internal-testing/tiny-random-distilbert")
    outputs = extractor("This is a test")
    self.assertEqual(nested_simplify(outputs), expected)
@require_torch
def test_tokenization_small_model_pt(self):
    """Check how `tokenize_kwargs` and `truncation` affect the output of a tiny DistilBERT feature extractor."""
    # Reference values for the default call, one inner list per row of the simplified output.
    # fmt: off
    expected_default = [[
        [2.287, 1.234, 0.042, 1.53, 1.306, 0.879, -0.526, -1.71, -1.276, 0.756, -0.775, -1.048, -0.25, -0.595, -0.137, -0.598, 2.022, -0.812, 0.284, -0.488, -0.391, -0.403, -0.525, -0.061, -0.228, 1.086, 0.378, -0.14, 0.599, -0.087, -2.259, -0.098],
        [1.676, 0.232, -1.508, -0.145, 1.798, -1.388, 1.331, -0.37, -0.939, 0.043, 0.06, -0.414, -1.408, 0.24, 0.622, -0.55, -0.569, 1.873, -0.706, 1.924, -0.254, 1.927, -0.423, 0.152, -0.952, 0.509, -0.496, -0.968, 0.093, -1.049, -0.65, 0.312],
        [0.207, -0.775, -1.822, 0.321, -0.71, -0.201, 0.3, 1.146, -0.233, -0.753, -0.305, 1.309, -1.47, -0.21, 1.802, -1.555, -1.175, 1.323, -0.303, 0.722, -0.076, 0.103, -1.406, 1.931, 0.091, 0.237, 1.172, 1.607, 0.253, -0.9, -1.068, 0.438],
        [0.615, 1.077, 0.171, -0.175, 1.3, 0.901, -0.653, -0.138, 0.341, -0.654, -0.184, -0.441, -0.424, 0.356, -0.075, 0.26, -1.023, 0.814, 0.524, -0.904, -0.204, -0.623, 1.234, -1.03, 2.594, 0.56, 1.831, -0.199, -1.508, -0.492, -1.687, -2.165],
        [0.129, 0.008, -1.279, -0.412, -0.004, 1.663, 0.196, 0.104, 0.123, 0.119, 0.635, 1.757, 2.334, -0.799, -1.626, -1.26, 0.595, -0.316, -1.399, 0.232, 0.264, 1.386, -1.171, -0.256, -0.256, -1.944, 1.168, -0.368, -0.714, -0.51, 0.454, 1.148],
        [-0.32, 0.29, -1.309, -0.177, 0.453, 0.636, -0.024, 0.509, 0.931, -1.754, -1.575, 0.786, 0.046, -1.165, -1.416, 1.373, 1.293, -0.285, -1.541, -1.186, -0.106, -0.994, 2.001, 0.972, -0.02, 1.654, -0.236, 0.643, 1.02, 0.572, -0.914, -0.154],
        [0.7, -0.937, 0.441, 0.25, 0.78, -0.022, 0.282, -0.095, 1.558, -0.336, 1.706, 0.884, 1.28, 0.198, -0.796, 1.218, -1.769, 1.197, -0.342, -0.177, -0.645, 1.364, 0.008, -0.597, -0.484, -2.772, -0.696, -0.632, -0.34, -1.527, -0.562, 0.862],
        [2.504, 0.831, -1.271, -0.033, 0.298, -0.735, 1.339, 1.74, 0.233, -1.424, -0.819, -0.761, 0.291, 0.853, -0.092, -0.885, 0.164, 1.025, 0.907, 0.749, -1.515, -0.545, -1.365, 0.271, 0.034, -2.005, 0.031, 0.244, 0.621, 0.176, 0.336, -1.196],
        [-0.711, 0.591, -1.001, -0.946, 0.784, -1.66, 1.545, 0.799, -0.857, 1.148, 0.213, -0.285, 0.464, -0.139, 0.79, -1.663, -1.121, 0.575, -0.178, -0.508, 1.565, -0.242, -0.346, 1.024, -1.135, -0.158, -2.101, 0.275, 2.009, -0.425, 0.716, 0.981],
        [0.912, -1.186, -0.846, -0.421, -1.315, -0.827, 0.309, 0.533, 1.029, -2.343, 1.513, -1.238, 1.487, -0.849, 0.896, -0.927, -0.459, 0.159, 0.177, 0.873, 0.935, 1.433, -0.485, 0.737, 1.327, -0.338, 1.608, -0.47, -0.445, -1.118, -0.213, -0.446],
        [-0.434, -1.362, -1.098, -1.068, 1.507, 0.003, 0.413, -0.395, 0.897, -0.237, 1.405, -0.344, 1.693, 0.677, 0.097, -0.257, -0.602, 1.026, -1.229, 0.855, -0.713, 1.014, 0.443, 0.238, 0.425, -2.184, 1.933, -1.157, -1.132, -0.597, -0.785, 0.967],
        [0.58, -0.971, 0.789, -0.468, -0.576, 1.779, 1.747, 1.715, -1.939, 0.125, 0.656, -0.042, -1.024, -1.767, 0.107, -0.408, -0.866, -1.774, 1.248, 0.939, -0.033, 1.523, 1.168, -0.744, 0.209, -0.168, -0.316, 0.207, -0.432, 0.047, -0.646, -0.664],
        [-0.185, -0.613, -1.695, 1.602, -0.32, -0.277, 0.967, 0.728, -0.965, -0.234, 1.069, -0.63, -1.631, 0.711, 0.426, 1.298, -0.191, -0.467, -0.771, 0.971, -0.118, -1.577, -2.064, -0.055, -0.59, 0.642, -0.997, 1.251, 0.538, 1.367, 0.106, 1.704],
    ]]
    # fmt: on
    sentence = "This is a test"
    sentences = ["This is a test", "This", "This is", "This is a", "This is a test test test test"]

    extractor = pipeline(task="feature-extraction", model="hf-internal-testing/tiny-random-distilbert")

    # test with empty parameters
    outputs = extractor(sentence)
    self.assertEqual(nested_simplify(outputs), expected_default)

    # test with various tokenizer parameters
    outputs = extractor(sentence, tokenize_kwargs={"max_length": 3})
    self.assertEqual(np.squeeze(outputs).shape, (3, 32))

    # Truncation requested through `tokenize_kwargs`.
    outputs = extractor(sentences, tokenize_kwargs={"truncation": True, "padding": True, "max_length": 4})
    self.assertEqual(np.squeeze(outputs).shape, (5, 4, 32))

    # Truncation requested through the dedicated pipeline argument.
    outputs = extractor(sentences, truncation=True, tokenize_kwargs={"padding": True, "max_length": 4})
    self.assertEqual(np.squeeze(outputs).shape, (5, 4, 32))

    # raise value error if truncation parameter given for two places
    with self.assertRaises(ValueError):
        _ = extractor(sentences, truncation=True, tokenize_kwargs={"truncation": True})
@require_torch
def test_return_tensors_pt(self):
    """With `return_tensors=True` the pipeline output must be a torch tensor."""
    extractor = pipeline(task="feature-extraction", model="hf-internal-testing/tiny-random-distilbert")
    features = extractor("This is a test", return_tensors=True)
    self.assertTrue(torch.is_tensor(features))
def get_shape(self, input_, shape=None):
    """Return the dimensions of a nested list of floats, outermost dimension first.

    The previous implementation handed the same `shape` accumulator to every sibling during
    recursion, so inner dimensions were appended once per sibling (a 2x2 input gave `[2, 2, 2]`).
    Only the first dimension was reliable. Each level is now computed independently.

    Args:
        input_: A float, or an arbitrarily nested list of floats. Inner dimensions are taken from
            the first element of each level.
        shape: Optional list kept for backward compatibility. When given, the computed
            dimensions are appended to it in place and the same list is returned.

    Returns:
        `0` when `input_` is a bare float, otherwise a list of ints such as `[2, 3]`.

    Raises:
        TypeError: If anything other than floats and lists is found, or if a level starts with a
            float but also contains lists.
    """

    def _dims(value):
        # A float is a leaf and contributes no dimension of its own.
        if isinstance(value, float):
            return 0
        if not isinstance(value, list):
            raise TypeError("We expect lists of floats, nothing else")
        inner = [_dims(item) for item in value]
        # Innermost level: every element is a float (this also covers the empty list).
        if all(dim == 0 for dim in inner):
            return [len(value)]
        # A level that starts with a float but also holds lists has no well-defined shape.
        if inner[0] == 0:
            raise TypeError("We expect lists of floats, nothing else")
        return [len(value), *inner[0]]

    dims = _dims(input_)
    if dims == 0:
        return 0
    if shape is None:
        return dims
    shape.extend(dims)
    return shape
def get_test_pipeline(
    self,
    model,
    tokenizer=None,
    image_processor=None,
    feature_extractor=None,
    processor=None,
    dtype="float32",
):
    """
    Build a `FeatureExtractionPipeline` for `model` together with two example sentences.

    The test is skipped when there is no tokenizer, when the model config belongs to a bimodal model, or
    when the model is an encoder-decoder.
    """
    if tokenizer is None:
        self.skipTest(reason="No tokenizer")

    config = model.config
    is_bimodal = (
        type(config) in FEATURE_EXTRACTOR_MAPPING
        or isinstance(config, LxmertConfig)
        or type(config) in IMAGE_PROCESSOR_MAPPING
    )
    if is_bimodal:
        self.skipTest(
            reason="This is a bimodal model, we need to find a more consistent way to switch on those models."
        )

    if config.is_encoder_decoder:
        self.skipTest(
            """encoder_decoder models are trickier for this pipeline.
Do we want encoder + decoder inputs to get some features?
Do we want encoder only features ?
For now ignore those.
"""
        )

    pipeline_kwargs = {
        "model": model,
        "tokenizer": tokenizer,
        "feature_extractor": feature_extractor,
        "image_processor": image_processor,
        "processor": processor,
        "dtype": dtype,
    }
    sample_texts = ["This is a test", "This is another test"]
    return FeatureExtractionPipeline(**pipeline_kwargs), sample_texts
def run_pipeline_test(self, feature_extractor, examples):
    """Check that the leading output dimension equals the number of input texts."""
    # If we send too small input
    # there's a bug within FunnelModel (output with shape [1, 4, 2, 1] doesn't match the broadcast shape [1, 4, 2, 2])
    cases = (
        ("This is a test", {}, 1),
        (["This is a test", "Another longer test"], {}, 2),
        ("This is a test" * 100, {"truncation": True}, 1),
    )
    for text, call_kwargs, expected_batch in cases:
        dims = self.get_shape(feature_extractor(text, **call_kwargs))
        self.assertEqual(dims[0], expected_batch)

View File

@@ -0,0 +1,412 @@
# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import gc
import unittest
from transformers import MODEL_FOR_MASKED_LM_MAPPING, FillMaskPipeline, pipeline
from transformers.pipelines import PipelineException
from transformers.testing_utils import (
backend_empty_cache,
is_pipeline_test,
is_torch_available,
nested_simplify,
require_torch,
require_torch_accelerator,
slow,
torch_device,
)
from .test_pipelines_common import ANY
@is_pipeline_test
class FillMaskPipelineTests(unittest.TestCase):
model_mapping = MODEL_FOR_MASKED_LM_MAPPING
def tearDown(self):
    """Free memory after each test: run the garbage collector, then empty the backend cache if torch exists."""
    super().tearDown()
    # clean-up as much as possible GPU memory occupied by PyTorch
    gc.collect()
    if not is_torch_available():
        return
    backend_empty_cache(torch_device)
@require_torch
def test_small_model_pt(self):
    # Pins the exact predictions of the tiny checkpoint for plain, targeted and multi-mask prompts.
    unmasker = pipeline(task="fill-mask", model="sshleifer/tiny-distilroberta-base", top_k=2)

    def predict(text, **call_kwargs):
        return nested_simplify(unmasker(text, **call_kwargs), decimals=6)

    self.assertEqual(
        predict("My name is <mask>"),
        [
            {"sequence": "My name is Maul", "score": 2.2e-05, "token": 35676, "token_str": " Maul"},
            {"sequence": "My name isELS", "score": 2.2e-05, "token": 16416, "token_str": "ELS"},
        ],
    )

    self.assertEqual(
        predict("The largest city in France is <mask>"),
        [
            {
                "sequence": "The largest city in France is Maul",
                "score": 2.2e-05,
                "token": 35676,
                "token_str": " Maul",
            },
            {"sequence": "The largest city in France isELS", "score": 2.2e-05, "token": 16416, "token_str": "ELS"},
        ],
    )

    # Restricting the candidates: " Teven" is not a single token, so " Te" is scored in its place.
    self.assertEqual(
        predict("My name is <mask>", targets=[" Patrick", " Clara", " Teven"], top_k=3),
        [
            {"sequence": "My name is Patrick", "score": 2.1e-05, "token": 3499, "token_str": " Patrick"},
            {"sequence": "My name is Te", "score": 2e-05, "token": 2941, "token_str": " Te"},
            {"sequence": "My name is Clara", "score": 2e-05, "token": 13606, "token_str": " Clara"},
        ],
    )

    # Two masks: one list of predictions per mask position.
    first_mask = [
        {
            "score": 2.2e-05,
            "token": 35676,
            "token_str": " Maul",
            "sequence": "<s>My name is Maul<mask></s>",
        },
        {"score": 2.2e-05, "token": 16416, "token_str": "ELS", "sequence": "<s>My name isELS<mask></s>"},
    ]
    second_mask = [
        {
            "score": 2.2e-05,
            "token": 35676,
            "token_str": " Maul",
            "sequence": "<s>My name is<mask> Maul</s>",
        },
        {"score": 2.2e-05, "token": 16416, "token_str": "ELS", "sequence": "<s>My name is<mask>ELS</s>"},
    ]
    self.assertEqual(predict("My name is <mask> <mask>", top_k=2), [first_mask, second_mask])
@require_torch_accelerator
def test_fp16_casting(self):
    fill_masker = pipeline(
        "fill-mask",
        model="hf-internal-testing/tiny-random-distilbert",
        device=torch_device,
    )
    # convert model to fp16
    fill_masker.model.half()

    # We actually don't care about the result, we just want to make sure
    # it works, meaning the float16 tensor got casted back to float32
    # for postprocessing.
    self.assertIsInstance(fill_masker("Paris is the [MASK] of France."), list)
@slow
@require_torch
def test_large_model_pt(self):
    # Runs the shared large-model expectations against the distilroberta checkpoint.
    self.run_large_test(pipeline(task="fill-mask", model="distilbert/distilroberta-base", top_k=2))
def run_large_test(self, unmasker):
    """Compare `unmasker` against reference predictions for plain, targeted and truncated prompts."""
    self.assertEqual(
        nested_simplify(unmasker("My name is <mask>")),
        [
            {"sequence": "My name is John", "score": 0.008, "token": 610, "token_str": " John"},
            {"sequence": "My name is Chris", "score": 0.007, "token": 1573, "token_str": " Chris"},
        ],
    )

    self.assertEqual(
        nested_simplify(unmasker("The largest city in France is <mask>")),
        [
            {
                "sequence": "The largest city in France is Paris",
                "score": 0.251,
                "token": 2201,
                "token_str": " Paris",
            },
            {
                "sequence": "The largest city in France is Lyon",
                "score": 0.214,
                "token": 12790,
                "token_str": " Lyon",
            },
        ],
    )

    targeted = unmasker("My name is <mask>", targets=[" Patrick", " Clara", " Teven"], top_k=3)
    self.assertEqual(
        nested_simplify(targeted),
        [
            {"sequence": "My name is Patrick", "score": 0.005, "token": 3499, "token_str": " Patrick"},
            {"sequence": "My name is Clara", "score": 0.000, "token": 13606, "token_str": " Clara"},
            {"sequence": "My name is Te", "score": 0.000, "token": 2941, "token_str": " Te"},
        ],
    )

    # A very long prompt only works because truncation is forwarded to the tokenizer.
    dummy_str = "Lorem ipsum dolor sit amet, consectetur adipiscing elit," * 100
    long_outputs = unmasker(
        "My name is <mask>" + dummy_str,
        tokenizer_kwargs={"truncation": True},
    )
    simplified = nested_simplify(long_outputs, decimals=4)

    # Sequences are huge, so only their first 100 characters are compared.
    sequence_heads = [{"sequence": entry["sequence"][:100]} for entry in simplified]
    self.assertEqual(
        sequence_heads,
        [
            {"sequence": f"My name is,{dummy_str}"[:100]},
            {"sequence": f"My name is:,{dummy_str}"[:100]},
        ],
    )

    without_sequence = [
        {key: value for key, value in entry.items() if key != "sequence"} for entry in simplified
    ]
    self.assertEqual(
        without_sequence,
        [
            {"score": 0.2819, "token": 6, "token_str": ","},
            {"score": 0.0954, "token": 46686, "token_str": ":,"},
        ],
    )
@require_torch
def test_model_no_pad_pt(self):
    # The generic checks must still pass when the tokenizer has no padding token at all.
    unmasker = pipeline(task="fill-mask", model="sshleifer/tiny-distilroberta-base")
    tok = unmasker.tokenizer
    tok.pad_token_id = None
    tok.pad_token = None
    self.run_pipeline_test(unmasker, [])
def get_test_pipeline(
    self,
    model,
    tokenizer=None,
    image_processor=None,
    feature_extractor=None,
    processor=None,
    dtype="float32",
):
    """
    Build a `FillMaskPipeline` for `model` plus one example prompt containing the tokenizer's mask token.

    The test is skipped when no tokenizer is given or when the tokenizer has no mask token id.
    """
    has_mask_token = tokenizer is not None and tokenizer.mask_token_id is not None
    if not has_mask_token:
        self.skipTest(reason="The provided tokenizer has no mask token, (probably reformer or wav2vec2)")

    pipeline_kwargs = {
        "model": model,
        "tokenizer": tokenizer,
        "feature_extractor": feature_extractor,
        "image_processor": image_processor,
        "processor": processor,
        "dtype": dtype,
    }
    fill_masker = FillMaskPipeline(**pipeline_kwargs)
    return fill_masker, [f"This is another {tokenizer.mask_token} test"]
def run_pipeline_test(self, fill_masker, examples):
    """
    Check the output structure of `fill_masker` for single and batched prompts, check that invalid inputs
    raise, then run the `top_k` / `targets` / multi-mask helper checks on the same model and tokenizer.
    """
    tokenizer = fill_masker.tokenizer
    model = fill_masker.model

    def prediction():
        return {"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)}

    def five_predictions():
        # Five entries are expected per mask when `top_k` is left unset.
        return [prediction() for _ in range(5)]

    # A bare string yields a flat list of predictions.
    outputs = fill_masker(
        f"This is a {tokenizer.mask_token}",
    )
    self.assertEqual(outputs, five_predictions())

    # A one-element list is unwrapped to the same flat structure.
    outputs = fill_masker([f"This is a {tokenizer.mask_token}"])
    self.assertEqual(outputs, five_predictions())

    # Several prompts yield one list of predictions per prompt.
    outputs = fill_masker([f"This is a {tokenizer.mask_token}", f"Another {tokenizer.mask_token} great test."])
    self.assertEqual(outputs, [five_predictions(), five_predictions()])

    with self.assertRaises(ValueError):
        fill_masker([None])
    # No mask_token is not supported
    with self.assertRaises(PipelineException):
        fill_masker("This is")

    follow_up_checks = (
        self.run_test_top_k,
        self.run_test_targets,
        self.run_test_top_k_targets,
        self.fill_mask_with_duplicate_targets_and_top_k,
        self.fill_mask_with_multiple_masks,
    )
    for check in follow_up_checks:
        check(model, tokenizer)
def run_test_targets(self, model, tokenizer):
# Checks the `targets` option of FillMaskPipeline, given both at construction time and at call time:
# the predictions must be exactly the requested tokens, scores must be stable across calls, and
# invalid target specifications must raise ValueError.
vocab = tokenizer.get_vocab()
# Use the first two vocabulary entries (in sorted order) as targets.
targets = sorted(vocab.keys())[:2]
# Pipeline argument
fill_masker = FillMaskPipeline(model=model, tokenizer=tokenizer, targets=targets)
outputs = fill_masker(f"This is a {tokenizer.mask_token}")
self.assertEqual(
outputs,
[
{"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
{"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
],
)
# Compared as sets: the order of the predictions is not checked here.
target_ids = {vocab[el] for el in targets}
self.assertEqual({el["token"] for el in outputs}, target_ids)
# `token_str` is the decoded form of the token id, which may differ from the raw vocabulary key.
processed_targets = [tokenizer.decode([x]) for x in target_ids]
self.assertEqual({el["token_str"] for el in outputs}, set(processed_targets))
# Call argument
fill_masker = FillMaskPipeline(model=model, tokenizer=tokenizer)
outputs = fill_masker(f"This is a {tokenizer.mask_token}", targets=targets)
self.assertEqual(
outputs,
[
{"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
{"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
],
)
target_ids = {vocab[el] for el in targets}
self.assertEqual({el["token"] for el in outputs}, target_ids)
processed_targets = [tokenizer.decode([x]) for x in target_ids]
self.assertEqual({el["token_str"] for el in outputs}, set(processed_targets))
# Score equivalence
outputs = fill_masker(f"This is a {tokenizer.mask_token}", targets=targets)
tokens = [top_mask["token_str"] for top_mask in outputs]
scores = [top_mask["score"] for top_mask in outputs]
# For some BPE tokenizers, `</w>` is removed during decoding, so `token_str` won't be the same as in `targets`.
if set(tokens) == set(targets):
# Feeding the returned token strings back as targets must give the same scores in the same order.
unmasked_targets = fill_masker(f"This is a {tokenizer.mask_token}", targets=tokens)
target_scores = [top_mask["score"] for top_mask in unmasked_targets]
self.assertEqual(nested_simplify(scores), nested_simplify(target_scores))
# Raises with invalid
with self.assertRaises(ValueError):
outputs = fill_masker(f"This is a {tokenizer.mask_token}", targets=[])
# For some tokenizers, `""` is actually in the vocabulary and the expected error won't be raised
if "" not in tokenizer.get_vocab():
with self.assertRaises(ValueError):
outputs = fill_masker(f"This is a {tokenizer.mask_token}", targets=[""])
with self.assertRaises(ValueError):
outputs = fill_masker(f"This is a {tokenizer.mask_token}", targets="")
def run_test_top_k(self, model, tokenizer):
    """Check that `top_k=2` gives the same two predictions whether set at construction or at call time."""
    prompt = f"This is a {tokenizer.mask_token}"

    def two_predictions():
        return [
            {"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)}
            for _ in range(2)
        ]

    # `top_k` given to the constructor
    from_init = FillMaskPipeline(model=model, tokenizer=tokenizer, top_k=2)(prompt)
    self.assertEqual(from_init, two_predictions())

    # `top_k` given to the call
    from_call = FillMaskPipeline(model=model, tokenizer=tokenizer)(prompt, top_k=2)
    self.assertEqual(from_call, two_predictions())

    self.assertEqual(nested_simplify(from_init), nested_simplify(from_call))
def run_test_top_k_targets(self, model, tokenizer):
    """Check that combining `top_k` with `targets` is consistent when the targets are pre-filtered."""
    prompt = f"This is a {tokenizer.mask_token}"
    vocab = tokenizer.get_vocab()
    fill_masker = FillMaskPipeline(model=model, tokenizer=tokenizer)

    # top_k=2, ntargets=3
    candidates = sorted(vocab.keys())[:3]
    top_two = fill_masker(prompt, top_k=2, targets=candidates)

    # If we use the most probable targets, and filter differently, we should still
    # have the same results
    ranked = sorted(top_two, key=lambda x: x["score"], reverse=True)
    best_targets = [entry["token_str"] for entry in ranked]

    # For some BPE tokenizers, `</w>` is removed during decoding, so `token_str` won't be the same as in `targets`.
    if not set(best_targets).issubset(candidates):
        return

    refiltered = fill_masker(prompt, top_k=3, targets=best_targets)
    # They should yield exactly the same result
    self.assertEqual(nested_simplify(top_two), nested_simplify(refiltered))
def fill_mask_with_duplicate_targets_and_top_k(self, model, tokenizer):
    """Check that duplicated targets are collapsed, even when `top_k` asks for more results than targets."""
    fill_masker = FillMaskPipeline(model=model, tokenizer=tokenizer)
    vocab = tokenizer.get_vocab()

    # String duplicates + id duplicates
    unique_targets = sorted(vocab.keys())[:3]
    duplicated_targets = [unique_targets[position] for position in (0, 1, 0, 2, 1)]

    outputs = fill_masker(f"My name is {tokenizer.mask_token}", targets=duplicated_targets, top_k=10)

    # The target list contains duplicates, so we can't output more
    # than them
    self.assertEqual(len(outputs), 3)
def fill_mask_with_multiple_masks(self, model, tokenizer):
    """Check that a prompt with three masks returns three lists of `top_k` predictions."""
    fill_masker = FillMaskPipeline(model=model, tokenizer=tokenizer)
    mask = tokenizer.mask_token
    outputs = fill_masker(f"This is a {mask} {mask} {mask}", top_k=2)

    def prediction():
        return {"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)}

    # One list per mask position, two candidates each.
    expected = [[prediction(), prediction()] for _ in range(3)]
    self.assertEqual(outputs, expected)

View File

@@ -0,0 +1,297 @@
# Copyright 2021 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import datasets
from huggingface_hub import ImageClassificationOutputElement
from transformers import (
MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
PreTrainedTokenizerBase,
is_torch_available,
is_vision_available,
)
from transformers.pipelines import ImageClassificationPipeline, pipeline
from transformers.testing_utils import (
compare_pipeline_output_to_hub_spec,
is_pipeline_test,
nested_simplify,
require_torch,
require_vision,
slow,
)
from .test_pipelines_common import ANY
if is_torch_available():
import torch
# Use the real PIL `Image` module when vision support is installed.
if is_vision_available():
from PIL import Image
else:
# Fallback stub so the name `Image` still exists at import time; `open` accepts any arguments
# and returns None.
class Image:
@staticmethod
def open(*args, **kwargs):
pass
@is_pipeline_test
@require_torch
@require_vision
class ImageClassificationPipelineTests(unittest.TestCase):
model_mapping = MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING
_dataset = None
@classmethod
def _load_dataset(cls):
    """Load the image fixture dataset into `cls._dataset` the first time it is requested."""
    # Lazy loading of the dataset. Because it is a class method, it will only be loaded once per pytest process.
    if cls._dataset is not None:
        return
    # we use revision="refs/pr/1" until the PR is merged
    # https://hf.co/datasets/hf-internal-testing/fixtures_image_utils/discussions/1
    cls._dataset = datasets.load_dataset(
        "hf-internal-testing/fixtures_image_utils", split="test", revision="refs/pr/1"
    )
def get_test_pipeline(
    self,
    model,
    tokenizer=None,
    image_processor=None,
    feature_extractor=None,
    processor=None,
    dtype="float32",
):
    """Build an `ImageClassificationPipeline` with `top_k=2` plus two example inputs (an opened image and a URL)."""
    pipeline_kwargs = {
        "model": model,
        "tokenizer": tokenizer,
        "feature_extractor": feature_extractor,
        "image_processor": image_processor,
        "processor": processor,
        "dtype": dtype,
        "top_k": 2,
    }
    image_classifier = ImageClassificationPipeline(**pipeline_kwargs)

    local_image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")
    remote_image = "http://images.cocodataset.org/val2017/000000039769.jpg"
    return image_classifier, [local_image, remote_image]
def run_pipeline_test(self, image_classifier, examples):
    """Check the output structure for a single image path and for a mixed batch of five inputs."""
    self._load_dataset()

    def top_two():
        return [{"score": ANY(float), "label": ANY(str)} for _ in range(2)]

    single = image_classifier("./tests/fixtures/tests_samples/COCO/000000039769.png")
    self.assertEqual(single, top_two())

    # Accepts URL + PIL.Image + lists
    mixed_batch = [
        Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"),
        "http://images.cocodataset.org/val2017/000000039769.jpg",
        # RGBA
        self._dataset[0]["image"],
        # LA
        self._dataset[1]["image"],
        # L
        self._dataset[2]["image"],
    ]
    outputs = image_classifier(mixed_batch)
    self.assertEqual(outputs, [top_two() for _ in range(5)])

    # Every returned element must also match the Hub output spec.
    for output_element in (element for single_output in outputs for element in single_output):
        compare_pipeline_output_to_hub_spec(output_element, ImageClassificationOutputElement)
@require_torch
def test_small_model_pt(self):
    # Pins the scores of the tiny ViT checkpoint for a single URL and for a batch of two identical URLs.
    image_classifier = pipeline("image-classification", model="hf-internal-testing/tiny-random-vit")
    expected = [{"label": "LABEL_1", "score": 0.574}, {"label": "LABEL_0", "score": 0.426}]

    single = image_classifier("http://images.cocodataset.org/val2017/000000039769.jpg")
    self.assertEqual(nested_simplify(single, decimals=4), expected)

    batched = image_classifier(
        [
            "http://images.cocodataset.org/val2017/000000039769.jpg",
            "http://images.cocodataset.org/val2017/000000039769.jpg",
        ],
        top_k=2,
    )
    self.assertEqual(nested_simplify(batched, decimals=4), [expected, expected])
def test_custom_tokenizer(self):
    # Assert that the pipeline can be initialized with a tokenizer that is not in any mapping,
    # and that the very same object is kept on the pipeline.
    custom_tokenizer = PreTrainedTokenizerBase()
    image_classifier = pipeline(
        "image-classification",
        model="hf-internal-testing/tiny-random-vit",
        tokenizer=custom_tokenizer,
    )
    self.assertIs(image_classifier.tokenizer, custom_tokenizer)
@require_torch
def test_torch_float16_pipeline(self):
    # Loading the model in float16 must give the reference scores at 3 decimals.
    image_classifier = pipeline(
        "image-classification",
        model="hf-internal-testing/tiny-random-vit",
        dtype=torch.float16,
    )
    expected = [{"label": "LABEL_1", "score": 0.574}, {"label": "LABEL_0", "score": 0.426}]
    outputs = image_classifier("http://images.cocodataset.org/val2017/000000039769.jpg")
    self.assertEqual(nested_simplify(outputs, decimals=3), expected)
@require_torch
def test_torch_bfloat16_pipeline(self):
    # Loading the model in bfloat16 must give the reference scores at 3 decimals.
    image_classifier = pipeline(
        "image-classification",
        model="hf-internal-testing/tiny-random-vit",
        dtype=torch.bfloat16,
    )
    expected = [{"label": "LABEL_1", "score": 0.574}, {"label": "LABEL_0", "score": 0.426}]
    outputs = image_classifier("http://images.cocodataset.org/val2017/000000039769.jpg")
    self.assertEqual(nested_simplify(outputs, decimals=3), expected)
@slow
@require_torch
def test_perceiver(self):
    # Perceiver is not tested by `run_pipeline_test` properly.
    # That is because the type of feature_extractor and model preprocessor need to be kept
    # in sync, which is not the case in the current design
    reference_predictions = (
        (
            "deepmind/vision-perceiver-conv",
            [
                {"score": 0.4385, "label": "tabby, tabby cat"},
                {"score": 0.321, "label": "tiger cat"},
                {"score": 0.0502, "label": "Egyptian cat"},
                {"score": 0.0137, "label": "crib, cot"},
                {"score": 0.007, "label": "radiator"},
            ],
        ),
        (
            "deepmind/vision-perceiver-fourier",
            [
                {"score": 0.5658, "label": "tabby, tabby cat"},
                {"score": 0.1309, "label": "tiger cat"},
                {"score": 0.0722, "label": "Egyptian cat"},
                {"score": 0.0707, "label": "remote control, remote"},
                {"score": 0.0082, "label": "computer keyboard, keypad"},
            ],
        ),
        (
            "deepmind/vision-perceiver-learned",
            [
                {"score": 0.3022, "label": "tabby, tabby cat"},
                {"score": 0.2362, "label": "Egyptian cat"},
                {"score": 0.1856, "label": "tiger cat"},
                {"score": 0.0324, "label": "remote control, remote"},
                {"score": 0.0096, "label": "quilt, comforter, comfort, puff"},
            ],
        ),
    )
    # Each checkpoint is loaded and checked in turn on the same image.
    for checkpoint, expected in reference_predictions:
        image_classifier = pipeline("image-classification", model=checkpoint)
        outputs = image_classifier("http://images.cocodataset.org/val2017/000000039769.jpg")
        self.assertEqual(nested_simplify(outputs, decimals=4), expected)
@slow
@require_torch
def test_multilabel_classification(self):
    # Sigmoid is applied for multi-label classification
    image_classifier = pipeline("image-classification", model="hf-internal-testing/tiny-random-vit")
    image_classifier.model.config.problem_type = "multi_label_classification"
    expected = [{"label": "LABEL_1", "score": 0.5356}, {"label": "LABEL_0", "score": 0.4612}]

    single = image_classifier("http://images.cocodataset.org/val2017/000000039769.jpg")
    self.assertEqual(nested_simplify(single, decimals=4), expected)

    batched = image_classifier(
        [
            "http://images.cocodataset.org/val2017/000000039769.jpg",
            "http://images.cocodataset.org/val2017/000000039769.jpg",
        ]
    )
    self.assertEqual(nested_simplify(batched, decimals=4), [expected, expected])
@slow
@require_torch
def test_function_to_apply(self):
    # Sigmoid is requested explicitly through `function_to_apply` at call time.
    image_classifier = pipeline("image-classification", model="hf-internal-testing/tiny-random-vit")
    outputs = image_classifier(
        "http://images.cocodataset.org/val2017/000000039769.jpg",
        function_to_apply="sigmoid",
    )
    expected = [{"label": "LABEL_1", "score": 0.5356}, {"label": "LABEL_0", "score": 0.4612}]
    self.assertEqual(nested_simplify(outputs, decimals=4), expected)

View File

@@ -0,0 +1,139 @@
# Copyright 2024 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy as np
import pytest
from transformers import (
MODEL_MAPPING,
TOKENIZER_MAPPING,
ImageFeatureExtractionPipeline,
is_torch_available,
is_vision_available,
pipeline,
)
from transformers.testing_utils import is_pipeline_test, nested_simplify, require_torch
if is_torch_available():
import torch
if is_vision_available():
from PIL import Image
# We will verify our results on an image of cute cats
def prepare_img():
    """Open and return the COCO fixture image used by the tests in this file."""
    return Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")
@is_pipeline_test
class ImageFeatureExtractionPipelineTests(unittest.TestCase):
model_mapping = MODEL_MAPPING
@require_torch
def test_small_model_pt(self):
    # Pins the first feature vector produced by the tiny ViT checkpoint for the fixture image.
    extractor = pipeline(task="image-feature-extraction", model="hf-internal-testing/tiny-random-vit")
    features = extractor(prepare_img())
    expected_first_vector = [-1.417, -0.392, -1.264, -1.196, 1.648, 0.885, 0.56, -0.606, -1.175, 0.823, 1.912, 0.081, -0.053, 1.119, -0.062, -1.757, -0.571, 0.075, 0.959, 0.118, 1.201, -0.672, -0.498, 0.364, 0.937, -1.623, 0.228, 0.19, 1.697, -1.115, 0.583, -0.981]  # fmt: skip
    self.assertEqual(nested_simplify(features[0][0]), expected_first_vector)
@require_torch
def test_small_model_w_pooler_pt(self):
    # Pins the pooled output of the tiny ViT checkpoint that ships with a pooler.
    extractor = pipeline(
        task="image-feature-extraction",
        model="hf-internal-testing/tiny-random-vit-w-pooler",
    )
    pooled = extractor(prepare_img(), pool=True)
    expected_pooled = [-0.056, 0.083, 0.021, 0.038, 0.242, -0.279, -0.033, -0.003, 0.200, -0.192, 0.045, -0.095, -0.077, 0.017, -0.058, -0.063, -0.029, -0.204, 0.014, 0.042, 0.305, -0.205, -0.099, 0.146, -0.287, 0.020, 0.168, -0.052, 0.046, 0.048, -0.156, 0.093]  # fmt: skip
    self.assertEqual(nested_simplify(pooled[0]), expected_pooled)
@require_torch
def test_image_processing_small_model_pt(self):
    extractor = pipeline(task="image-feature-extraction", model="hf-internal-testing/tiny-random-vit")

    # test with image processor parameters
    oversized = {"size": {"height": 300, "width": 300}}
    image = prepare_img()
    with pytest.raises(ValueError):
        # Image doesn't match model input size
        extractor(image, image_processor_kwargs=oversized)

    custom_normalization = {"image_mean": [0, 0, 0], "image_std": [1, 1, 1]}
    image = prepare_img()
    features = extractor(image, image_processor_kwargs=custom_normalization)
    self.assertEqual(np.squeeze(features).shape, (226, 32))

    # Test pooling option
    pooled = extractor(image, pool=True)
    self.assertEqual(np.squeeze(pooled).shape, (32,))
@require_torch
def test_return_tensors_pt(self):
    # With `return_tensors=True` the pipeline must hand back a torch tensor.
    extractor = pipeline(
        model="hf-internal-testing/tiny-random-vit",
        task="image-feature-extraction",
    )
    result = extractor(prepare_img(), return_tensors=True)
    self.assertTrue(torch.is_tensor(result))
def get_test_pipeline(
    self,
    model,
    tokenizer=None,
    image_processor=None,
    feature_extractor=None,
    processor=None,
    dtype="float32",
):
    """
    Build an `ImageFeatureExtractionPipeline` for `model` together with two copies of the fixture image.

    The test is skipped when there is no image processor, when the model config also has a tokenizer
    mapping (bimodal model), or when the model is an encoder-decoder.
    """
    if image_processor is None:
        self.skipTest(reason="No image processor")

    if type(model.config) in TOKENIZER_MAPPING:
        self.skipTest(
            reason="This is a bimodal model, we need to find a more consistent way to switch on those models."
        )

    if model.config.is_encoder_decoder:
        self.skipTest(
            """encoder_decoder models are trickier for this pipeline.
Do we want encoder + decoder inputs to get some features?
Do we want encoder only features ?
For now ignore those.
"""
        )

    pipeline_kwargs = {
        "model": model,
        "tokenizer": tokenizer,
        "feature_extractor": feature_extractor,
        "image_processor": image_processor,
        "processor": processor,
        "dtype": dtype,
    }
    feature_extractor_pipeline = ImageFeatureExtractionPipeline(**pipeline_kwargs)
    image = prepare_img()
    return feature_extractor_pipeline, [image, image]
def run_pipeline_test(self, feature_extractor, examples):
    """Check that the number of outputs matches the number of images passed in (one, then all)."""
    single_result = feature_extractor(examples[0])
    self.assertEqual(len(single_result), 1)

    batch_result = feature_extractor(examples)
    self.assertEqual(len(batch_result), 2)

View File

@@ -0,0 +1,763 @@
# Copyright 2021 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import io
import tempfile
import unittest
import datasets
import httpx
import numpy as np
from datasets import load_dataset
from huggingface_hub import ImageSegmentationOutputElement
from huggingface_hub.utils import insecure_hashlib
from transformers import (
MODEL_FOR_IMAGE_SEGMENTATION_MAPPING,
MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING,
MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING,
AutoImageProcessor,
AutoModelForImageSegmentation,
AutoModelForInstanceSegmentation,
DetrForSegmentation,
ImageSegmentationPipeline,
MaskFormerForInstanceSegmentation,
is_vision_available,
pipeline,
)
from transformers.testing_utils import (
compare_pipeline_output_to_hub_spec,
is_pipeline_test,
nested_simplify,
require_timm,
require_torch,
require_vision,
slow,
)
from .test_pipelines_common import ANY
# Use the real PIL `Image` module when vision support is installed.
if is_vision_available():
from PIL import Image
else:
# Fallback stub so the name `Image` still exists at import time (it is used in annotations below);
# `open` accepts any arguments and returns None.
class Image:
@staticmethod
def open(*args, **kwargs):
pass
def hashimage(image: Image) -> str:
    """Return the first 10 hex characters of the MD5 digest of the image's raw bytes."""
    digest = insecure_hashlib.md5(image.tobytes()).hexdigest()
    return digest[:10]
def mask_to_test_readable(mask: Image) -> dict:
    """Summarise a mask as its short hash, the count of pixels equal to 255, and its array shape."""
    pixels = np.array(mask)
    return {
        "hash": hashimage(mask),
        "white_pixels": (pixels == 255).sum(),
        "shape": pixels.shape,
    }
def mask_to_test_readable_only_shape(mask: Image) -> dict:
    """Summarise a mask by the shape of its array representation only."""
    return {"shape": np.array(mask).shape}
@is_pipeline_test
@require_vision
@require_timm
@require_torch
class ImageSegmentationPipelineTests(unittest.TestCase):
# Union of the image, semantic and instance segmentation mappings. Later mappings override earlier ones
# on duplicate config classes, and empty/None mappings are skipped. Iterating `.items()` directly avoids
# concatenating a list with whatever `.items()` returns, which raises TypeError for a plain dict view.
model_mapping = {
    config_class: model_class
    for mapping in (
        MODEL_FOR_IMAGE_SEGMENTATION_MAPPING,
        MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING,
        MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING,
    )
    if mapping
    for config_class, model_class in mapping.items()
}
_dataset = None
@classmethod
def _load_dataset(cls):
    """Load the image fixture dataset into `cls._dataset` the first time it is requested."""
    # Lazy loading of the dataset. Because it is a class method, it will only be loaded once per pytest process.
    if cls._dataset is not None:
        return
    # we use revision="refs/pr/1" until the PR is merged
    # https://hf.co/datasets/hf-internal-testing/fixtures_image_utils/discussions/1
    cls._dataset = datasets.load_dataset(
        "hf-internal-testing/fixtures_image_utils", split="test", revision="refs/pr/1"
    )
def get_test_pipeline(
    self,
    model,
    tokenizer=None,
    image_processor=None,
    feature_extractor=None,
    processor=None,
    dtype="float32",
):
    """Build an `ImageSegmentationPipeline` for `model` plus two example inputs (the same fixture path twice)."""
    pipeline_kwargs = {
        "model": model,
        "tokenizer": tokenizer,
        "feature_extractor": feature_extractor,
        "image_processor": image_processor,
        "processor": processor,
        "dtype": dtype,
    }
    image_segmenter = ImageSegmentationPipeline(**pipeline_kwargs)
    examples = [
        "./tests/fixtures/tests_samples/COCO/000000039769.png",
        "./tests/fixtures/tests_samples/COCO/000000039769.png",
    ]
    return image_segmenter, examples
def run_pipeline_test(self, image_segmenter, examples):
    """Exercise *image_segmenter* on single images of several modes and on a batch.

    Only the structure of the outputs (score / label / mask types and counts) is
    checked, then every element is compared against the Hub output spec.
    """
    self._load_dataset()
    coco_image = "./tests/fixtures/tests_samples/COCO/000000039769.png"
    # All thresholds are zeroed for every call in this test.
    permissive = {"threshold": 0.0, "mask_threshold": 0, "overlap_mask_area_threshold": 0}

    def expected_segments(count):
        # XXX: PIL.Image implements __eq__ which bypasses ANY, so the expectation is always
        # passed as the FIRST argument of assertEqual to make the comparison work.
        return [{"score": ANY(float, type(None)), "label": ANY(str), "mask": ANY(Image.Image)}] * count

    outputs = image_segmenter(coco_image, **permissive)
    self.assertIsInstance(outputs, list)
    n = len(outputs)
    if isinstance(image_segmenter.model, (MaskFormerForInstanceSegmentation, DetrForSegmentation)):
        # Instance segmentation (maskformer, and detr) have a slot for null class
        # and can output nothing even with a low threshold
        self.assertGreaterEqual(n, 0)
    else:
        self.assertGreaterEqual(n, 1)
    self.assertEqual(expected_segments(n), outputs)

    # Dataset rows 0, 1 and 2 are the RGBA, LA and L images respectively.
    for row in range(3):
        outputs = image_segmenter(self._dataset[row]["image"], **permissive)
        self.assertEqual(expected_segments(len(outputs)), outputs)

    # We need to test batch_size with images with the same size.
    # Detr doesn't normalize the size of the images, meaning we can have
    # 800x800 or 800x1200, meaning we cannot batch simply.
    # We simply bail on this
    batch_size = 1 if isinstance(image_segmenter.model, DetrForSegmentation) else 2

    # 5 times the same image so the output shape is predictable
    batch = [coco_image] * 5
    outputs = image_segmenter(batch, **permissive, batch_size=batch_size)
    self.assertEqual(len(batch), len(outputs))
    self.assertEqual(len(outputs[0]), n)
    self.assertEqual(
        [expected_segments(n) for _ in batch],
        outputs,
        f"Expected [{n}, {n}, {n}, {n}, {n}], got {[len(item) for item in outputs]}",
    )

    for single_output in outputs:
        for output_element in single_output:
            compare_pipeline_output_to_hub_spec(output_element, ImageSegmentationOutputElement)
@require_torch
def test_small_model_pt_no_panoptic(self):
    """A semantic-only model must raise `ValueError` for the panoptic and instance subtasks."""
    model_id = "hf-internal-testing/tiny-random-mobilevit"
    # The default task is `image-classification` we need to override
    pipe = pipeline(task="image-segmentation", model=model_id)
    image_url = "http://images.cocodataset.org/val2017/000000039769.jpg"

    # This model supports neither `instance` nor `panoptic`: both must error out.
    cases = (
        (
            "panoptic",
            "Subtask panoptic is not supported for model <class"
            " 'transformers.models.mobilevit.modeling_mobilevit.MobileViTForSemanticSegmentation'>",
        ),
        (
            "instance",
            "Subtask instance is not supported for model <class"
            " 'transformers.models.mobilevit.modeling_mobilevit.MobileViTForSemanticSegmentation'>",
        ),
    )
    for subtask, expected_message in cases:
        with self.assertRaises(ValueError) as e:
            pipe(image_url, subtask=subtask)
        self.assertEqual(str(e.exception), expected_message)
@require_torch
def test_small_model_pt(self):
    """Check the tiny DETR panoptic checkpoint on the panoptic, instance and semantic subtasks."""
    model_id = "hf-internal-testing/tiny-detr-mobilenetsv3-panoptic"
    image_url = "http://images.cocodataset.org/val2017/000000039769.jpg"
    image_segmenter = ImageSegmentationPipeline(
        model=AutoModelForImageSegmentation.from_pretrained(model_id),
        image_processor=AutoImageProcessor.from_pretrained(model_id),
        subtask="panoptic",
        threshold=0.0,
        mask_threshold=0.0,
        overlap_mask_area_threshold=0.0,
    )

    # This is extremely brittle, and those values are made specific for the CI.
    single_segment = [
        {
            "score": 0.004,
            "label": "LABEL_215",
            "mask": {"hash": "a01498ca7c", "shape": (480, 640), "white_pixels": 307200},
        },
    ]

    # Single image (pipeline default subtask: panoptic).
    outputs = image_segmenter(image_url)
    # Shortening by hashing
    for segment in outputs:
        segment["mask"] = mask_to_test_readable(segment["mask"])
    self.assertEqual(nested_simplify(outputs, decimals=4), single_segment)

    # Two identical images: one result list per image.
    outputs = image_segmenter([image_url, image_url])
    for per_image in outputs:
        for segment in per_image:
            segment["mask"] = mask_to_test_readable(segment["mask"])
    self.assertEqual(nested_simplify(outputs, decimals=4), [single_segment, single_segment])

    # Instance subtask.
    output = image_segmenter(image_url, subtask="instance")
    for segment in output:
        segment["mask"] = mask_to_test_readable(segment["mask"])
    self.assertEqual(nested_simplify(output, decimals=4), single_segment)

    # This must be surprising to the reader.
    # The `panoptic` returns only LABEL_215, and this returns 3 labels.
    #
    output = image_segmenter(image_url, subtask="semantic")
    output_masks = [np.array(segment["mask"]) for segment in output]

    # page links (to visualize)
    mask_pages = [
        "https://huggingface.co/datasets/hf-internal-testing/mask-for-image-segmentation-tests/blob/main/mask_0.png",
        "https://huggingface.co/datasets/hf-internal-testing/mask-for-image-segmentation-tests/blob/main/mask_1.png",
        "https://huggingface.co/datasets/hf-internal-testing/mask-for-image-segmentation-tests/blob/main/mask_2.png",
    ]
    # actual links to get files
    mask_files = [page.replace("/blob/", "/resolve/") for page in mask_pages]
    # Download each reference mask and convert it to a numpy array
    expected_masks = [
        np.array(Image.open(io.BytesIO(httpx.get(link, follow_redirects=True).content))) for link in mask_files
    ]

    for index in range(3):
        self.assertEqual(output_masks[index].shape, expected_masks[index].shape)

    # With un-trained tiny random models, the output `logits` tensor is very likely to contain many values
    # close to each other, which cause `argmax` to give quite different results when running the test on 2
    # environments. We use a lower threshold `0.9` here to avoid flakiness.
    for index in range(3):
        self.assertGreaterEqual(np.mean(output_masks[index] == expected_masks[index]), 0.9)

    for segment in output:
        segment["mask"] = mask_to_test_readable_only_shape(segment["mask"])

    self.maxDiff = None
    expected_semantic = [
        {"label": label, "mask": {"shape": (480, 640)}, "score": None}
        for label in ("LABEL_88", "LABEL_101", "LABEL_215")
    ]
    self.assertEqual(nested_simplify(output, decimals=4), expected_semantic)
@require_torch
def test_small_model_pt_semantic(self):
    """Check the two masks returned by the tiny random BEiT checkpoint."""
    image_segmenter = pipeline(model="hf-internal-testing/tiny-random-beit-pipeline")
    outputs = image_segmenter("http://images.cocodataset.org/val2017/000000039769.jpg")
    # shortening by hashing
    for segment in outputs:
        segment["mask"] = mask_to_test_readable(segment["mask"])

    expected = [
        {"score": None, "label": label, "mask": {"hash": digest, "shape": (480, 640), "white_pixels": white}}
        for label, digest, white in (
            ("LABEL_0", "42d0907228", 10714),
            ("LABEL_1", "46b8cc3976", 296486),
        )
    ]
    self.assertEqual(nested_simplify(outputs, decimals=4), expected)
@require_torch
@slow
def test_integration_torch_image_segmentation(self):
    """Compare DETR panoptic outputs for one image and for a batch of two identical images."""
    image_url = "http://images.cocodataset.org/val2017/000000039769.jpg"
    image_segmenter = pipeline(
        "image-segmentation",
        model="facebook/detr-resnet-50-panoptic",
        threshold=0.0,
        overlap_mask_area_threshold=0.0,
    )

    # Expected segments for the image, as (score, label, mask hash, white pixel count).
    expected = [
        {"score": score, "label": label, "mask": {"hash": digest, "shape": (480, 640), "white_pixels": white}}
        for score, label, digest, white in (
            (0.9094, "blanket", "dcff19a97a", 16617),
            (0.9941, "cat", "9c0af87bd0", 59185),
            (0.9987, "remote", "c7870600d6", 4182),
            (0.9995, "remote", "ef899a25fd", 2275),
            (0.9722, "couch", "37b8446ac5", 172380),
            (0.9994, "cat", "6a09d3655e", 52561),
        )
    ]

    outputs = image_segmenter(image_url)
    # Shortening by hashing
    for segment in outputs:
        segment["mask"] = mask_to_test_readable(segment["mask"])
    self.assertEqual(nested_simplify(outputs, decimals=4), expected)

    outputs = image_segmenter([image_url, image_url])
    # Shortening by hashing
    for per_image in outputs:
        for segment in per_image:
            segment["mask"] = mask_to_test_readable(segment["mask"])
    # Both batch entries are the same image, so each must match the single-image result.
    self.assertEqual(nested_simplify(outputs, decimals=4), [expected, expected])
@require_torch
@slow
def test_threshold(self):
    """Check DETR panoptic outputs at `threshold=0.999` and at `threshold=0.5`."""
    image_url = "http://images.cocodataset.org/val2017/000000039769.jpg"
    image_segmenter = pipeline("image-segmentation", model="facebook/detr-resnet-50-panoptic")

    def expected_segments(rows):
        # rows: (score, label, mask hash, white pixel count)
        return [
            {"score": score, "label": label, "mask": {"hash": digest, "shape": (480, 640), "white_pixels": white}}
            for score, label, digest, white in rows
        ]

    outputs = image_segmenter(image_url, threshold=0.999)
    # Shortening by hashing
    for segment in outputs:
        segment["mask"] = mask_to_test_readable(segment["mask"])
    self.assertEqual(
        nested_simplify(outputs, decimals=4),
        expected_segments(
            (
                (0.9995, "remote", "d02404f578", 2789),
                (0.9994, "cat", "eaa115b40c", 304411),
            )
        ),
    )

    outputs = image_segmenter(image_url, threshold=0.5)
    for segment in outputs:
        segment["mask"] = mask_to_test_readable(segment["mask"])
    self.assertEqual(
        nested_simplify(outputs, decimals=4),
        expected_segments(
            (
                (0.9941, "cat", "9c0af87bd0", 59185),
                (0.9987, "remote", "c7870600d6", 4182),
                (0.9995, "remote", "ef899a25fd", 2275),
                (0.9722, "couch", "37b8446ac5", 172380),
                (0.9994, "cat", "6a09d3655e", 52561),
            )
        ),
    )
@require_torch
@slow
def test_maskformer(self):
    """Check MaskFormer (Swin base, ADE) outputs on the first ADE20k fixture image."""
    threshold = 0.8
    model_id = "facebook/maskformer-swin-base-ade"
    image_segmenter = pipeline(
        "image-segmentation",
        model=AutoModelForInstanceSegmentation.from_pretrained(model_id),
        image_processor=AutoImageProcessor.from_pretrained(model_id),
    )

    fixtures = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
    image = fixtures[0]["image"].convert("RGB")
    outputs = image_segmenter(image, threshold=threshold)

    # Shortening by hashing
    for segment in outputs:
        segment["mask"] = mask_to_test_readable(segment["mask"])

    # (score, label, mask hash, white pixel count)
    expected = [
        {"score": score, "label": label, "mask": {"hash": digest, "shape": (512, 683), "white_pixels": white}}
        for score, label, digest, white in (
            (0.9974, "wall", "a547b7c062", 14252),
            (0.949, "house", "0da9b7b38f", 132177),
            (0.9995, "grass", "1d07ea0a26", 53444),
            (0.9976, "tree", "6cdc97c7da", 7944),
            (0.8239, "plant", "1ab4ce378f", 4136),
            (0.9942, "road, route", "39c5d17be5", 1941),
            (1.0, "sky", "a3756324a6", 135802),
        )
    ]
    self.assertEqual(nested_simplify(outputs, decimals=4), expected)
@require_torch
@slow
def test_oneformer(self):
    """Check OneFormer outputs for the default, `instance` and `semantic` subtasks."""
    image_segmenter = pipeline(model="shi-labs/oneformer_ade20k_swin_tiny")
    fixtures = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
    image = fixtures[0]["image"].convert("RGB")

    def readable(segments):
        # Shortening by hashing
        for segment in segments:
            segment["mask"] = mask_to_test_readable(segment["mask"])
        return nested_simplify(segments, decimals=4)

    def expected_segments(rows):
        # rows: (score, label, mask hash, white pixel count)
        return [
            {"score": score, "label": label, "mask": {"hash": digest, "white_pixels": white, "shape": (512, 683)}}
            for score, label, digest, white in rows
        ]

    outputs = image_segmenter(image, threshold=0.99)
    self.assertEqual(
        readable(outputs),
        expected_segments(
            (
                (0.9981, "grass", "3a92904d4c", 118131),
                (0.9992, "sky", "fa2300cc9a", 231565),
            )
        ),
    )

    # Different task
    outputs = image_segmenter(image, threshold=0.99, subtask="instance")
    self.assertEqual(
        readable(outputs),
        expected_segments(
            (
                (0.9991, "sky", "8b1ffad016", 230566),
                (0.9981, "grass", "9bbdf83d3d", 119130),
            )
        ),
    )

    # Different task
    outputs = image_segmenter(image, subtask="semantic")
    self.assertEqual(
        readable(outputs),
        expected_segments(
            (
                (None, "wall", "897fb20b7f", 14506),
                (None, "building", "f2a68c63e4", 125019),
                (None, "sky", "e0ca3a548e", 135330),
                (None, "tree", "7c9544bcac", 16263),
                (None, "road, route", "2c7704e491", 2143),
                (None, "grass", "bf6c2867e0", 53040),
                (None, "plant", "93c4b7199e", 3335),
                (None, "house", "93ec419ad5", 60),
            )
        ),
    )
def test_save_load(self):
    """Save an image-segmentation pipeline to a temporary directory and build a new one from it."""
    model_id = "hf-internal-testing/tiny-detr-mobilenetsv3-panoptic"
    image_segmenter = pipeline(
        task="image-segmentation",
        model=AutoModelForImageSegmentation.from_pretrained(model_id),
        image_processor=AutoImageProcessor.from_pretrained(model_id),
    )
    with tempfile.TemporaryDirectory() as tmpdirname:
        image_segmenter.save_pretrained(tmpdirname)
        # Reloading must happen while the temporary directory still exists.
        pipeline(task="image-segmentation", model=tmpdirname)

View File

@@ -0,0 +1,388 @@
# Copyright 2024 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import base64
import unittest
from transformers import MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING, is_vision_available
from transformers.pipelines import ImageTextToTextPipeline, pipeline
from transformers.testing_utils import (
is_pipeline_test,
require_torch,
require_vision,
slow,
)
from .test_pipelines_common import ANY
# Pillow is optional: when it is missing, define a stub so the name `Image` always exists.
if not is_vision_available():

    class Image:
        """Stand-in for ``PIL.Image`` used when vision support is unavailable."""

        @staticmethod
        def open(*args, **kwargs):
            """Accept any arguments and return ``None``."""
            return None

else:
    from PIL import Image
@is_pipeline_test
@require_vision
class ImageTextToTextPipelineTests(unittest.TestCase):
model_mapping = MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING
def get_test_pipeline(self, model, tokenizer, processor, image_processor, dtype="float32"):
    """Build an `ImageTextToTextPipeline` plus two example inputs.

    Returns:
        A ``(pipeline, examples)`` tuple. Each example is a dict with an ``"images"``
        entry (an opened image, then a file path) and a ``"text"`` prompt.
    """
    image_path = "./tests/fixtures/tests_samples/COCO/000000039769.png"
    pipe = ImageTextToTextPipeline(model=model, processor=processor, dtype=dtype, max_new_tokens=10)
    # Prompts are prefixed with the tokenizer's image token when it defines one, else with nothing.
    image_token = getattr(processor.tokenizer, "image_token", "")
    first_example = {"images": Image.open(image_path), "text": f"{image_token}This is a "}
    second_example = {"images": image_path, "text": f"{image_token}Here I see a "}
    return pipe, [first_example, second_example]
def run_pipeline_test(self, pipe, examples):
    """Run *pipe* on the first example and check that it yields one input/generated text pair."""
    sample = examples[0]
    outputs = pipe(sample.get("images"), text=sample.get("text"))
    expected = [{"input_text": ANY(str), "generated_text": ANY(str)}]
    self.assertEqual(outputs, expected)
@require_torch
def test_small_model_pt_token_text_only(self):
    """Text-only usage: a plain string prompt, then a batch of two chat conversations."""
    pipe = pipeline("image-text-to-text", model="llava-hf/llava-interleave-qwen-0.5b-hf")

    # Plain string prompt.
    text = "What is the capital of France? Assistant:"
    outputs = pipe(text=text)
    expected_plain = [
        {
            "input_text": "What is the capital of France? Assistant:",
            "generated_text": "What is the capital of France? Assistant: The capital of France is Paris.",
        }
    ]
    self.assertEqual(outputs, expected_plain)

    # Two independent single-turn conversations.
    messages = [
        [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Write a poem on Hugging Face, the company"},
                ],
            },
        ],
        [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "What is the capital of France?"},
                ],
            },
        ],
    ]
    outputs = pipe(text=messages)

    expected_poem_chat = {
        "input_text": [
            {
                "role": "user",
                "content": [{"type": "text", "text": "Write a poem on Hugging Face, the company"}],
            }
        ],
        "generated_text": [
            {
                "role": "user",
                "content": [{"type": "text", "text": "Write a poem on Hugging Face, the company"}],
            },
            {
                "role": "assistant",
                "content": "Hugging Face, a company of minds\nWith tools and services that make our lives easier\nFrom natural language processing\nTo machine learning and more, they've got it all\n\nThey've made it possible for us to be more\nInformed and efficient, with their tools and services\nFrom image and speech recognition\nTo text and language translation, they've got it all\n\nThey've made it possible for us to be more\nInformed and efficient, with their tools and services\nFrom image and speech recognition\nTo text and language translation, they've got it all\n\nThey've made it possible for us to be more\nInformed and efficient, with their tools and services\nFrom image and speech recognition\nTo text and language translation, they've got it all\n\nThey've made it possible for us to be more\nInformed and efficient, with their tools and services\nFrom image and speech recognition\nTo text and language translation, they've got it all\n\nThey've made it possible for us to be more\nInformed and efficient, with their tools and services\nFrom image and speech recognition\nTo text and language translation, they've got it all\n\nThey've made it possible for us to be more\nInformed and efficient, with their tools and",
            },
        ],
    }
    expected_capital_chat = {
        "input_text": [{"role": "user", "content": [{"type": "text", "text": "What is the capital of France?"}]}],
        "generated_text": [
            {"role": "user", "content": [{"type": "text", "text": "What is the capital of France?"}]},
            {"role": "assistant", "content": "Paris"},
        ],
    }
    # One result list per conversation.
    self.assertEqual(outputs, [[expected_poem_chat], [expected_capital_chat]])
@require_torch
def test_small_model_pt_token(self):
    """Prompt containing an `<image>` token, for one image and for a list of two images."""
    pipe = pipeline("image-text-to-text", model="llava-hf/llava-interleave-qwen-0.5b-hf")
    image = "./tests/fixtures/tests_samples/COCO/000000039769.png"
    text = "<image> What this is? Assistant: This is"

    outputs = pipe(image, text=text)
    expected_single = {
        "input_text": "<image> What this is? Assistant: This is",
        "generated_text": "<image> What this is? Assistant: This is a photo of two cats lying on a pink blanket. The cats are sleeping and appear to be comfortable. The photo captures a moment of tranquility and companionship between the two feline friends.",
    }
    self.assertEqual(outputs, [expected_single])

    outputs = pipe([image, image], text=[text, text])
    # Both entries use the same image and prompt, so the same result is expected twice.
    expected_per_item = {
        "input_text": "<image> What this is? Assistant: This is",
        "generated_text": "<image> What this is? Assistant: This is a photo of two cats lying on a pink blanket. The cats are facing the camera, and they appear to be sleeping or resting. The blanket is placed on a couch, and the cats are positioned in such a way that they are facing the camera. The image captures a peaceful moment between the two cats, and it's a great way to showcase their cuteness and relaxed demeanor.",
    }
    self.assertEqual(outputs, [expected_per_item, expected_per_item])
@require_torch
def test_consistent_batching_behaviour(self):
    """Outputs must be identical whether or not `batch_size=2` is passed."""
    pipe = pipeline("image-text-to-text", model="microsoft/kosmos-2-patch14-224")
    image = "./tests/fixtures/tests_samples/COCO/000000039769.png"
    prompt = "a photo of"

    unbatched = pipe([image] * 2, text=[prompt] * 2, max_new_tokens=10)
    batched = pipe([image] * 2, text=[prompt] * 2, batch_size=2, max_new_tokens=10)
    self.assertEqual(unbatched, batched)
@slow
@require_torch
def test_model_pt_chat_template(self):
    """Chat input with two image placeholders; the images are passed positionally."""
    pipe = pipeline("image-text-to-text", model="llava-hf/llava-interleave-qwen-0.5b-hf")
    image_ny = "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
    image_chicago = "https://cdn.britannica.com/59/94459-050-DBA42467/Skyline-Chicago.jpg"
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Whats the difference between these two images?"},
                {"type": "image"},
                {"type": "image"},
            ],
        }
    ]
    outputs = pipe([image_ny, image_chicago], text=messages, return_full_text=True, max_new_tokens=10)

    # In the expected output each image placeholder carries the URL of its image.
    expected_user_turn = {
        "role": "user",
        "content": [
            {"type": "text", "text": "Whats the difference between these two images?"},
            {
                "type": "image",
                "image": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg",
            },
            {
                "type": "image",
                "image": "https://cdn.britannica.com/59/94459-050-DBA42467/Skyline-Chicago.jpg",
            },
        ],
    }
    expected_assistant_turn = {
        "role": "assistant",
        "content": "The first image shows a statue of Liberty in the",
    }
    self.assertEqual(
        outputs,
        [
            {
                "input_text": [expected_user_turn],
                "generated_text": [expected_user_turn, expected_assistant_turn],
            }
        ],
    )
@slow
@require_torch
def test_model_pt_chat_template_continue_final_message(self):
    """A chat ending with an assistant turn: the expected output extends that turn's text."""
    pipe = pipeline("image-text-to-text", model="llava-hf/llava-interleave-qwen-0.5b-hf")
    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "image",
                    "image": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg",
                },
                {"type": "text", "text": "Describe this image."},
            ],
        },
        {
            "role": "assistant",
            "content": [
                {"type": "text", "text": "There is a dog and"},
            ],
        },
    ]
    outputs = pipe(text=messages, max_new_tokens=10)

    expected_user_turn = {
        "role": "user",
        "content": [
            {
                "type": "image",
                "image": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg",
            },
            {"type": "text", "text": "Describe this image."},
        ],
    }
    expected_prefill_turn = {"role": "assistant", "content": [{"type": "text", "text": "There is a dog and"}]}
    expected_completed_turn = {
        "role": "assistant",
        "content": [
            {
                "type": "text",
                "text": "There is a dog and a person in the image. The dog is sitting",
            }
        ],
    }
    self.assertEqual(
        outputs,
        [
            {
                "input_text": [expected_user_turn, expected_prefill_turn],
                "generated_text": [expected_user_turn, expected_completed_turn],
            }
        ],
    )
@slow
@require_torch
def test_model_pt_chat_template_new_text(self):
    """With `return_full_text=False` the expected `generated_text` is a plain string."""
    pipe = pipeline("image-text-to-text", model="llava-hf/llava-interleave-qwen-0.5b-hf")
    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "image",
                    "image": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg",
                },
                {"type": "text", "text": "Describe this image."},
            ],
        }
    ]
    outputs = pipe(text=messages, return_full_text=False, max_new_tokens=10)

    expected_input = [
        {
            "role": "user",
            "content": [
                {
                    "type": "image",
                    "image": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg",
                },
                {"type": "text", "text": "Describe this image."},
            ],
        }
    ]
    expected = [{"input_text": expected_input, "generated_text": "In the image, a woman is sitting on the"}]
    self.assertEqual(outputs, expected)
@slow
@require_torch
def test_model_pt_chat_template_image_url(self):
    """Chat content of type `image_url` whose URL points at a remote image."""
    pipe = pipeline("image-text-to-text", model="llava-hf/llava-interleave-qwen-0.5b-hf")
    image_part = {
        "type": "image_url",
        "image_url": {
            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
        },
    }
    text_part = {"type": "text", "text": "Describe this image in one sentence."}
    messages = [{"role": "user", "content": [image_part, text_part]}]

    results = pipe(text=messages, return_full_text=False, max_new_tokens=10)
    generated = results[0]["generated_text"]
    self.assertEqual(generated, "A statue of liberty in the foreground of a city")
@slow
@require_torch
def test_model_pt_chat_template_image_url_base64(self):
    """Chat content of type `image_url` whose URL is a base64 `data:` URL of a local fixture."""
    with open("./tests/fixtures/tests_samples/COCO/000000039769.png", "rb") as image_file:
        base64_image = base64.b64encode(image_file.read()).decode("utf-8")

    pipe = pipeline("image-text-to-text", model="llava-hf/llava-onevision-qwen2-0.5b-ov-hf")
    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "image_url",
                    # The fixture is a PNG file, so the data URL declares `image/png`
                    # (it previously claimed `image/jpeg`, which did not match the payload).
                    "image_url": {"url": f"data:image/png;base64,{base64_image}"},
                },
                {"type": "text", "text": "Describe this image in one sentence."},
            ],
        }
    ]
    outputs = pipe(text=messages, return_full_text=False, max_new_tokens=10)[0]["generated_text"]
    self.assertEqual(outputs, "Two cats are sleeping on a pink blanket, with")

View File

@@ -0,0 +1,91 @@
# Copyright 2023 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from transformers import (
MODEL_FOR_IMAGE_TO_IMAGE_MAPPING,
AutoImageProcessor,
AutoModelForImageToImage,
ImageToImagePipeline,
is_vision_available,
pipeline,
)
from transformers.testing_utils import (
is_pipeline_test,
require_torch,
require_vision,
slow,
)
# Pillow is optional: when it is missing, define a stub so the name `Image` always exists.
if not is_vision_available():

    class Image:
        """Stand-in for ``PIL.Image`` used when vision support is unavailable."""

        @staticmethod
        def open(*args, **kwargs):
            """Accept any arguments and return ``None``."""
            return None

else:
    from PIL import Image
@is_pipeline_test
@require_torch
@require_vision
class ImageToImagePipelineTests(unittest.TestCase):
    """Slow tests for the `image-to-image` pipeline with `caidas/swin2SR-classical-sr-x2-64`."""

    model_mapping = MODEL_FOR_IMAGE_TO_IMAGE_MAPPING
    # NOTE: this list is built while the class body executes, so `Image.open` runs at that point.
    # The inputs are one opened image and one image URL.
    examples = [
        Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"),
        "http://images.cocodataset.org/val2017/000000039769.jpg",
    ]

    def _assert_upscaled(self, upscaler):
        """Run *upscaler* on ``self.examples`` and check the count, type and size of the outputs."""
        upscaled_list = upscaler(self.examples)
        self.assertEqual(len(upscaled_list), len(self.examples))
        for output in upscaled_list:
            self.assertIsInstance(output, Image.Image)
        for index in range(2):
            self.assertEqual(upscaled_list[index].size, (1296, 976))

    @require_torch
    @require_vision
    @slow
    def test_pipeline(self, dtype="float32"):
        """Upscale the examples with a pipeline created from the model id and *dtype*."""
        model_id = "caidas/swin2SR-classical-sr-x2-64"
        self._assert_upscaled(pipeline("image-to-image", model=model_id, dtype=dtype))

    @require_torch
    @require_vision
    @slow
    def test_pipeline_fp16(self):
        """Same checks as `test_pipeline`, run with `dtype="float16"`."""
        self.test_pipeline(dtype="float16")

    @require_torch
    @require_vision
    @slow
    def test_pipeline_model_processor(self):
        """Upscale the examples with a pipeline assembled from an explicit model and image processor."""
        model_id = "caidas/swin2SR-classical-sr-x2-64"
        model = AutoModelForImageToImage.from_pretrained(model_id)
        image_processor = AutoImageProcessor.from_pretrained(model_id)
        self._assert_upscaled(ImageToImagePipeline(model=model, image_processor=image_processor))

View File

@@ -0,0 +1,269 @@
# Copyright 2022 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import io
import unittest
import httpx
from transformers import MODEL_FOR_VISION_2_SEQ_MAPPING, is_vision_available
from transformers.pipelines import ImageToTextPipeline, pipeline
from transformers.testing_utils import (
is_pipeline_test,
require_torch,
require_vision,
slow,
)
from .test_pipelines_common import ANY
# Pillow is optional: when it is missing, define a stub so the name `Image` always exists.
if not is_vision_available():

    class Image:
        """Stand-in for ``PIL.Image`` used when vision support is unavailable."""

        @staticmethod
        def open(*args, **kwargs):
            """Accept any arguments and return ``None``."""
            return None

else:
    from PIL import Image
@is_pipeline_test
@require_vision
class ImageToTextPipelineTests(unittest.TestCase):
    """Tests for the `image-to-text` pipeline: plain captioning and prompt-conditioned generation."""

    model_mapping = MODEL_FOR_VISION_2_SEQ_MAPPING

    def get_test_pipeline(
        self,
        model,
        tokenizer=None,
        image_processor=None,
        feature_extractor=None,
        processor=None,
        dtype="float32",
    ):
        """Build an `ImageToTextPipeline` around `model` and return it together with two sample inputs.

        Returns:
            A `(pipeline, examples)` tuple. `examples` holds the same local COCO fixture twice: once opened
            through `Image.open` and once as a plain path string.
        """
        pipe = ImageToTextPipeline(
            model=model,
            tokenizer=tokenizer,
            feature_extractor=feature_extractor,
            image_processor=image_processor,
            processor=processor,
            dtype=dtype,
            max_new_tokens=20,
        )
        examples = [
            Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"),
            "./tests/fixtures/tests_samples/COCO/000000039769.png",
        ]
        return pipe, examples

    def run_pipeline_test(self, pipe, examples):
        """Check the output structure for a two-item input: one single-entry list of generated text per item."""
        outputs = pipe(examples)
        self.assertEqual(
            outputs,
            [
                [{"generated_text": ANY(str)}],
                [{"generated_text": ANY(str)}],
            ],
        )

    @require_torch
    def test_small_model_pt(self):
        """Pin the exact output of the tiny random ViT-GPT2 checkpoint for a single image and for a pair."""
        pipe = pipeline("image-to-text", model="hf-internal-testing/tiny-random-vit-gpt2", max_new_tokens=19)
        image = "./tests/fixtures/tests_samples/COCO/000000039769.png"
        expected = {
            "generated_text": "growthgrowthgrowthgrowthgrowthgrowthgrowthgrowthgrowthgrowthgrowthgrowthgrowthgrowthgrowthgrowthgrowthGOGO"
        }

        outputs = pipe(image)
        self.assertEqual(outputs, [expected])

        outputs = pipe([image, image])
        self.assertEqual(outputs, [[expected], [expected]])

    @require_torch
    def test_small_model_pt_conditional(self):
        """A prompt passed to the pipeline must be the prefix of the generated text."""
        pipe = pipeline("image-to-text", model="hf-internal-testing/tiny-random-BlipForConditionalGeneration")
        image = "./tests/fixtures/tests_samples/COCO/000000039769.png"
        prompt = "a photo of"

        outputs = pipe(image, prompt=prompt)
        self.assertTrue(outputs[0]["generated_text"].startswith(prompt))

    @require_torch
    def test_consistent_batching_behaviour(self):
        """Prompted generation must keep the prompt prefix for list and dataset inputs at several batch sizes."""
        pipe = pipeline(
            "image-to-text", model="hf-internal-testing/tiny-random-BlipForConditionalGeneration", max_new_tokens=10
        )
        image = "./tests/fixtures/tests_samples/COCO/000000039769.png"
        prompt = "a photo of"

        outputs = pipe([image, image], prompt=prompt)
        self.assertTrue(outputs[0][0]["generated_text"].startswith(prompt))
        self.assertTrue(outputs[1][0]["generated_text"].startswith(prompt))

        outputs = pipe([image, image], prompt=prompt, batch_size=2)
        self.assertTrue(outputs[0][0]["generated_text"].startswith(prompt))
        self.assertTrue(outputs[1][0]["generated_text"].startswith(prompt))

        from torch.utils.data import Dataset

        class MyDataset(Dataset):
            def __len__(self):
                return 5

            def __getitem__(self, i):
                return "./tests/fixtures/tests_samples/COCO/000000039769.png"

        dataset = MyDataset()
        for batch_size in (1, 2, 4):
            outputs = pipe(dataset, prompt=prompt, batch_size=batch_size if batch_size > 1 else None)
            # Consume the returned object a single time; calling `list(outputs)` inside each assertion
            # would iterate it once per check.
            results = list(outputs)
            self.assertTrue(results[0][0]["generated_text"].startswith(prompt))
            self.assertTrue(results[1][0]["generated_text"].startswith(prompt))

    @slow
    @require_torch
    def test_large_model_pt(self):
        """Pin the caption produced by `ydshieh/vit-gpt2-coco-en` for a single image and for a pair."""
        pipe = pipeline("image-to-text", model="ydshieh/vit-gpt2-coco-en")
        image = "./tests/fixtures/tests_samples/COCO/000000039769.png"
        expected = {"generated_text": "a cat laying on a blanket next to a cat laying on a bed "}

        outputs = pipe(image)
        self.assertEqual(outputs, [expected])

        outputs = pipe([image, image])
        self.assertEqual(outputs, [[expected], [expected]])

    @slow
    @require_torch
    def test_generation_pt_blip(self):
        """Pin the unconditional BLIP caption for the remote pokemon sample image."""
        pipe = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
        url = "https://huggingface.co/datasets/sayakpaul/sample-datasets/resolve/main/pokemon.png"
        image = Image.open(io.BytesIO(httpx.get(url, follow_redirects=True).content))

        outputs = pipe(image)
        self.assertEqual(outputs, [{"generated_text": "a pink pokemon pokemon with a blue shirt and a blue shirt"}])

    @slow
    @require_torch
    def test_generation_pt_git(self):
        """Pin the unconditional GIT caption for the remote pokemon sample image."""
        pipe = pipeline("image-to-text", model="microsoft/git-base-coco")
        url = "https://huggingface.co/datasets/sayakpaul/sample-datasets/resolve/main/pokemon.png"
        image = Image.open(io.BytesIO(httpx.get(url, follow_redirects=True).content))

        outputs = pipe(image)
        self.assertEqual(outputs, [{"generated_text": "a cartoon of a purple character."}])

    @slow
    @require_torch
    def test_conditional_generation_pt_blip(self):
        """Pin the prompted BLIP output and check that a list of prompts raises `ValueError`."""
        pipe = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
        url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/ai2d-demo.jpg"
        image = Image.open(io.BytesIO(httpx.get(url, follow_redirects=True).content))
        prompt = "a photography of"

        outputs = pipe(image, prompt=prompt)
        self.assertEqual(outputs, [{"generated_text": "a photography of a volcano"}])

        with self.assertRaises(ValueError):
            pipe([image, image], prompt=[prompt, prompt])

    @slow
    @require_torch
    def test_conditional_generation_pt_git(self):
        """Pin the prompted GIT output and check that a list of prompts raises `ValueError`."""
        pipe = pipeline("image-to-text", model="microsoft/git-base-coco")
        url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/ai2d-demo.jpg"
        image = Image.open(io.BytesIO(httpx.get(url, follow_redirects=True).content))
        prompt = "a photo of a"

        outputs = pipe(image, prompt=prompt)
        self.assertEqual(outputs, [{"generated_text": "a photo of a tent with a tent and a tent in the background."}])

        with self.assertRaises(ValueError):
            pipe([image, image], prompt=[prompt, prompt])

    @slow
    @require_torch
    def test_conditional_generation_pt_pix2struct(self):
        """Pin the Pix2Struct answer to a multiple-choice prompt and check that a list of prompts raises."""
        pipe = pipeline("image-to-text", model="google/pix2struct-ai2d-base")
        url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/ai2d-demo.jpg"
        image = Image.open(io.BytesIO(httpx.get(url, follow_redirects=True).content))
        prompt = "What does the label 15 represent? (1) lava (2) core (3) tunnel (4) ash cloud"

        outputs = pipe(image, prompt=prompt)
        self.assertEqual(outputs, [{"generated_text": "ash cloud"}])

        with self.assertRaises(ValueError):
            pipe([image, image], prompt=[prompt, prompt])

    @slow
    @require_torch
    @unittest.skip("TODO (joao, raushan): there is something wrong with image processing in the model/pipeline")
    def test_conditional_generation_llava(self):
        """Pin the BakLLaVA answer for a chat-style prompt (currently skipped, see the skip reason)."""
        pipe = pipeline("image-to-text", model="llava-hf/bakLlava-v1-hf")

        prompt = (
            "<image>\nUSER: What does the label 15 represent? (1) lava (2) core (3) tunnel (4) ash cloud?\nASSISTANT:"
        )

        outputs = pipe(
            "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/ai2d-demo.jpg",
            prompt=prompt,
            generate_kwargs={"max_new_tokens": 200},
        )
        self.assertEqual(
            outputs,
            [
                {
                    "generated_text": "\nUSER: What does the label 15 represent? (1) lava (2) core (3) tunnel (4) ash cloud?\nASSISTANT: Lava"
                }
            ],
        )

    @slow
    @require_torch
    def test_nougat(self):
        """Pin the first 19 new tokens generated by `facebook/nougat-base` for a remote page image."""
        pipe = pipeline("image-to-text", "facebook/nougat-base", max_new_tokens=19)

        outputs = pipe("https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/nougat_paper.png")
        self.assertEqual(
            outputs,
            [{"generated_text": "# Nougat: Neural Optical Understanding for Academic Documents\n\n Lukas Blec"}],
        )

View File

@@ -0,0 +1,193 @@
# Copyright 2025 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import datasets
from transformers.models.auto.modeling_auto import MODEL_FOR_KEYPOINT_MATCHING_MAPPING
from transformers.pipelines import KeypointMatchingPipeline, pipeline
from transformers.testing_utils import (
is_pipeline_test,
is_vision_available,
require_torch,
require_vision,
)
from .test_pipelines_common import ANY
if is_vision_available():
from PIL import Image
@is_pipeline_test
@require_torch
@require_vision
class KeypointMatchingPipelineTests(unittest.TestCase):
    """Tests for the `keypoint-matching` pipeline."""

    model_mapping = MODEL_FOR_KEYPOINT_MATCHING_MAPPING
    _dataset = None

    @classmethod
    def _load_dataset(cls):
        # Lazy loading of the dataset. Because it is a class method, it will only be loaded once per pytest process.
        if cls._dataset is None:
            cls._dataset = datasets.load_dataset("hf-internal-testing/image-matching-dataset", split="train")

    def get_test_pipeline(
        self,
        model,
        tokenizer=None,
        image_processor=None,
        feature_extractor=None,
        processor=None,
        torch_dtype="float32",
        dtype=None,
    ):
        """Build a `KeypointMatchingPipeline` around `model` and return it together with two sample inputs.

        Args:
            torch_dtype: dtype forwarded to the pipeline constructor (original parameter name, kept for
                backward compatibility).
            dtype: optional alias for `torch_dtype`, matching the parameter name used by the other pipeline
                tests' `get_test_pipeline`. When given, it takes precedence over `torch_dtype`.

        Returns:
            A `(pipeline, examples)` tuple where `examples` is a local COCO image and a remote COCO URL.
        """
        if dtype is not None:
            torch_dtype = dtype
        image_matcher = KeypointMatchingPipeline(
            model=model,
            tokenizer=tokenizer,
            feature_extractor=feature_extractor,
            image_processor=image_processor,
            processor=processor,
            torch_dtype=torch_dtype,
        )
        examples = [
            Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"),
            "http://images.cocodataset.org/val2017/000000039769.jpg",
        ]
        return image_matcher, examples

    def _assert_match(self, match, keypoint_0, keypoint_1, score):
        """Assert that `match` has the given `(x, y)` keypoints (1 decimal place) and `score` (3 decimal places)."""
        self.assertAlmostEqual(match["keypoint_image_0"]["x"], keypoint_0[0], places=1)
        self.assertAlmostEqual(match["keypoint_image_0"]["y"], keypoint_0[1], places=1)
        self.assertAlmostEqual(match["keypoint_image_1"]["x"], keypoint_1[0], places=1)
        self.assertAlmostEqual(match["keypoint_image_1"]["y"], keypoint_1[1], places=1)
        self.assertAlmostEqual(match["score"], score, places=3)

    def run_pipeline_test(self, image_matcher, examples):
        """Check the output structure for a single image pair and for a list of four image pairs."""
        self._load_dataset()
        outputs = image_matcher(
            [
                Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"),
                "http://images.cocodataset.org/val2017/000000039769.jpg",
            ]
        )

        self.assertEqual(
            outputs,
            [
                {
                    "keypoint_image_0": {"x": ANY(float), "y": ANY(float)},
                    "keypoint_image_1": {"x": ANY(float), "y": ANY(float)},
                    "score": ANY(float),
                }
            ]
            * 2,  # 2 matches per image pair
        )

        # Accepts URL + PIL.Image + lists
        outputs = image_matcher(
            [
                [
                    Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"),
                    "http://images.cocodataset.org/val2017/000000039769.jpg",
                ],
                [self._dataset[0]["image"], self._dataset[1]["image"]],
                [self._dataset[1]["image"], self._dataset[2]["image"]],
                [self._dataset[2]["image"], self._dataset[0]["image"]],
            ]
        )
        self.assertEqual(
            outputs,
            [
                [
                    {
                        "keypoint_image_0": {"x": ANY(float), "y": ANY(float)},
                        "keypoint_image_1": {"x": ANY(float), "y": ANY(float)},
                        "score": ANY(float),
                    }
                ]
                * 2  # 2 matches per image pair
            ]
            * 4,  # 4 image pairs
        )

    @require_torch
    def test_single_image(self):
        """A lone image, bare or wrapped in a one-element list, must be rejected with `ValueError`."""
        self._load_dataset()
        small_model = "magic-leap-community/superglue_outdoor"
        image_matcher = pipeline("keypoint-matching", model=small_model)

        with self.assertRaises(ValueError):
            image_matcher(
                self._dataset[0]["image"],
                threshold=0.0,
            )
        with self.assertRaises(ValueError):
            image_matcher(
                [self._dataset[0]["image"]],
                threshold=0.0,
            )

    @require_torch
    def test_single_pair(self):
        """Pin the first match returned for the pair (image 0, image 1)."""
        self._load_dataset()
        small_model = "magic-leap-community/superglue_outdoor"
        image_matcher = pipeline("keypoint-matching", model=small_model)

        image_0: Image.Image = self._dataset[0]["image"]
        image_1: Image.Image = self._dataset[1]["image"]
        outputs = image_matcher((image_0, image_1), threshold=0.0)

        # First match from the image pair
        self._assert_match(outputs[0], (698, 469), (434, 440), 0.9905)

    @require_torch
    def test_multiple_pairs(self):
        """Pin the first match returned for each of three image pairs passed in one call."""
        self._load_dataset()
        small_model = "magic-leap-community/superglue_outdoor"
        image_matcher = pipeline("keypoint-matching", model=small_model)

        image_0: Image.Image = self._dataset[0]["image"]
        image_1: Image.Image = self._dataset[1]["image"]
        image_2: Image.Image = self._dataset[2]["image"]

        outputs = image_matcher(
            [
                (image_0, image_1),
                (image_1, image_2),
                (image_2, image_0),
            ],
            threshold=1e-4,
        )

        # First pair (image_0, image_1), first match
        self._assert_match(outputs[0][0], (698, 469), (434, 440), 0.9905)

        # Second pair (image_1, image_2), first match
        self._assert_match(outputs[1][0], (272, 310), (228, 568), 0.9890)

        # Third pair (image_2, image_0), first match
        self._assert_match(outputs[2][0], (385, 677), (689, 351), 0.9900)

View File

@@ -0,0 +1,175 @@
# Copyright 2023 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy as np
from huggingface_hub.utils import insecure_hashlib
from transformers import (
MODEL_FOR_MASK_GENERATION_MAPPING,
is_torch_available,
is_vision_available,
pipeline,
)
from transformers.pipelines import MaskGenerationPipeline
from transformers.testing_utils import (
Expectations,
is_pipeline_test,
nested_simplify,
require_torch,
require_vision,
slow,
)
if is_torch_available():
from transformers import MODEL_FOR_MASK_GENERATION_MAPPING
else:
MODEL_FOR_MASK_GENERATION_MAPPING = None
if not is_vision_available():

    class Image:
        # Placeholder so that module-level references to `Image.open` resolve when PIL is not installed.
        @staticmethod
        def open(*args, **kwargs):
            pass

else:
    from PIL import Image
def hashimage(image: "Image.Image") -> str:
    """Return a short fingerprint of `image`: the first 10 hex characters of the MD5 digest of `image.tobytes()`."""
    # The annotation is a string so it is never evaluated; that keeps this definition valid when `Image`
    # is the fallback stub class rather than the PIL module.
    digest = insecure_hashlib.md5(image.tobytes())
    return digest.hexdigest()[:10]
def mask_to_test_readable(mask: "Image.Image") -> dict:
    """Summarize `mask` as `{"hash": <short hash>, "shape": <array shape>}` so expected values stay compact.

    The hash comes from `hashimage(mask)` and the shape from converting `mask` with `np.array`.
    """
    # String annotation: never evaluated, so it is safe even when `Image` is the fallback stub class.
    shape = np.array(mask).shape
    return {"hash": hashimage(mask), "shape": shape}
@is_pipeline_test
@require_vision
@require_torch
class MaskGenerationPipelineTests(unittest.TestCase):
    """Tests for the `mask-generation` pipeline; masks are compared through their hash/shape summaries."""

    # Plain-dict copy of the mapping, or an empty dict when the mapping is unavailable/empty.
    model_mapping = dict(list(MODEL_FOR_MASK_GENERATION_MAPPING.items()) if MODEL_FOR_MASK_GENERATION_MAPPING else [])

    def get_test_pipeline(
        self,
        model,
        tokenizer=None,
        image_processor=None,
        feature_extractor=None,
        processor=None,
        dtype="float32",
    ):
        """Return a `MaskGenerationPipeline` built around `model` plus two copies of a local COCO image path."""
        sample_path = "./tests/fixtures/tests_samples/COCO/000000039769.png"
        image_segmenter = MaskGenerationPipeline(
            model=model,
            tokenizer=tokenizer,
            feature_extractor=feature_extractor,
            image_processor=image_processor,
            processor=processor,
            dtype=dtype,
        )
        return image_segmenter, [sample_path, sample_path]

    @unittest.skip(reason="TODO @Arthur: Implement me")
    def run_pipeline_test(self, mask_generator, examples):
        pass

    @slow
    @require_torch
    def test_small_model_pt(self):
        """Pin the hash, shape and score of every mask produced by `facebook/sam-vit-huge` on a COCO image."""
        image_segmenter = pipeline("mask-generation", model="facebook/sam-vit-huge")

        outputs = image_segmenter("http://images.cocodataset.org/val2017/000000039769.jpg", points_per_batch=256)

        # Shortening by hashing
        new_output = [
            {"mask": mask_to_test_readable(mask), "scores": outputs["scores"][idx]}
            for idx, mask in enumerate(outputs["masks"])
        ]

        # fmt: off
        # Only the final entry is selected through `Expectations`.
        last_output = Expectations({
            ("cuda", None): {'mask': {'hash': 'b5f47c9191', 'shape': (480, 640)}, 'scores': 0.8871},
            ("rocm", (9, 5)): {'mask': {'hash': 'b5f47c9191', 'shape': (480, 640)}, 'scores': 0.8872}
        }).get_expectation()

        expected_output = [
            {'mask': {'hash': '115ad19f5f', 'shape': (480, 640)}, 'scores': 1.0444},
            {'mask': {'hash': '6affa964c6', 'shape': (480, 640)}, 'scores': 1.021},
            {'mask': {'hash': 'dfe28a0388', 'shape': (480, 640)}, 'scores': 1.0167},
            {'mask': {'hash': 'c0a5f4a318', 'shape': (480, 640)}, 'scores': 1.0132},
            {'mask': {'hash': 'fe8065c197', 'shape': (480, 640)}, 'scores': 1.0053},
            {'mask': {'hash': 'e2d0b7a0b7', 'shape': (480, 640)}, 'scores': 0.9967},
            {'mask': {'hash': '453c7844bd', 'shape': (480, 640)}, 'scores': 0.993},
            {'mask': {'hash': '3d44f2926d', 'shape': (480, 640)}, 'scores': 0.9909},
            {'mask': {'hash': '64033ddc3f', 'shape': (480, 640)}, 'scores': 0.9879},
            {'mask': {'hash': '801064ff79', 'shape': (480, 640)}, 'scores': 0.9834},
            {'mask': {'hash': '6172f276ef', 'shape': (480, 640)}, 'scores': 0.9716},
            {'mask': {'hash': 'b49e60e084', 'shape': (480, 640)}, 'scores': 0.9612},
            {'mask': {'hash': 'a811e775fd', 'shape': (480, 640)}, 'scores': 0.9599},
            {'mask': {'hash': 'a6a8ebcf4b', 'shape': (480, 640)}, 'scores': 0.9552},
            {'mask': {'hash': '9d8257e080', 'shape': (480, 640)}, 'scores': 0.9532},
            {'mask': {'hash': '32de6454a8', 'shape': (480, 640)}, 'scores': 0.9516},
            {'mask': {'hash': 'af3d4af2c8', 'shape': (480, 640)}, 'scores': 0.9499},
            {'mask': {'hash': '3c6db475fb', 'shape': (480, 640)}, 'scores': 0.9483},
            {'mask': {'hash': 'c290813fb9', 'shape': (480, 640)}, 'scores': 0.9464},
            {'mask': {'hash': 'b6f0b8f606', 'shape': (480, 640)}, 'scores': 0.943},
            {'mask': {'hash': '92ce16bfdf', 'shape': (480, 640)}, 'scores': 0.943},
            {'mask': {'hash': 'c749b25868', 'shape': (480, 640)}, 'scores': 0.9408},
            {'mask': {'hash': 'efb6cab859', 'shape': (480, 640)}, 'scores': 0.9335},
            {'mask': {'hash': '1ff2eafb30', 'shape': (480, 640)}, 'scores': 0.9326},
            {'mask': {'hash': '788b798e24', 'shape': (480, 640)}, 'scores': 0.9262},
            {'mask': {'hash': 'abea804f0e', 'shape': (480, 640)}, 'scores': 0.8999},
            {'mask': {'hash': '7b9e8ddb73', 'shape': (480, 640)}, 'scores': 0.8986},
            {'mask': {'hash': 'cd24047c8a', 'shape': (480, 640)}, 'scores': 0.8984},
            {'mask': {'hash': '6943e6bcbd', 'shape': (480, 640)}, 'scores': 0.8873},
            last_output
        ]
        # fmt: on

        self.assertEqual(nested_simplify(new_output, decimals=4), expected_output)

    @require_torch
    @slow
    def test_threshold(self):
        """With `pred_iou_thresh=1`, pin the five masks returned by `facebook/sam-vit-huge`."""
        model_id = "facebook/sam-vit-huge"
        image_segmenter = pipeline("mask-generation", model=model_id)

        outputs = image_segmenter(
            "http://images.cocodataset.org/val2017/000000039769.jpg", pred_iou_thresh=1, points_per_batch=256
        )

        # Shortening by hashing
        new_output = [
            {"mask": mask_to_test_readable(mask), "scores": outputs["scores"][idx]}
            for idx, mask in enumerate(outputs["masks"])
        ]

        expected_output = [
            {"mask": {"hash": "115ad19f5f", "shape": (480, 640)}, "scores": 1.0444},
            {"mask": {"hash": "6affa964c6", "shape": (480, 640)}, "scores": 1.0210},
            {"mask": {"hash": "dfe28a0388", "shape": (480, 640)}, "scores": 1.0167},
            {"mask": {"hash": "c0a5f4a318", "shape": (480, 640)}, "scores": 1.0132},
            {"mask": {"hash": "fe8065c197", "shape": (480, 640)}, "scores": 1.0053},
        ]
        self.assertEqual(nested_simplify(new_output, decimals=4), expected_output)

View File

@@ -0,0 +1,297 @@
# Copyright 2021 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import datasets
from huggingface_hub import ObjectDetectionOutputElement
from transformers import (
MODEL_FOR_OBJECT_DETECTION_MAPPING,
AutoFeatureExtractor,
AutoModelForObjectDetection,
ObjectDetectionPipeline,
is_vision_available,
pipeline,
)
from transformers.testing_utils import (
compare_pipeline_output_to_hub_spec,
is_pipeline_test,
nested_simplify,
require_pytesseract,
require_timm,
require_torch,
require_vision,
slow,
)
from .test_pipelines_common import ANY
if not is_vision_available():

    class Image:
        # Placeholder so that module-level references to `Image.open` resolve when PIL is not installed.
        @staticmethod
        def open(*args, **kwargs):
            pass

else:
    from PIL import Image
@is_pipeline_test
@require_vision
@require_timm
@require_torch
class ObjectDetectionPipelineTests(unittest.TestCase):
    """Tests for the `object-detection` pipeline."""

    model_mapping = MODEL_FOR_OBJECT_DETECTION_MAPPING
    _dataset = None

    @classmethod
    def _load_dataset(cls):
        # Lazy loading of the dataset. Because it is a class method, it will only be loaded once per pytest process.
        if cls._dataset is not None:
            return
        # we use revision="refs/pr/1" until the PR is merged
        # https://hf.co/datasets/hf-internal-testing/fixtures_image_utils/discussions/1
        cls._dataset = datasets.load_dataset(
            "hf-internal-testing/fixtures_image_utils", split="test", revision="refs/pr/1"
        )

    def get_test_pipeline(
        self,
        model,
        tokenizer=None,
        image_processor=None,
        feature_extractor=None,
        processor=None,
        dtype="float32",
    ):
        """Return an `ObjectDetectionPipeline` built around `model` plus a one-element list with a COCO image path."""
        examples = ["./tests/fixtures/tests_samples/COCO/000000039769.png"]
        object_detector = ObjectDetectionPipeline(
            model=model,
            tokenizer=tokenizer,
            feature_extractor=feature_extractor,
            image_processor=image_processor,
            processor=processor,
            dtype=dtype,
        )
        return object_detector, examples

    def run_pipeline_test(self, object_detector, examples):
        """Check the structure of each detection for a single image and for a mixed batch of five inputs."""
        self._load_dataset()

        # Structure every detection is compared against.
        detection_schema = {
            "score": ANY(float),
            "label": ANY(str),
            "box": {"xmin": ANY(int), "ymin": ANY(int), "xmax": ANY(int), "ymax": ANY(int)},
        }

        outputs = object_detector("./tests/fixtures/tests_samples/COCO/000000039769.png", threshold=0.0)
        self.assertGreater(len(outputs), 0)
        for detected_object in outputs:
            self.assertEqual(detected_object, detection_schema)

        batch = [
            Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"),
            "http://images.cocodataset.org/val2017/000000039769.jpg",
            # RGBA
            self._dataset[0]["image"],
            # LA
            self._dataset[1]["image"],
            # L
            self._dataset[2]["image"],
        ]
        batch_outputs = object_detector(batch, threshold=0.0)

        self.assertEqual(len(batch), len(batch_outputs))
        for outputs in batch_outputs:
            self.assertGreater(len(outputs), 0)
            for detected_object in outputs:
                self.assertEqual(detected_object, detection_schema)
                compare_pipeline_output_to_hub_spec(detected_object, ObjectDetectionOutputElement)

    @require_torch
    def test_small_model_pt(self):
        """Pin the detections of the tiny DETR checkpoint for a single URL and for a pair of URLs."""
        model_id = "hf-internal-testing/tiny-detr-mobilenetsv3"
        image_url = "http://images.cocodataset.org/val2017/000000039769.jpg"

        model = AutoModelForObjectDetection.from_pretrained(model_id)
        feature_extractor = AutoFeatureExtractor.from_pretrained(model_id)
        object_detector = ObjectDetectionPipeline(model=model, feature_extractor=feature_extractor)

        expected = [
            {"score": 0.3376, "label": "LABEL_0", "box": {"xmin": 159, "ymin": 120, "xmax": 480, "ymax": 359}},
            {"score": 0.3376, "label": "LABEL_0", "box": {"xmin": 159, "ymin": 120, "xmax": 480, "ymax": 359}},
        ]

        outputs = object_detector(image_url, threshold=0.0)
        self.assertEqual(nested_simplify(outputs, decimals=4), expected)

        outputs = object_detector([image_url, image_url], threshold=0.0)
        self.assertEqual(nested_simplify(outputs, decimals=4), [expected, expected])

    @require_torch
    @slow
    def test_large_model_pt(self):
        """Pin the `facebook/detr-resnet-50` detections when the pipeline is built from loaded components."""
        model_id = "facebook/detr-resnet-50"
        image_url = "http://images.cocodataset.org/val2017/000000039769.jpg"

        model = AutoModelForObjectDetection.from_pretrained(model_id)
        feature_extractor = AutoFeatureExtractor.from_pretrained(model_id)
        object_detector = ObjectDetectionPipeline(model=model, feature_extractor=feature_extractor)

        expected = [
            {"score": 0.9982, "label": "remote", "box": {"xmin": 40, "ymin": 70, "xmax": 175, "ymax": 117}},
            {"score": 0.9960, "label": "remote", "box": {"xmin": 333, "ymin": 72, "xmax": 368, "ymax": 187}},
            {"score": 0.9955, "label": "couch", "box": {"xmin": 0, "ymin": 1, "xmax": 639, "ymax": 473}},
            {"score": 0.9988, "label": "cat", "box": {"xmin": 13, "ymin": 52, "xmax": 314, "ymax": 470}},
            {"score": 0.9987, "label": "cat", "box": {"xmin": 345, "ymin": 23, "xmax": 640, "ymax": 368}},
        ]

        outputs = object_detector(image_url)
        self.assertEqual(nested_simplify(outputs, decimals=4), expected)

        outputs = object_detector([image_url, image_url])
        self.assertEqual(nested_simplify(outputs, decimals=4), [expected, expected])

    @require_torch
    @slow
    def test_integration_torch_object_detection(self):
        """Pin the `facebook/detr-resnet-50` detections when the pipeline is built through `pipeline(...)`."""
        model_id = "facebook/detr-resnet-50"
        image_url = "http://images.cocodataset.org/val2017/000000039769.jpg"

        object_detector = pipeline("object-detection", model=model_id)

        expected = [
            {"score": 0.9982, "label": "remote", "box": {"xmin": 40, "ymin": 70, "xmax": 175, "ymax": 117}},
            {"score": 0.9960, "label": "remote", "box": {"xmin": 333, "ymin": 72, "xmax": 368, "ymax": 187}},
            {"score": 0.9955, "label": "couch", "box": {"xmin": 0, "ymin": 1, "xmax": 639, "ymax": 473}},
            {"score": 0.9988, "label": "cat", "box": {"xmin": 13, "ymin": 52, "xmax": 314, "ymax": 470}},
            {"score": 0.9987, "label": "cat", "box": {"xmin": 345, "ymin": 23, "xmax": 640, "ymax": 368}},
        ]

        outputs = object_detector(image_url)
        self.assertEqual(nested_simplify(outputs, decimals=4), expected)

        outputs = object_detector([image_url, image_url])
        self.assertEqual(nested_simplify(outputs, decimals=4), [expected, expected])

    @require_torch
    @slow
    def test_threshold(self):
        """With `threshold=0.9985`, pin the two `cat` detections returned for the COCO image."""
        threshold = 0.9985
        model_id = "facebook/detr-resnet-50"

        object_detector = pipeline("object-detection", model=model_id)

        outputs = object_detector("http://images.cocodataset.org/val2017/000000039769.jpg", threshold=threshold)
        expected = [
            {"score": 0.9988, "label": "cat", "box": {"xmin": 13, "ymin": 52, "xmax": 314, "ymax": 470}},
            {"score": 0.9987, "label": "cat", "box": {"xmin": 345, "ymin": 23, "xmax": 640, "ymax": 368}},
        ]
        self.assertEqual(nested_simplify(outputs, decimals=4), expected)

    @require_torch
    @require_pytesseract
    @slow
    def test_layoutlm(self):
        """Pin the detections of a LayoutLMv3 checkpoint on an invoice image at `threshold=0.9993`."""
        model_id = "Narsil/layoutlmv3-finetuned-funsd"
        threshold = 0.9993

        object_detector = pipeline("object-detection", model=model_id, threshold=threshold)

        outputs = object_detector(
            "https://huggingface.co/spaces/impira/docquery/resolve/2359223c1837a7587402bda0f2643382a6eefeab/invoice.png"
        )
        expected = [
            {"score": 0.9993, "label": "I-ANSWER", "box": {"xmin": 294, "ymin": 254, "xmax": 343, "ymax": 264}},
            {"score": 0.9993, "label": "I-ANSWER", "box": {"xmin": 294, "ymin": 254, "xmax": 343, "ymax": 264}},
        ]
        self.assertEqual(nested_simplify(outputs, decimals=4), expected)

View File

@@ -0,0 +1,548 @@
# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from huggingface_hub import QuestionAnsweringOutputElement
from transformers import (
MODEL_FOR_QUESTION_ANSWERING_MAPPING,
LxmertConfig,
QuestionAnsweringPipeline,
)
from transformers.data.processors.squad import SquadExample
from transformers.pipelines import QuestionAnsweringArgumentHandler, pipeline
from transformers.testing_utils import (
compare_pipeline_output_to_hub_spec,
is_pipeline_test,
is_torch_available,
nested_simplify,
require_torch,
slow,
)
if is_torch_available():
import torch
from .test_pipelines_common import ANY
# These 2 model types require different inputs than those of the usual text models.
_TO_SKIP = {"LayoutLMv2Config", "LayoutLMv3Config"}
@is_pipeline_test
class QAPipelineTests(unittest.TestCase):
model_mapping = MODEL_FOR_QUESTION_ANSWERING_MAPPING
if not hasattr(model_mapping, "is_dummy"):
model_mapping = {config: model for config, model in model_mapping.items() if config.__name__ not in _TO_SKIP}
def get_test_pipeline(
    self,
    model,
    tokenizer=None,
    image_processor=None,
    feature_extractor=None,
    processor=None,
    dtype="float32",
):
    """Return a `QuestionAnsweringPipeline` built around `model` plus two question/context examples.

    Returns `(None, None)` instead when `model.config` is an `LxmertConfig`.
    """
    # Lxmert is a bimodal model; we need to find a more consistent way
    # to switch on those models.
    if isinstance(model.config, LxmertConfig):
        return None, None

    examples = [
        {"question": "Where was HuggingFace founded ?", "context": "HuggingFace was founded in Paris."},
        {"question": "In what field is HuggingFace ?", "context": "HuggingFace is an AI startup."},
    ]
    question_answerer = QuestionAnsweringPipeline(
        model=model,
        tokenizer=tokenizer,
        feature_extractor=feature_extractor,
        image_processor=image_processor,
        processor=processor,
        dtype=dtype,
    )
    return question_answerer, examples
def run_pipeline_test(self, question_answerer, _):
    """Check that each supported call form returns well-shaped answers.

    Exercises a single question/context, ``handle_impossible_answer``, lists of
    questions and contexts, rejection of empty or ``None`` inputs, ``top_k``,
    a context repeated 20 times, and the same long input with ``batch_size=2``.

    Args:
        question_answerer: The question-answering pipeline to exercise.
        _: Ignored.
    """
    question = "Where was HuggingFace founded ?"
    context = "HuggingFace was founded in Paris."
    field_question = "In what field is HuggingFace working ?"
    # Shape every single answer must have; only the value types are checked.
    answer_shape = {"answer": ANY(str), "start": ANY(int), "end": ANY(int), "score": ANY(float)}

    outputs = question_answerer(question=question, context=context)
    self.assertEqual(outputs, answer_shape)

    outputs = question_answerer(question=question, context=context, handle_impossible_answer=True)
    self.assertEqual(outputs, answer_shape)

    # Two questions against one context give two answers.
    outputs = question_answerer(question=[field_question, field_question], context=context)
    self.assertEqual(outputs, [answer_shape, answer_shape])

    # Two questions with two contexts give two answers.
    outputs = question_answerer(
        question=["What field is HuggingFace working ?", "In what field is HuggingFace ?"],
        context=[
            "HuggingFace is a startup based in New-York",
            "HuggingFace is a startup founded in Paris",
        ],
    )
    self.assertEqual(outputs, [answer_shape, answer_shape])

    # An empty or missing question or context is rejected with ValueError.
    with self.assertRaises(ValueError):
        question_answerer(question="", context=context)
    with self.assertRaises(ValueError):
        question_answerer(question=None, context=context)
    with self.assertRaises(ValueError):
        question_answerer(question=field_question, context="")
    with self.assertRaises(ValueError):
        question_answerer(question=field_question, context=None)

    # With top_k the pipeline returns a collection of answers; every entry
    # must have the answer shape and match the hub output spec.
    outputs = question_answerer(question=question, context=context, top_k=20)
    self.assertEqual(outputs, [answer_shape for _ in outputs])
    for single_output in outputs:
        compare_pipeline_output_to_hub_spec(single_output, QuestionAnsweringOutputElement)

    # Very long context require multiple features
    long_context = context * 20
    outputs = question_answerer(question=question, context=long_context)
    self.assertEqual(outputs, answer_shape)

    # Using batch is OK. Presumably batching needs a pad token, so fall back
    # to the model's EOS token id when the tokenizer defines none.
    if question_answerer.tokenizer.pad_token_id is None:
        question_answerer.tokenizer.pad_token_id = question_answerer.model.config.eos_token_id
    new_outputs = question_answerer(question=question, context=long_context, batch_size=2)
    self.assertEqual(new_outputs, answer_shape)
    # Batching must not change the (rounded) result.
    self.assertEqual(nested_simplify(outputs), nested_simplify(new_outputs))
@require_torch
def test_small_model_pt(self):
    """The tiny DistilBERT SQuAD checkpoint returns the pinned answer for a fixed input."""
    checkpoint = "sshleifer/tiny-distilbert-base-cased-distilled-squad"
    question_answerer = pipeline("question-answering", model=checkpoint)

    prediction = question_answerer(
        question="Where was HuggingFace founded ?",
        context="HuggingFace was founded in Paris.",
    )

    expected = {"score": 0.063, "start": 0, "end": 11, "answer": "HuggingFace"}
    self.assertEqual(nested_simplify(prediction), expected)
@require_torch
def test_small_model_pt_fp16(self):
    """Same check as the small-model test, with the pipeline built with ``dtype=torch.float16``."""
    checkpoint = "sshleifer/tiny-distilbert-base-cased-distilled-squad"
    question_answerer = pipeline("question-answering", model=checkpoint, dtype=torch.float16)

    prediction = question_answerer(
        question="Where was HuggingFace founded ?",
        context="HuggingFace was founded in Paris.",
    )

    expected = {"score": 0.063, "start": 0, "end": 11, "answer": "HuggingFace"}
    self.assertEqual(nested_simplify(prediction), expected)
@require_torch
def test_small_model_pt_bf16(self):
    """Same check as the small-model test, with the pipeline built with ``dtype=torch.bfloat16``."""
    checkpoint = "sshleifer/tiny-distilbert-base-cased-distilled-squad"
    question_answerer = pipeline("question-answering", model=checkpoint, dtype=torch.bfloat16)

    prediction = question_answerer(
        question="Where was HuggingFace founded ?",
        context="HuggingFace was founded in Paris.",
    )

    expected = {"score": 0.063, "start": 0, "end": 11, "answer": "HuggingFace"}
    self.assertEqual(nested_simplify(prediction), expected)
@require_torch
def test_small_model_pt_iterator(self):
    """Feeding the pipeline a generator of inputs yields the pinned answer for every item."""
    # https://github.com/huggingface/transformers/issues/18510
    pipe = pipeline(model="sshleifer/tiny-distilbert-base-cased-distilled-squad", batch_size=16)

    def sample_stream():
        # Ten identical question/context dicts, produced lazily.
        for _ in range(10):
            yield {"question": "Where was HuggingFace founded ?", "context": "HuggingFace was founded in Paris."}

    expected = {"score": 0.063, "start": 0, "end": 11, "answer": "HuggingFace"}
    for prediction in pipe(sample_stream()):
        self.assertEqual(nested_simplify(prediction), expected)
@require_torch
def test_small_model_pt_softmax_trick(self):
    """Scale the start/end logits by 1e6 before postprocessing and check the pinned answer."""
    question_answerer = pipeline(
        "question-answering", model="sshleifer/tiny-distilbert-base-cased-distilled-squad"
    )
    original_postprocess = question_answerer.postprocess

    def ensure_large_logits_postprocess(
        model_outputs,
        top_k=1,
        handle_impossible_answer=False,
        max_answer_len=15,
    ):
        # Tweak start and stop to make sure we encounter the softmax logits bug.
        for output in model_outputs:
            for key in ("start", "end"):
                output[key] = output[key] * 1e6
        forwarded = {
            "top_k": top_k,
            "handle_impossible_answer": handle_impossible_answer,
            "max_answer_len": max_answer_len,
        }
        return original_postprocess(model_outputs, **forwarded)

    # Swap in the wrapper so the pipeline postprocesses the inflated logits.
    question_answerer.postprocess = ensure_large_logits_postprocess

    prediction = question_answerer(
        question="Where was HuggingFace founded ?",
        context="HuggingFace was founded in Paris.",
    )
    expected = {"score": 0.111, "start": 0, "end": 11, "answer": "HuggingFace"}
    self.assertEqual(nested_simplify(prediction), expected)
@slow
@require_torch
def test_small_model_japanese(self):
    """Pin the Japanese checkpoint's answers with and without word alignment."""
    question_answerer = pipeline(
        "question-answering",
        model="KoichiYasuoka/deberta-base-japanese-aozora-ud-head",
    )
    question = "国語"
    context = "全学年にわたって小学校の国語の教科書に挿し絵が用いられている"

    # Wrong answer, the whole text is identified as one "word" since the
    # tokenizer does not include a pretokenizer.
    aligned = question_answerer(question=question, context=context)
    expected_aligned = {
        "score": 1.0,
        "start": 0,
        "end": 30,
        "answer": "全学年にわたって小学校の国語の教科書に挿し絵が用いられている",
    }
    self.assertEqual(nested_simplify(aligned), expected_aligned)

    # Disable word alignment
    unaligned = question_answerer(question=question, context=context, align_to_words=False)
    expected_unaligned = {"score": 1.0, "start": 15, "end": 18, "answer": "教科書"}
    self.assertEqual(nested_simplify(unaligned), expected_unaligned)
@slow
@require_torch
def test_small_model_long_context_cls_slow(self):
# Slow test: runs deepset/roberta-base-squad2 on a long passage about London with a
# question about Paris, and pins the result to the empty answer.
# The pipeline is created with handle_impossible_answer=True and max_seq_length=512.
question_answerer = pipeline(
"question-answering",
model="deepset/roberta-base-squad2",
handle_impossible_answer=True,
max_seq_length=512,
)
# The context literal spans two source lines, so it contains one newline.
outputs = question_answerer(
question="What country is Paris the capital of?",
context="""London is the capital and largest city of England and the United Kingdom. It stands on the River Thames in south-east England at the head of a 50-mile (80 km) estuary down to the North Sea, and has been a major settlement for two millennia. The City of London, its ancient core and financial centre, was founded by the Romans as Londinium and retains boundaries close to its medieval ones. Since the 19th century, \"London\" has also referred to the metropolis around this core, historically split between the counties of Middlesex, Essex, Surrey, Kent, and Hertfordshire, which largely comprises Greater London, governed by the Greater London Authority. The City of Westminster, to the west of the City of London, has for centuries held the national government and parliament. As one of the world's global cities, London exerts strong influence on its arts, commerce, education, entertainment, fashion, finance, health care, media, tourism, and communications, and has sometimes been called the capital of the world. Its GDP (€801.66 billion in 2017) makes it the biggest urban economy in Europe, and it is one of the major financial centres in the world. In 2019 it had the second-highest number of ultra high-net-worth individuals in Europe after Paris and the second-highest number of billionaires in Europe after Moscow. As of 2021, London has the most millionaires of any city. With Europe's largest concentration of higher education institutions, it includes Imperial College London in natural and applied sciences, the London School of Economics in social sciences, and the comprehensive University College London. The city is home to the most 5-star hotels of any city in the world. In 2012, London became the first city to host three Summer Olympic Games. London is the capital and largest city of England and the United Kingdom. 
It stands on the River Thames in south-east England at the head of a 50-mile (80 km) estuary down to the North Sea, and has been a major settlement for two millennia. The City of London, its ancient core and financial centre, was founded by the Romans as Londinium and retains boundaries close to its medieval ones. Since the 19th century, \"London\" has also referred to the metropolis around this core, historically split between the counties of Middlesex, Essex, Surrey, Kent, and Hertfordshire, which largely comprises Greater London, governed by the Greater London Authority. The City of Westminster, to the west of the City of London, has for centuries held the national government and parliament. As one of the world's global cities, London exerts strong influence on its arts, commerce, education, entertainment, fashion, finance, health care, media, tourism, and communications, and has sometimes been called the capital of the world. Its GDP (€801.66 billion in 2017) makes it the biggest urban economy in Europe, and it is one of the major financial centres in the world. In 2019 it had the second-highest number of ultra high-net-worth individuals in Europe after Paris and the second-highest number of billionaires in Europe after Moscow. As of 2021, London has the most millionaires of any city. With Europe's largest concentration of higher education institutions, it includes Imperial College London in natural and applied sciences, the London School of Economics in social sciences, and the comprehensive University College London. The city is home to the most 5-star hotels of any city in the world. In 2012, London became the first city to host three Summer Olympic Games.""",
)
# Expected: an empty answer with start == end == 0.
# NOTE(review): "cls" in the test name presumably refers to the no-answer position - confirm.
self.assertEqual(nested_simplify(outputs), {"score": 0.988, "start": 0, "end": 0, "answer": ""})
@require_torch
def test_duplicate_handling(self):
    """With ``top_k=10`` no answer text may appear twice in the returned candidates."""
    question_answerer = pipeline("question-answering", model="deepset/tinyroberta-squad2")

    candidates = question_answerer(
        question="Who is the chancellor of Germany?",
        context="Angela Merkel was the chancellor of Germany.",
        top_k=10,
    )

    answer_texts = []
    for candidate in candidates:
        answer_texts.append(candidate["answer"])
    distinct_count = len(set(answer_texts))
    self.assertEqual(len(answer_texts), distinct_count, "There are duplicate answers in the outputs.")
@slow
@require_torch
def test_large_model_pt(self):
    """The pipeline built without an explicit model returns the pinned answer "Paris"."""
    question_answerer = pipeline("question-answering")

    prediction = question_answerer(
        question="Where was HuggingFace founded ?",
        context="HuggingFace was founded in Paris.",
    )

    expected = {"score": 0.979, "start": 27, "end": 32, "answer": "Paris"}
    self.assertEqual(nested_simplify(prediction), expected)
@slow
@require_torch
def test_large_model_issue(self):
# Slow test: runs mrm8488/bert-multi-cased-finetuned-xquadv1 on a long news passage and
# pins the returned answer span, offsets and (rounded) score.
qa_pipeline = pipeline(
"question-answering",
model="mrm8488/bert-multi-cased-finetuned-xquadv1",
)
# The input is passed as one positional dict with "context" and "question" keys.
# The context is built from adjacent string literals, so it contains no newlines.
outputs = qa_pipeline(
{
"context": (
"Yes Bank founder Rana Kapoor has approached the Bombay High Court, challenging a special court's"
" order from August this year that had remanded him in police custody for a week in a multi-crore"
" loan fraud case. Kapoor, who is currently lodged in Taloja Jail, is an accused in the loan fraud"
" case and some related matters being probed by the CBI and Enforcement Directorate. A single"
" bench presided over by Justice S K Shinde on Tuesday posted the plea for further hearing on"
" October 14. In his plea filed through advocate Vijay Agarwal, Kapoor claimed that the special"
" court's order permitting the CBI's request for police custody on August 14 was illegal and in"
" breach of the due process of law. Therefore, his police custody and subsequent judicial custody"
" in the case were all illegal. Kapoor has urged the High Court to quash and set aside the special"
" court's order dated August 14. As per his plea, in August this year, the CBI had moved two"
" applications before the special court, one seeking permission to arrest Kapoor, who was already"
" in judicial custody at the time in another case, and the other, seeking his police custody."
" While the special court refused to grant permission to the CBI to arrest Kapoor, it granted the"
" central agency's plea for his custody. Kapoor, however, said in his plea that before filing an"
" application for his arrest, the CBI had not followed the process of issuing him a notice under"
" Section 41 of the CrPC for appearance before it. He further said that the CBI had not taken"
" prior sanction as mandated under section 17 A of the Prevention of Corruption Act for"
" prosecuting him. The special court, however, had said in its order at the time that as Kapoor"
" was already in judicial custody in another case and was not a free man the procedure mandated"
" under Section 41 of the CrPC need not have been adhered to as far as issuing a prior notice of"
" appearance was concerned. ADVERTISING It had also said that case records showed that the"
" investigating officer had taken an approval from a managing director of Yes Bank before"
" beginning the proceedings against Kapoor and such a permission was a valid sanction. However,"
" Kapoor in his plea said that the above order was bad in law and sought that it be quashed and"
" set aside. The law mandated that if initial action was not in consonance with legal procedures,"
" then all subsequent actions must be held as illegal, he said, urging the High Court to declare"
" the CBI remand and custody and all subsequent proceedings including the further custody as"
" illegal and void ab-initio. In a separate plea before the High Court, Kapoor's daughter Rakhee"
" Kapoor-Tandon has sought exemption from in-person appearance before a special PMLA court. Rakhee"
" has stated that she is a resident of the United Kingdom and is unable to travel to India owing"
" to restrictions imposed due to the COVID-19 pandemic. According to the CBI, in the present case,"
" Kapoor had obtained a gratification or pecuniary advantage of ₹ 307 crore, and thereby caused"
" Yes Bank a loss of ₹ 1,800 crore by extending credit facilities to Avantha Group, when it was"
" not eligible for the same"
),
"question": "Is this person involved in fraud?",
}
)
# The pinned score is very low (0.001); only the rounded value is compared.
self.assertEqual(
nested_simplify(outputs),
{"answer": "an accused in the loan fraud case", "end": 294, "score": 0.001, "start": 261},
)
@slow
@require_torch
def test_large_model_course(self):
# Slow test: asks the pipeline built without an explicit model a question about a
# multi-paragraph description of the library, and pins the answer span and rounded score.
question_answerer = pipeline("question-answering")
# NOTE(review): every character of this triple-quoted literal, including leading whitespace
# on each line, is part of the context. The pinned start/end values below are presumably
# character offsets into it - confirm the literal's whitespace matches what they were computed on.
long_context = """
🤗 Transformers: State of the Art NLP
🤗 Transformers provides thousands of pretrained models to perform tasks on texts such as classification, information extraction,
question answering, summarization, translation, text generation and more in over 100 languages.
Its aim is to make cutting-edge NLP easier to use for everyone.
🤗 Transformers provides APIs to quickly download and use those pretrained models on a given text, fine-tune them on your own datasets and
then share them with the community on our model hub. At the same time, each python module defining an architecture is fully standalone and
can be modified to enable quick research experiments.
Why should I use transformers?
1. Easy-to-use state-of-the-art models:
- High performance on NLU and NLG tasks.
- Low barrier to entry for educators and practitioners.
- Few user-facing abstractions with just three classes to learn.
- A unified API for using all our pretrained models.
- Lower compute costs, smaller carbon footprint:
2. Researchers can share trained models instead of always retraining.
- Practitioners can reduce compute time and production costs.
- Dozens of architectures with over 10,000 pretrained models, some in more than 100 languages.
3. Choose the right framework for every part of a model's lifetime:
- Train state-of-the-art models in 3 lines of code.
- Move a single model between TF2.0/PyTorch frameworks at will.
- Seamlessly pick the right framework for training, evaluation and production.
4. Easily customize a model or an example to your needs:
- We provide examples for each architecture to reproduce the results published by its original authors.
- Model internals are exposed as consistently as possible.
- Model files can be used independently of the library for quick experiments.
🤗 Transformers is backed by the three most popular deep learning libraries — Jax, PyTorch and TensorFlow — with a seamless integration
between them. It's straightforward to train your models with one before loading them for inference with the other.
"""
question = "Which deep learning libraries back 🤗 Transformers?"
outputs = question_answerer(question=question, context=long_context)
# The expected answer text appears in the last paragraph of the context.
self.assertEqual(
nested_simplify(outputs),
{"answer": "Jax, PyTorch and TensorFlow", "end": 1919, "score": 0.972, "start": 1892},
)
@require_torch
class QuestionAnsweringArgumentHandlerTests(unittest.TestCase):
    """Tests for how ``QuestionAnsweringArgumentHandler`` normalizes and validates its inputs."""

    def _assert_squad_examples(self, normalized, expected_len):
        """Assert *normalized* is a list of exactly *expected_len* ``SquadExample`` objects."""
        self.assertIsInstance(normalized, list)
        self.assertEqual(len(normalized), expected_len)
        self.assertEqual({type(el) for el in normalized}, {SquadExample})

    def test_argument_handler(self):
        """Every accepted call form is normalized to a list of ``SquadExample``."""
        qa = QuestionAnsweringArgumentHandler()

        question = "Where was HuggingFace founded ?"
        context = "HuggingFace was founded in Paris"

        def sample():
            # A fresh dict per use, so no case shares a mutable input with another.
            return {"question": question, "context": context}

        # (label, positional args, keyword args, expected number of examples)
        cases = [
            ("positional", (question, context), {}, 1),
            ("keywords", (), {"question": question, "context": context}, 1),
            ("question list with one context", (), {"question": [question, question], "context": context}, 2),
            ("single dict", (sample(),), {}, 1),
            ("list of one dict", ([sample()],), {}, 1),
            ("list of two dicts", ([sample(), sample()],), {}, 2),
            ("X= dict", (), {"X": sample()}, 1),
            ("X= list of one dict", (), {"X": [sample()]}, 1),
            ("data= dict", (), {"data": sample()}, 1),
        ]
        for label, args, kwargs, expected_len in cases:
            # subTest keeps one failing call form from hiding the others.
            with self.subTest(call=label):
                self._assert_squad_examples(qa(*args, **kwargs), expected_len)

    def test_argument_handler_error_handling(self):
        """Missing keys raise ``KeyError``; empty, ``None`` or mismatched inputs raise ``ValueError``."""
        qa = QuestionAnsweringArgumentHandler()

        Q = "Where was HuggingFace founded ?"
        C = "HuggingFace was founded in Paris"

        # (expected exception, positional args, keyword args)
        cases = [
            # A dict lacking "question" or "context".
            (KeyError, ({"context": C},), {}),
            (KeyError, ({"question": Q},), {}),
            (KeyError, ([{"context": C}],), {}),
            # Positional question/context that is None or empty.
            (ValueError, (None, C), {}),
            (ValueError, ("", C), {}),
            (ValueError, (Q, None), {}),
            (ValueError, (Q, ""), {}),
            # The same, passed by keyword.
            (ValueError, (), {"question": None, "context": C}),
            (ValueError, (), {"question": "", "context": C}),
            (ValueError, (), {"question": Q, "context": None}),
            (ValueError, (), {"question": Q, "context": ""}),
            # The same, inside a single dict.
            (ValueError, ({"question": None, "context": C},), {}),
            (ValueError, ({"question": "", "context": C},), {}),
            (ValueError, ({"question": Q, "context": None},), {}),
            (ValueError, ({"question": Q, "context": ""},), {}),
            # A valid first item followed by an invalid second item.
            (ValueError, ([{"question": Q, "context": C}, {"question": None, "context": C}],), {}),
            (ValueError, ([{"question": Q, "context": C}, {"question": "", "context": C}],), {}),
            (ValueError, ([{"question": Q, "context": C}, {"question": Q, "context": None}],), {}),
            (ValueError, ([{"question": Q, "context": C}, {"question": Q, "context": ""}],), {}),
            # A question of an unsupported type, and question/context lists of different lengths.
            (ValueError, (), {"question": {"This": "Is weird"}, "context": "This is a context"}),
            (ValueError, (), {"question": [Q, Q], "context": [C, C, C]}),
            (ValueError, (), {"question": [Q, Q, Q], "context": [C, C]}),
        ]
        for expected_error, args, kwargs in cases:
            with self.subTest(args=args, kwargs=kwargs):
                with self.assertRaises(expected_error):
                    qa(*args, **kwargs)

    def test_argument_handler_old_format(self):
        """Parallel lists of questions and contexts are still accepted."""
        qa = QuestionAnsweringArgumentHandler()

        question = "Where was HuggingFace founded ?"
        context = "HuggingFace was founded in Paris"
        # Backward compatibility for this
        normalized = qa(question=[question, question], context=[context, context])
        self._assert_squad_examples(normalized, 2)

    def test_argument_handler_error_handling_odd(self):
        """``None``, an unknown keyword, or a bare integer are rejected with ``ValueError``."""
        qa = QuestionAnsweringArgumentHandler()

        for args, kwargs in (((None,), {}), ((), {"Y": None}), ((1,), {})):
            with self.subTest(args=args, kwargs=kwargs):
                with self.assertRaises(ValueError):
                    qa(*args, **kwargs)

View File

@@ -0,0 +1,145 @@
# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from transformers import (
MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING,
SummarizationPipeline,
pipeline,
)
from transformers.testing_utils import is_pipeline_test, require_torch, slow, torch_device
from transformers.tokenization_utils import TruncationStrategy
from .test_pipelines_common import ANY
@is_pipeline_test
class SummarizationPipelineTests(unittest.TestCase):
model_mapping = MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING
def get_test_pipeline(
    self,
    model,
    tokenizer=None,
    image_processor=None,
    feature_extractor=None,
    processor=None,
    dtype="float32",
):
    """Build the summarization pipeline under test together with two sample inputs.

    The pipeline is created with ``max_new_tokens=20``.

    Returns:
        A ``(pipeline, examples)`` pair.
    """
    pipeline_kwargs = {
        "model": model,
        "tokenizer": tokenizer,
        "feature_extractor": feature_extractor,
        "image_processor": image_processor,
        "processor": processor,
        "dtype": dtype,
        "max_new_tokens": 20,
    }
    summarizer = SummarizationPipeline(**pipeline_kwargs)
    sample_texts = ["(CNN)The Palestinian Authority officially became", "Some other text"]
    return summarizer, sample_texts
def run_pipeline_test(self, summarizer, _):
# Smoke-tests the summarizer: a plain call and a call with generation arguments must each
# return a one-element list holding a dict with a string "summary_text".
# It then probes a 1000-word input for models not listed as able to handle long sequences.
# The second argument is ignored.
model = summarizer.model
outputs = summarizer("(CNN)The Palestinian Authority officially became")
self.assertEqual(outputs, [{"summary_text": ANY(str)}])
outputs = summarizer(
"(CNN)The Palestinian Authority officially became ",
num_beams=2,
min_length=2,
max_length=5,
)
self.assertEqual(outputs, [{"summary_text": ANY(str)}])
# Some models (Switch Transformers, LED, T5, LongT5, etc) can handle long sequences.
model_can_handle_longer_seq = [
"SwitchTransformersConfig",
"T5Config",
"LongT5Config",
"LEDConfig",
"PegasusXConfig",
"FSMTConfig",
"M2M100Config",
"ProphetNetConfig", # positional embeddings up to a fixed maximum size (otherwise clamping the values)
]
# For every other config, an over-long input is expected to raise when the pipeline is on CPU.
if model.config.__class__.__name__ not in model_can_handle_longer_seq:
if str(summarizer.device) == "cpu":
with self.assertRaises(Exception):
outputs = summarizer("This " * 1000)
# NOTE(review): indentation was lost in this copy, so it is unclear whether the truncating call
# below runs for every model or only inside the `if` above - confirm against the upstream file.
# Its result is not asserted on; the call only has to complete without raising.
outputs = summarizer("This " * 1000, truncation=TruncationStrategy.ONLY_FIRST)
@require_torch
def test_small_model_pt(self):
    """The tiny mBART checkpoint produces the pinned summary for a short input."""
    summarizer = pipeline(task="summarization", model="sshleifer/tiny-mbart", max_new_tokens=19)

    summaries = summarizer("This is a small test")

    expected_text = "เข้าไปเข้าไปเข้าไปเข้าไปเข้าไปเข้าไปเข้าไปเข้าไปเข้าไปเข้าไปเข้าไปเข้าไปเข้าไปเข้าไปเข้าไปเข้าไปเข้าไปเข้าไป"
    self.assertEqual(summaries, [{"summary_text": expected_text}])
@require_torch
@slow
def test_integration_torch_summarization(self):
# Slow integration test: summarizes a full news article with the pipeline built without an
# explicit model (placed on torch_device) and compares the summary text exactly.
summarizer = pipeline(task="summarization", device=torch_device)
# The article is built from adjacent string literals; the quote style varies per line only to
# avoid escaping the quotes inside that piece of text.
cnn_article = (
" (CNN)The Palestinian Authority officially became the 123rd member of the International Criminal Court on"
" Wednesday, a step that gives the court jurisdiction over alleged crimes in Palestinian territories. The"
" formal accession was marked with a ceremony at The Hague, in the Netherlands, where the court is based."
" The Palestinians signed the ICC's founding Rome Statute in January, when they also accepted its"
' jurisdiction over alleged crimes committed "in the occupied Palestinian territory, including East'
' Jerusalem, since June 13, 2014." Later that month, the ICC opened a preliminary examination into the'
" situation in Palestinian territories, paving the way for possible war crimes investigations against"
" Israelis. As members of the court, Palestinians may be subject to counter-charges as well. Israel and"
" the United States, neither of which is an ICC member, opposed the Palestinians' efforts to join the"
" body. But Palestinian Foreign Minister Riad al-Malki, speaking at Wednesday's ceremony, said it was a"
' move toward greater justice. "As Palestine formally becomes a State Party to the Rome Statute today, the'
' world is also a step closer to ending a long era of impunity and injustice," he said, according to an'
' ICC news release. "Indeed, today brings us closer to our shared goals of justice and peace." Judge'
" Kuniko Ozaki, a vice president of the ICC, said acceding to the treaty was just the first step for the"
' Palestinians. "As the Rome Statute today enters into force for the State of Palestine, Palestine'
" acquires all the rights as well as responsibilities that come with being a State Party to the Statute."
' These are substantive commitments, which cannot be taken lightly," she said. Rights group Human Rights'
' Watch welcomed the development. "Governments seeking to penalize Palestine for joining the ICC should'
" immediately end their pressure, and countries that support universal acceptance of the court's treaty"
' should speak out to welcome its membership," said Balkees Jarrah, international justice counsel for the'
" group. \"What's objectionable is the attempts to undermine international justice, not Palestine's"
' decision to join a treaty to which over 100 countries around the world are members." In January, when'
" the preliminary ICC examination was opened, Israeli Prime Minister Benjamin Netanyahu described it as an"
' outrage, saying the court was overstepping its boundaries. The United States also said it "strongly"'
" disagreed with the court's decision. \"As we have said repeatedly, we do not believe that Palestine is a"
' state and therefore we do not believe that it is eligible to join the ICC," the State Department said in'
' a statement. It urged the warring sides to resolve their differences through direct negotiations. "We'
' will continue to oppose actions against Israel at the ICC as counterproductive to the cause of peace,"'
" it said. But the ICC begs to differ with the definition of a state for its purposes and refers to the"
' territories as "Palestine." While a preliminary examination is not a formal investigation, it allows the'
" court to review evidence and determine whether to investigate suspects on both sides. Prosecutor Fatou"
' Bensouda said her office would "conduct its analysis in full independence and impartiality." The war'
" between Israel and Hamas militants in Gaza last summer left more than 2,000 people dead. The inquiry"
" will include alleged war crimes committed since June. The International Criminal Court was set up in"
" 2002 to prosecute genocide, crimes against humanity and war crimes. CNN's Vasco Cotovio, Kareem Khadder"
" and Faith Karimi contributed to this report."
)
# The expected summary is compared verbatim, including its leading space and the spaces
# before each full stop.
expected_cnn_summary = (
" The Palestinian Authority becomes the 123rd member of the International Criminal Court . The move gives"
" the court jurisdiction over alleged crimes in Palestinian territories . Israel and the United States"
" opposed the Palestinians' efforts to join the court . Rights group Human Rights Watch welcomes the move,"
" says governments seeking to penalize Palestine should end pressure ."
)
result = summarizer(cnn_article)
# Only the first returned summary is checked.
self.assertEqual(result[0]["summary_text"], expected_cnn_summary)

View File

@@ -0,0 +1,373 @@
# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from transformers import (
MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING,
AutoModelForTableQuestionAnswering,
AutoTokenizer,
TableQuestionAnsweringPipeline,
pipeline,
)
from transformers.testing_utils import (
is_pipeline_test,
require_torch,
slow,
)
@is_pipeline_test
class TQAPipelineTests(unittest.TestCase):
# Kept here for consistency, but TQA models do not have a fast tokenizer,
# which is needed to generate the automatic tests
model_mapping = MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING
@require_torch
def test_small_model_pt(self, dtype="float32"):
    """Pin the tiny random WTQ TAPAS checkpoint's outputs and its input validation.

    Args:
        dtype: Passed to ``AutoModelForTableQuestionAnswering.from_pretrained``.
    """
    checkpoint = "lysandre/tiny-tapas-random-wtq"
    model = AutoModelForTableQuestionAnswering.from_pretrained(checkpoint, dtype=dtype)
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    self.assertIsInstance(model.config.aggregation_labels, dict)
    self.assertIsInstance(model.config.no_aggregation_label_index, int)

    table_querier = TableQuestionAnsweringPipeline(model=model, tokenizer=tokenizer, max_new_tokens=20)

    # Each factory returns a brand-new dict, so every call gets its own table object.
    def actors_table():
        return {
            "actors": ["brad pitt", "leonardo di caprio", "george clooney"],
            "age": ["56", "45", "59"],
            "number of movies": ["87", "53", "69"],
            "date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"],
        }

    def repositories_table():
        return {
            "Repository": ["Transformers", "Datasets", "Tokenizers"],
            "Stars": ["36542", "4512", "3934"],
            "Contributors": ["651", "77", "34"],
            "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
        }

    def average_answer():
        # The answer pinned for every query in this test.
        return {"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"}

    # A single query returns a single dict.
    outputs = table_querier(
        table=actors_table(),
        query="how many movies has george clooney played in?",
    )
    self.assertEqual(outputs, average_answer())

    # A list of queries returns one dict per query.
    outputs = table_querier(
        table=actors_table(),
        query=["how many movies has george clooney played in?", "how old is he?", "what's his date of birth?"],
    )
    self.assertEqual(outputs, [average_answer() for _ in range(3)])

    outputs = table_querier(
        table=repositories_table(),
        query=[
            "What repository has the largest number of stars?",
            "Given that the numbers of stars defines if a repository is active, what repository is the most"
            " active?",
            "What is the number of repositories?",
            "What is the average number of stars?",
            "What is the total amount of stars?",
        ],
    )
    self.assertEqual(outputs, [average_answer() for _ in range(5)])

    # A missing or empty table is rejected.
    for empty_table in (None, "", {}):
        with self.assertRaises(ValueError):
            table_querier(query="What does it do with empty context ?", table=empty_table)

    # So is a table with no query at all ...
    with self.assertRaises(ValueError):
        table_querier(table=repositories_table())

    # ... or with an empty or None query.
    for empty_query in ("", None):
        with self.assertRaises(ValueError):
            table_querier(query=empty_query, table=repositories_table())
@require_torch
def test_small_model_pt_fp16(self):
    """Re-run ``test_small_model_pt`` with ``dtype="float16"``."""
    half_precision = "float16"
    self.test_small_model_pt(dtype=half_precision)
@require_torch
def test_slow_tokenizer_sqa_pt(self, dtype="float32"):
    """Run the tiny random TAPAS SQA checkpoint through the table QA pipeline.

    Checks sequential versus batched querying, single- and multi-question
    calls on two different tables, and that a missing or empty ``table`` /
    ``query`` argument raises ``ValueError``.

    Args:
        dtype: dtype forwarded to ``from_pretrained`` when loading the model.
    """

    # Each factory returns a brand-new object so that no two pipeline calls
    # share an input dict or list.
    def actors_table():
        return {
            "actors": ["brad pitt", "leonardo di caprio", "george clooney"],
            "age": ["56", "45", "59"],
            "number of movies": ["87", "53", "69"],
            "date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"],
        }

    def actor_questions():
        return ["how many movies has george clooney played in?", "how old is he?", "what's his date of birth?"]

    def repositories_table():
        return {
            "Repository": ["Transformers", "Datasets", "Tokenizers"],
            "Stars": ["36542", "4512", "3934"],
            "Contributors": ["651", "77", "34"],
            "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
        }

    checkpoint = "lysandre/tiny-tapas-random-sqa"
    model = AutoModelForTableQuestionAnswering.from_pretrained(checkpoint, dtype=dtype)
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    table_querier = TableQuestionAnsweringPipeline(model=model, tokenizer=tokenizer, max_new_tokens=20)

    # Same inputs, once in sequential (conversational) mode and once batched.
    conversation = {"table": actors_table(), "query": actor_questions()}
    sequential_outputs = table_querier(**conversation, sequential=True)
    batch_outputs = table_querier(**conversation, sequential=False)

    for answers in (sequential_outputs, batch_outputs):
        self.assertEqual(len(answers), 3)
    self.assertEqual(sequential_outputs[0], batch_outputs[0])
    self.assertNotEqual(sequential_outputs[1], batch_outputs[1])
    # The matching check for index 2 is currently disabled:
    # self.assertNotEqual(sequential_outputs[2], batch_outputs[2])

    table_querier = TableQuestionAnsweringPipeline(model=model, tokenizer=tokenizer, max_new_tokens=20)

    # Single question on the actors table.
    birth_date_answer = {"answer": "7 february 1967", "coordinates": [(0, 3)], "cells": ["7 february 1967"]}
    outputs = table_querier(
        table=actors_table(),
        query="how many movies has george clooney played in?",
    )
    self.assertEqual(outputs, birth_date_answer)

    # Several questions on the actors table: one answer per question.
    outputs = table_querier(table=actors_table(), query=actor_questions())
    self.assertEqual(outputs, [birth_date_answer] * 3)

    # Several questions on the repositories table.
    python_answer = {"answer": "Python, Python", "coordinates": [(0, 3), (1, 3)], "cells": ["Python", "Python"]}
    outputs = table_querier(
        table=repositories_table(),
        query=[
            "What repository has the largest number of stars?",
            "Given that the numbers of stars defines if a repository is active, what repository is the most"
            " active?",
            "What is the number of repositories?",
            "What is the average number of stars?",
            "What is the total amount of stars?",
        ],
    )
    self.assertEqual(outputs, [python_answer] * 5)

    # Every one of these argument combinations must be rejected.
    empty_context_question = "What does it do with empty context ?"
    rejected_calls = [
        {"query": empty_context_question, "table": None},
        {"query": empty_context_question, "table": ""},
        {"query": empty_context_question, "table": {}},
        {"table": repositories_table()},
        {"query": "", "table": repositories_table()},
        {"query": None, "table": repositories_table()},
    ]
    for bad_kwargs in rejected_calls:
        with self.assertRaises(ValueError):
            table_querier(**bad_kwargs)
@require_torch
def test_slow_tokenizer_sqa_pt_fp16(self):
    """Re-run ``test_slow_tokenizer_sqa_pt`` with ``dtype="float16"``."""
    half_precision = "float16"
    self.test_slow_tokenizer_sqa_pt(dtype=half_precision)
@slow
@require_torch
def test_integration_wtq_pt(self, dtype="float32"):
    """Query the default ``table-question-answering`` pipeline and compare exact answers.

    Args:
        dtype: dtype forwarded to ``pipeline`` when the model is loaded.
    """
    table_querier = pipeline("table-question-answering", dtype=dtype)

    repositories = {
        "Repository": ["Transformers", "Datasets", "Tokenizers"],
        "Stars": ["36542", "4512", "3934"],
        "Contributors": ["651", "77", "34"],
        "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
    }
    questions = [
        "What repository has the largest number of stars?",
        "Given that the numbers of stars defines if a repository is active, what repository is the most active?",
        "What is the number of repositories?",
        "What is the average number of stars?",
        "What is the total amount of stars?",
    ]
    results = table_querier(repositories, questions)

    # The first two questions are expected to select the same single cell.
    top_repository = {
        "answer": "Transformers",
        "coordinates": [(0, 0)],
        "cells": ["Transformers"],
        "aggregator": "NONE",
    }
    repository_count = {
        "answer": "COUNT > Transformers, Datasets, Tokenizers",
        "coordinates": [(0, 0), (1, 0), (2, 0)],
        "cells": ["Transformers", "Datasets", "Tokenizers"],
        "aggregator": "COUNT",
    }
    stars_average = {
        "answer": "AVERAGE > 36542, 4512, 3934",
        "coordinates": [(0, 1), (1, 1), (2, 1)],
        "cells": ["36542", "4512", "3934"],
        "aggregator": "AVERAGE",
    }
    stars_total = {
        "answer": "SUM > 36542, 4512, 3934",
        "coordinates": [(0, 1), (1, 1), (2, 1)],
        "cells": ["36542", "4512", "3934"],
        "aggregator": "SUM",
    }
    self.assertListEqual(
        results,
        [top_repository, top_repository, repository_count, stars_average, stars_total],
    )
@slow
@require_torch
def test_integration_wtq_pt_fp16(self):
    """Re-run ``test_integration_wtq_pt`` with ``dtype="float16"``."""
    half_precision = "float16"
    self.test_integration_wtq_pt(dtype=half_precision)
@slow
@require_torch
def test_integration_sqa_pt(self, dtype="float32"):
    """Ask three follow-up questions to ``google/tapas-base-finetuned-sqa`` in sequential mode.

    Args:
        dtype: dtype forwarded to ``pipeline`` when the model is loaded.
    """
    checkpoint = "google/tapas-base-finetuned-sqa"
    table_querier = pipeline(
        "table-question-answering",
        model=checkpoint,
        tokenizer=checkpoint,
        dtype=dtype,
    )

    actors = {
        "Actors": ["Brad Pitt", "Leonardo Di Caprio", "George Clooney"],
        "Age": ["56", "45", "59"],
        "Number of movies": ["87", "53", "69"],
        "Date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"],
    }
    follow_ups = ["How many movies has George Clooney played in?", "How old is he?", "What's his date of birth?"]
    results = table_querier(actors, follow_ups, sequential=True)

    # Each expected answer is a single cell, given as (text, (row, column)).
    expected_cells = [("69", (2, 2)), ("59", (2, 1)), ("28 november 1967", (2, 3))]
    expected_results = [
        {"answer": text, "coordinates": [position], "cells": [text]} for text, position in expected_cells
    ]
    self.assertListEqual(results, expected_results)
@slow
@require_torch
def test_integration_sqa_pt_fp16(self):
    """Re-run ``test_integration_sqa_pt`` with ``dtype="float16"``."""
    half_precision = "float16"
    self.test_integration_sqa_pt(dtype=half_precision)
@slow
@require_torch
def test_large_model_pt_tapex(self, dtype="float32"):
    """Query ``microsoft/tapex-large-finetuned-wtq`` and compare the answer strings.

    Args:
        dtype: dtype forwarded to ``pipeline`` when the model is loaded.
    """
    table_querier = pipeline(
        "table-question-answering",
        model="microsoft/tapex-large-finetuned-wtq",
        dtype=dtype,
    )

    actors = {
        "Actors": ["Brad Pitt", "Leonardo Di Caprio", "George Clooney"],
        "Age": ["56", "45", "59"],
        "Number of movies": ["87", "53", "69"],
        "Date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"],
    }
    questions = [
        "How many movies has George Clooney played in?",
        "How old is Mr Clooney ?",
        "What's the date of birth of Leonardo ?",
    ]
    results = table_querier(actors, questions, sequential=True)

    # The expected answers carry a leading space.
    expected_results = [{"answer": text} for text in (" 69", " 59", " 10 june 1996")]
    self.assertListEqual(results, expected_results)

View File

@@ -0,0 +1,139 @@
# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from transformers import (
MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING,
Text2TextGenerationPipeline,
pipeline,
)
from transformers.testing_utils import is_pipeline_test, require_torch
from transformers.utils import is_torch_available
from .test_pipelines_common import ANY
if is_torch_available():
import torch
@is_pipeline_test
class Text2TextGenerationPipelineTests(unittest.TestCase):
    """Tests for ``Text2TextGenerationPipeline``."""

    model_mapping = MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING

    def get_test_pipeline(
        self,
        model,
        tokenizer=None,
        image_processor=None,
        feature_extractor=None,
        processor=None,
        dtype="float32",
    ):
        """Build the pipeline under test and return it with two example prompts.

        Returns:
            A ``(pipeline, examples)`` tuple; the pipeline is capped at 20 new tokens.
        """
        pipeline_kwargs = {
            "model": model,
            "tokenizer": tokenizer,
            "feature_extractor": feature_extractor,
            "image_processor": image_processor,
            "processor": processor,
            "dtype": dtype,
            "max_new_tokens": 20,
        }
        examples = ["Something to write", "Something else"]
        return Text2TextGenerationPipeline(**pipeline_kwargs), examples

    def run_pipeline_test(self, generator, _):
        """Check output structure for single, multiple and batched prompts, and reject a non-text input."""
        prompt = "Something there"
        outputs = generator(prompt)
        self.assertEqual(outputs, [{"generated_text": ANY(str)}])
        # These are encoder decoder, they don't just append to incoming string
        self.assertFalse(outputs[0]["generated_text"].startswith(prompt))

        # Two prompts with two sampled sequences each give a 2x2 nested result.
        prompts = ["This is great !", "Something else"]
        two_by_two = [[{"generated_text": ANY(str)} for _ in range(2)] for _ in range(2)]

        outputs = generator(prompts, num_return_sequences=2, do_sample=True)
        self.assertEqual(outputs, two_by_two)

        outputs = generator(prompts, num_return_sequences=2, batch_size=2, do_sample=True)
        self.assertEqual(outputs, two_by_two)

        with self.assertRaises(TypeError):
            generator(4)

    @require_torch
    def test_small_model_pt(self):
        """Check exact text and tensor outputs of ``patrickvonplaten/t5-tiny-random``."""
        generator = pipeline(
            "text2text-generation",
            model="patrickvonplaten/t5-tiny-random",
            num_beams=1,
            max_new_tokens=9,
        )
        prompt = "Something there"

        # do_sample=False necessary for reproducibility
        outputs = generator(prompt, do_sample=False)
        self.assertEqual(outputs, [{"generated_text": ""}])

        # Beam search, returning as many sequences as there are beams.
        num_return_sequences = 3
        outputs = generator(
            prompt,
            num_return_sequences=num_return_sequences,
            num_beams=num_return_sequences,
        )
        target_texts = (
            "Beide Beide Beide Beide Beide Beide Beide Beide Beide",
            "Beide Beide Beide Beide Beide Beide Beide Beide",
            "",
        )
        self.assertEqual(outputs, [{"generated_text": text} for text in target_texts])

        # With return_tensors=True every returned sequence is a tensor of token ids.
        tensor_pair = [{"generated_token_ids": ANY(torch.Tensor)} for _ in range(2)]
        outputs = generator("This is a test", do_sample=True, num_return_sequences=2, return_tensors=True)
        self.assertEqual(outputs, tensor_pair)

        # A pad token is configured on the tokenizer before the batched call.
        generator.tokenizer.pad_token_id = generator.model.config.eos_token_id
        generator.tokenizer.pad_token = "<pad>"
        outputs = generator(
            ["This is a test", "This is a second test"],
            do_sample=True,
            num_return_sequences=2,
            batch_size=2,
            return_tensors=True,
        )
        self.assertEqual(outputs, [tensor_pair, tensor_pair])

View File

@@ -0,0 +1,224 @@
# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from transformers import (
MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING,
TextClassificationPipeline,
pipeline,
)
from transformers.testing_utils import (
is_pipeline_test,
is_torch_available,
nested_simplify,
require_torch,
require_torch_bf16,
require_torch_fp16,
slow,
torch_device,
)
from .test_pipelines_common import ANY
if is_torch_available():
import torch
# These 2 model types require different inputs than those of the usual text models.
# Config classes whose ``__name__`` appears here are filtered out of
# ``TextClassificationPipelineTests.model_mapping``.
_TO_SKIP = {"LayoutLMv2Config", "LayoutLMv3Config"}
@is_pipeline_test
class TextClassificationPipelineTests(unittest.TestCase):
    """Tests for ``TextClassificationPipeline``."""

    model_mapping = MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING

    # Drop the configs listed in ``_TO_SKIP`` unless the mapping is a dummy placeholder.
    if not hasattr(model_mapping, "is_dummy"):
        model_mapping = {config: model for config, model in model_mapping.items() if config.__name__ not in _TO_SKIP}

    @require_torch
    def test_small_model_pt(self):
        """Check scores, ``top_k`` and the legacy ``return_all_scores`` flag on a tiny DistilBERT."""
        text_classifier = pipeline(task="text-classification", model="hf-internal-testing/tiny-random-distilbert")
        best = {"label": "LABEL_0", "score": 0.504}
        runner_up = {"label": "LABEL_1", "score": 0.496}

        outputs = text_classifier("This is great !")
        self.assertEqual(nested_simplify(outputs), [best])

        outputs = text_classifier("This is great !", top_k=2)
        self.assertEqual(nested_simplify(outputs), [best, runner_up])

        outputs = text_classifier(["This is great !", "This is bad"], top_k=2)
        self.assertEqual(nested_simplify(outputs), [[best, runner_up], [best, runner_up]])

        outputs = text_classifier("This is great !", top_k=1)
        self.assertEqual(nested_simplify(outputs), [best])

        # Legacy behavior
        outputs = text_classifier("This is great !", return_all_scores=False)
        self.assertEqual(nested_simplify(outputs), [best])

        outputs = text_classifier("This is great !", return_all_scores=True)
        self.assertEqual(nested_simplify(outputs), [[best, runner_up]])

        outputs = text_classifier(["This is great !", "Something else"], return_all_scores=True)
        self.assertEqual(nested_simplify(outputs), [[best, runner_up], [best, runner_up]])

        outputs = text_classifier(["This is great !", "Something else"], return_all_scores=False)
        self.assertEqual(nested_simplify(outputs), [best, best])

        # Do not apply any function to output for regression tasks
        # hack: changing problem_type artificially (so keep this test at last)
        text_classifier.model.config.problem_type = "regression"
        outputs = text_classifier("This is great !")
        self.assertEqual(nested_simplify(outputs), [{"label": "LABEL_0", "score": 0.01}])

    @require_torch
    def test_accepts_torch_device(self):
        """The pipeline accepts ``device=torch_device``."""
        text_classifier = pipeline(
            task="text-classification",
            model="hf-internal-testing/tiny-random-distilbert",
            device=torch_device,
        )
        outputs = text_classifier("This is great !")
        self.assertEqual(nested_simplify(outputs), [{"label": "LABEL_0", "score": 0.504}])

    @require_torch_fp16
    def test_accepts_torch_fp16(self):
        """The pipeline accepts ``dtype=torch.float16``."""
        text_classifier = pipeline(
            task="text-classification",
            model="hf-internal-testing/tiny-random-distilbert",
            device=torch_device,
            dtype=torch.float16,
        )
        outputs = text_classifier("This is great !")
        self.assertEqual(nested_simplify(outputs), [{"label": "LABEL_0", "score": 0.504}])

    @require_torch_bf16
    def test_accepts_torch_bf16(self):
        """The pipeline accepts ``dtype=torch.bfloat16``."""
        text_classifier = pipeline(
            task="text-classification",
            model="hf-internal-testing/tiny-random-distilbert",
            device=torch_device,
            dtype=torch.bfloat16,
        )
        outputs = text_classifier("This is great !")
        self.assertEqual(nested_simplify(outputs), [{"label": "LABEL_0", "score": 0.504}])

    @slow
    @require_torch
    def test_pt_bert(self):
        """Check the default ``text-classification`` pipeline on three sentences."""
        text_classifier = pipeline("text-classification")
        expectations = [
            ("This is great !", {"label": "POSITIVE", "score": 1.0}),
            ("This is bad !", {"label": "NEGATIVE", "score": 1.0}),
            ("Birds are a type of animal", {"label": "POSITIVE", "score": 0.988}),
        ]
        for sentence, expected in expectations:
            outputs = text_classifier(sentence)
            self.assertEqual(nested_simplify(outputs), [expected])

    def get_test_pipeline(
        self,
        model,
        tokenizer=None,
        image_processor=None,
        feature_extractor=None,
        processor=None,
        dtype="float32",
    ):
        """Build the pipeline under test and return it with two example inputs.

        Returns:
            A ``(pipeline, examples)`` tuple.
        """
        text_classifier = TextClassificationPipeline(
            model=model,
            tokenizer=tokenizer,
            feature_extractor=feature_extractor,
            image_processor=image_processor,
            processor=processor,
            dtype=dtype,
        )
        return text_classifier, ["HuggingFace is in", "This is another test"]

    def run_pipeline_test(self, text_classifier, _):
        """Check output structure and label validity for every supported input form."""
        model = text_classifier.model
        known_labels = model.config.id2label.values()

        # Small inputs because BartTokenizer tiny has maximum position embeddings = 22
        valid_inputs = "HuggingFace is in"
        outputs = text_classifier(valid_inputs)
        self.assertEqual(nested_simplify(outputs), [{"label": ANY(str), "score": ANY(float)}])
        self.assertIn(outputs[0]["label"], known_labels)

        valid_inputs = ["HuggingFace is in ", "Paris is in France"]
        outputs = text_classifier(valid_inputs)
        self.assertEqual(
            nested_simplify(outputs),
            [{"label": ANY(str), "score": ANY(float)}, {"label": ANY(str), "score": ANY(float)}],
        )
        self.assertIn(outputs[0]["label"], known_labels)
        self.assertIn(outputs[1]["label"], known_labels)

        # Forcing to get all results with `top_k=None`
        # This is NOT the legacy format
        outputs = text_classifier(valid_inputs, top_k=None)
        N = len(known_labels)
        self.assertEqual(
            nested_simplify(outputs),
            [[{"label": ANY(str), "score": ANY(float)}] * N, [{"label": ANY(str), "score": ANY(float)}] * N],
        )

        # A dict with `text` / `text_pair` yields a single dict, not a list.
        valid_inputs = {"text": "HuggingFace is in ", "text_pair": "Paris is in France"}
        outputs = text_classifier(valid_inputs)
        self.assertEqual(
            nested_simplify(outputs),
            {"label": ANY(str), "score": ANY(float)},
        )
        self.assertIn(outputs["label"], known_labels)

        # This might be used as a text pair, but tokenizer + pipe interaction
        # makes it hard to understand that it's not using the pair properly
        # https://github.com/huggingface/transformers/issues/17305
        # We disabled this usage instead as it was outputting wrong outputs.
        invalid_input = [["HuggingFace is in ", "Paris is in France"]]
        with self.assertRaises(ValueError):
            text_classifier(invalid_input)

        # This used to be valid for doing text pairs
        # We're keeping it working because of backward compatibility
        outputs = text_classifier([[["HuggingFace is in ", "Paris is in France"]]])
        self.assertEqual(
            nested_simplify(outputs),
            [{"label": ANY(str), "score": ANY(float)}],
        )
        self.assertIn(outputs[0]["label"], known_labels)

View File

@@ -0,0 +1,558 @@
# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from unittest.mock import patch
from transformers import (
MODEL_FOR_CAUSAL_LM_MAPPING,
TextGenerationPipeline,
logging,
pipeline,
)
from transformers.testing_utils import (
CaptureLogger,
is_pipeline_test,
require_accelerate,
require_torch,
require_torch_accelerator,
torch_device,
)
from .test_pipelines_common import ANY
@is_pipeline_test
@require_torch
class TextGenerationPipelineTests(unittest.TestCase):
model_mapping = MODEL_FOR_CAUSAL_LM_MAPPING
@require_torch
def test_small_model_pt(self):
    """Check greedy text output and sampled token-id output on the tiny random Llama checkpoint."""
    text_generator = pipeline(
        task="text-generation",
        model="hf-internal-testing/tiny-random-LlamaForCausalLM",
        max_new_tokens=10,
    )
    first_prompt = "This is a test"
    second_prompt = "This is a second test"
    first_expected = [{"generated_text": "This is a testкт MéxicoWSAnimImportдели pip letscosatur"}]
    second_expected = [
        {"generated_text": "This is a second testкт MéxicoWSAnimImportдели Düsseld bootstrap learn user"}
    ]

    # Using `do_sample=False` to force deterministic output
    outputs = text_generator(first_prompt, do_sample=False)
    self.assertEqual(outputs, first_expected)

    outputs = text_generator([first_prompt, second_prompt], do_sample=False)
    self.assertEqual(outputs, [first_expected, second_expected])

    # With return_tensors=True each returned sequence is a list of token ids.
    outputs = text_generator(first_prompt, do_sample=True, num_return_sequences=2, return_tensors=True)
    self.assertEqual(outputs, [{"generated_token_ids": ANY(list)} for _ in range(2)])
@require_torch
def test_small_chat_model_pt(self):
    """Check that chat inputs come back with an appended assistant message."""
    text_generator = pipeline(
        task="text-generation",
        model="hf-internal-testing/tiny-gpt2-with-chatml-template",
    )

    def conversation(user_text):
        return [
            {"role": "system", "content": "This is a system message."},
            {"role": "user", "content": user_text},
        ]

    def assistant_turn(text):
        return {"role": "assistant", "content": text}

    first_chat = conversation("This is a test")
    second_chat = conversation("This is a second test")

    # Using `do_sample=False` to force deterministic output
    outputs = text_generator(first_chat, do_sample=False, max_new_tokens=10)
    first_expected = first_chat + [
        assistant_turn(" factors factors factors factors factors factors factors factors factors factors")
    ]
    self.assertEqual(outputs, [{"generated_text": first_expected}])

    # A list of chats returns one result list per chat.
    outputs = text_generator([first_chat, second_chat], do_sample=False, max_new_tokens=10)
    second_expected = second_chat + [
        assistant_turn(" stairs stairs stairs stairs stairs stairs stairs stairs stairs stairs")
    ]
    self.assertEqual(
        outputs,
        [
            [{"generated_text": first_expected}],
            [{"generated_text": second_expected}],
        ],
    )
@require_torch
def test_small_chat_model_continue_final_message(self):
    """A chat that ends with an assistant message is continued, not followed by a new message."""
    text_generator = pipeline(
        task="text-generation",
        model="hf-internal-testing/tiny-gpt2-with-chatml-template",
    )
    system_turn = {"role": "system", "content": "This is a system message."}
    user_turn = {"role": "user", "content": "This is a test"}
    partial_reply = {"role": "assistant", "content": "This is"}

    # Using `do_sample=False` to force deterministic output
    outputs = text_generator([system_turn, user_turn, partial_reply], do_sample=False, max_new_tokens=10)

    # Assert that we continued the last message and there isn't a sneaky <|im_end|>
    continued_reply = {
        "role": "assistant",
        "content": "This is stairs stairs stairs stairs stairs stairs stairs stairs stairs stairs",
    }
    expected_chat = [
        {"role": "system", "content": "This is a system message."},
        {"role": "user", "content": "This is a test"},
        continued_reply,
    ]
    self.assertEqual(outputs, [{"generated_text": expected_chat}])
@require_torch
def test_small_chat_model_continue_final_message_override(self):
    """With ``continue_final_message=True`` the final user message itself is continued."""
    text_generator = pipeline(
        task="text-generation",
        model="hf-internal-testing/tiny-gpt2-with-chatml-template",
    )
    system_turn = {"role": "system", "content": "This is a system message."}
    user_turn = {"role": "user", "content": "This is a test"}

    # Using `do_sample=False` to force deterministic output
    outputs = text_generator(
        [system_turn, user_turn],
        do_sample=False,
        max_new_tokens=10,
        continue_final_message=True,
    )

    # Assert that we continued the last message and there isn't a sneaky <|im_end|>
    continued_user_turn = {
        "role": "user",
        "content": "This is a test stairs stairs stairs stairs stairs stairs stairs stairs stairs stairs",
    }
    expected_chat = [
        {"role": "system", "content": "This is a system message."},
        continued_user_turn,
    ]
    self.assertEqual(outputs, [{"generated_text": expected_chat}])
@require_torch
def test_small_chat_model_with_dataset_pt(self):
    """Feed a chat through a torch ``Dataset`` wrapped in ``KeyDataset``."""
    from torch.utils.data import Dataset

    from transformers.pipelines.pt_utils import KeyDataset

    class SingleChatDataset(Dataset):
        # One conversation, exposed under the "text" key of each item.
        data = [
            [
                {"role": "system", "content": "This is a system message."},
                {"role": "user", "content": "This is a test"},
            ],
        ]

        def __len__(self):
            return 1

        def __getitem__(self, i):
            return {"text": self.data[i]}

    text_generator = pipeline(
        task="text-generation",
        model="hf-internal-testing/tiny-gpt2-with-chatml-template",
    )
    dataset = SingleChatDataset()
    chats = KeyDataset(dataset, "text")

    for outputs in text_generator(chats, do_sample=False, max_new_tokens=10):
        assistant_reply = {
            "role": "assistant",
            "content": " factors factors factors factors factors factors factors factors factors factors",
        }
        expected_chat = dataset.data[0] + [assistant_reply]
        self.assertEqual(outputs, [{"generated_text": expected_chat}])
@require_torch
def test_small_chat_model_with_iterator_pt(self):
    """Feed chats through a generator and check the lazily produced results.

    The call is expected to return a ``PipelineIterator``; draining it must
    yield one result list per chat, in input order.
    """
    from transformers.pipelines.pt_utils import PipelineIterator

    text_generator = pipeline(
        task="text-generation",
        model="hf-internal-testing/tiny-gpt2-with-chatml-template",
    )

    # Using `do_sample=False` to force deterministic output
    chat1 = [
        {"role": "system", "content": "This is a system message."},
        {"role": "user", "content": "This is a test"},
    ]
    chat2 = [
        {"role": "system", "content": "This is a system message."},
        {"role": "user", "content": "This is a second test"},
    ]
    expected_chat1 = chat1 + [
        {
            "role": "assistant",
            "content": " factors factors factors factors factors factors factors factors factors factors",
        }
    ]
    expected_chat2 = chat2 + [
        {
            "role": "assistant",
            "content": " stairs stairs stairs stairs stairs stairs stairs stairs stairs stairs",
        }
    ]

    def data():
        yield from [chat1, chat2]

    outputs = text_generator(data(), do_sample=False, max_new_tokens=10)
    # Use the unittest assertion rather than a bare `assert`: it is not stripped
    # under `python -O` and reports the actual type on failure.
    self.assertIsInstance(outputs, PipelineIterator)

    outputs = list(outputs)
    self.assertEqual(
        outputs,
        [
            [{"generated_text": expected_chat1}],
            [{"generated_text": expected_chat2}],
        ],
    )
def get_test_pipeline(
    self,
    model,
    tokenizer=None,
    image_processor=None,
    feature_extractor=None,
    processor=None,
    dtype="float32",
):
    """Build the text-generation pipeline under test and return it with two example prompts.

    Returns:
        A ``(pipeline, examples)`` tuple; the pipeline is capped at 5 new tokens.
    """
    pipeline_kwargs = {
        "model": model,
        "tokenizer": tokenizer,
        "feature_extractor": feature_extractor,
        "image_processor": image_processor,
        "processor": processor,
        "dtype": dtype,
        "max_new_tokens": 5,
    }
    examples = ["This is a test", "Another test"]
    return TextGenerationPipeline(**pipeline_kwargs), examples
def test_stop_sequence_stopping_criteria(self):
    """Passing ``stop_sequence`` shortens the generated text."""
    prompt = "Hello I believe in"
    text_generator = pipeline(
        "text-generation",
        model="hf-internal-testing/tiny-random-gpt2",
        max_new_tokens=5,
        do_sample=False,
    )

    # Without a stop sequence all five new tokens are produced.
    unrestricted = text_generator(prompt)
    self.assertEqual(unrestricted, [{"generated_text": "Hello I believe in fe fe fe fe fe"}])

    # With one, the expected text ends after the first " fe".
    stopped = text_generator(prompt, stop_sequence=" fe")
    self.assertEqual(stopped, [{"generated_text": "Hello I believe in fe"}])
# Shared check run against a text-generation pipeline: output structure,
# `return_full_text` handling, conflicting return flags, empty prompts and
# over-long inputs.
# NOTE(review): indentation is missing in this view, so the nesting of the
# `if` / `with` statements below cannot be confirmed from here - verify against
# the repository before restructuring.
def run_pipeline_test(self, text_generator, _):
model = text_generator.model
tokenizer = text_generator.tokenizer
# Default call: the generated text starts with the prompt.
outputs = text_generator("This is a test")
self.assertEqual(outputs, [{"generated_text": ANY(str)}])
self.assertTrue(outputs[0]["generated_text"].startswith("This is a test"))
# return_full_text=False at call time: the prompt must not appear in the output.
outputs = text_generator("This is a test", return_full_text=False)
self.assertEqual(outputs, [{"generated_text": ANY(str)}])
self.assertNotIn("This is a test", outputs[0]["generated_text"])
# Rebuild the pipeline with return_full_text=False as a constructor default;
# from here on `text_generator` refers to this new pipeline.
text_generator = pipeline(
task="text-generation", model=model, tokenizer=tokenizer, return_full_text=False, max_new_tokens=5
)
outputs = text_generator("This is a test")
self.assertEqual(outputs, [{"generated_text": ANY(str)}])
self.assertNotIn("This is a test", outputs[0]["generated_text"])
# A call-time return_full_text=True overrides the constructor default.
outputs = text_generator("This is a test", return_full_text=True)
self.assertEqual(outputs, [{"generated_text": ANY(str)}])
self.assertTrue(outputs[0]["generated_text"].startswith("This is a test"))
# Two prompts with two sampled sequences each give a 2x2 nested result.
outputs = text_generator(["This is great !", "Something else"], num_return_sequences=2, do_sample=True)
self.assertEqual(
outputs,
[
[{"generated_text": ANY(str)}, {"generated_text": ANY(str)}],
[{"generated_text": ANY(str)}, {"generated_text": ANY(str)}],
],
)
# The batched variant (batch_size=2) is only attempted when the tokenizer has a pad token.
if text_generator.tokenizer.pad_token is not None:
outputs = text_generator(
["This is great !", "Something else"], num_return_sequences=2, batch_size=2, do_sample=True
)
self.assertEqual(
outputs,
[
[{"generated_text": ANY(str)}, {"generated_text": ANY(str)}],
[{"generated_text": ANY(str)}, {"generated_text": ANY(str)}],
],
)
# Each pair of return flags below is expected to be rejected with ValueError.
with self.assertRaises(ValueError):
outputs = text_generator("test", return_full_text=True, return_text=True)
with self.assertRaises(ValueError):
outputs = text_generator("test", return_full_text=True, return_tensors=True)
with self.assertRaises(ValueError):
outputs = text_generator("test", return_text=True, return_tensors=True)
# Empty prompt is slightly special
# it requires BOS token to exist.
# Special case for Pegasus which will always append EOS so will
# work even without BOS.
if (
text_generator.tokenizer.bos_token_id is not None
or "Pegasus" in tokenizer.__class__.__name__
or "Git" in model.__class__.__name__
):
outputs = text_generator("")
self.assertEqual(outputs, [{"generated_text": ANY(str)}])
else:
with self.assertRaises((ValueError, AssertionError)):
outputs = text_generator("", add_special_tokens=False)
# We don't care about infinite range models.
# They already work.
# Skip this test for XGLM, since it uses sinusoidal positional embeddings which are resized on-the-fly.
EXTRA_MODELS_CAN_HANDLE_LONG_INPUTS = [
"RwkvForCausalLM",
"XGLMForCausalLM",
"GPTNeoXForCausalLM",
"GPTNeoXJapaneseForCausalLM",
"FuyuForCausalLM",
"LlamaForCausalLM",
]
# The long-input checks apply only to tokenizers with model_max_length below
# 10000 and to model classes not listed above.
if (
tokenizer.model_max_length < 10000
and text_generator.model.__class__.__name__ not in EXTRA_MODELS_CAN_HANDLE_LONG_INPUTS
):
# Handling of large generations
# The error expectations are only asserted when the pipeline device is "cpu".
if str(text_generator.device) == "cpu":
with self.assertRaises((RuntimeError, IndexError, ValueError, AssertionError)):
text_generator("This is a test" * 500, max_new_tokens=5)
# The same over-long prompt is passed with handle_long_generation="hole";
# no assertion is made on this result.
outputs = text_generator("This is a test" * 500, handle_long_generation="hole", max_new_tokens=5)
# Hole strategy cannot work
# (here max_new_tokens is set above tokenizer.model_max_length)
if str(text_generator.device) == "cpu":
with self.assertRaises(ValueError):
text_generator(
"This is a test" * 500,
handle_long_generation="hole",
max_new_tokens=tokenizer.model_max_length + 10,
)
@require_torch
@require_accelerate
@require_torch_accelerator
def test_small_model_pt_bloom_accelerate(self):
    """Load tiny BLOOM with ``device_map="auto"`` in three ways and expect bfloat16 weights each time."""
    import torch

    loading_variants = (
        # Classic `model_kwargs`
        {"model_kwargs": {"device_map": "auto", "dtype": torch.bfloat16}},
        # Upgraded those two to real pipeline arguments (they just get sent for the model as they're unlikely to mean anything else.)
        {"device_map": "auto", "dtype": torch.bfloat16},
        # dtype will be automatically set to torch.bfloat16 if not provided - check: https://github.com/huggingface/transformers/pull/38882
        {"device_map": "auto"},
    )
    expected = [{"generated_text": "This is a test test test test test test"}]

    for loading_kwargs in loading_variants:
        pipe = pipeline(
            model="hf-internal-testing/tiny-random-bloom",
            max_new_tokens=5,
            do_sample=False,
            **loading_kwargs,
        )
        self.assertEqual(pipe.model.lm_head.weight.dtype, torch.bfloat16)
        out = pipe("This is a test")
        self.assertEqual(out, expected)
@require_torch
@require_torch_accelerator
def test_small_model_fp16(self):
    """Smoke test: tiny BLOOM in float16 on ``torch_device`` runs without raising."""
    import torch

    pipeline_kwargs = {
        "model": "hf-internal-testing/tiny-random-bloom",
        "device": torch_device,
        "dtype": torch.float16,
        "max_new_tokens": 3,
    }
    pipe = pipeline(**pipeline_kwargs)
    # Only successful execution is checked; the output is not inspected.
    pipe("This is a test")
@require_torch
@require_accelerate
@require_torch_accelerator
def test_pipeline_accelerate_top_p(self):
    """Smoke test: top-p sampling in float16 with ``device_map=torch_device`` runs without raising."""
    import torch

    pipeline_kwargs = {
        "model": "hf-internal-testing/tiny-random-bloom",
        "device_map": torch_device,
        "dtype": torch.float16,
        "max_new_tokens": 3,
    }
    pipe = pipeline(**pipeline_kwargs)
    # Only successful execution is checked; the output is not inspected.
    pipe("This is a test", do_sample=True, top_p=0.5)
def test_pipeline_length_setting_warning(self):
    """Check when the "Both `max_new_tokens`" message is logged by the generation utilities.

    It must appear when the caller sets both `max_length` and `max_new_tokens`, and must not appear when
    only one of them is effectively set.
    """
    prompt = """Hello world"""
    text_generator = pipeline("text-generation", model="hf-internal-testing/tiny-random-gpt2", max_new_tokens=5)
    logger = logging.get_logger("transformers.generation.utils")
    logger_msg = "Both `max_new_tokens`"  # The beginning of the message to be checked in this test

    def logged_output(**length_kwargs):
        # Run one generation and hand back everything the generation logger emitted meanwhile.
        with CaptureLogger(logger) as captured:
            text_generator(prompt, **length_kwargs)
        return captured.out

    # Both are set by the user -> log warning
    self.assertIn(logger_msg, logged_output(max_length=10, max_new_tokens=1))

    # The user only sets one -> no warning
    self.assertNotIn(logger_msg, logged_output(max_new_tokens=1))
    self.assertNotIn(logger_msg, logged_output(max_length=10, max_new_tokens=None))
def test_return_dict_in_generate(self):
    """With `return_dict_in_generate=True`, each result carries `logits` and `scores` next to the text."""
    text_generator = pipeline("text-generation", model="hf-internal-testing/tiny-random-gpt2", max_new_tokens=2)
    prompts = ["This is great !", "Something else"]

    out = text_generator(prompts, return_dict_in_generate=True, output_logits=True, output_scores=True)

    # One single-element list per prompt, each holding the text plus the two requested extras.
    expected_entry = {
        "generated_text": ANY(str),
        "logits": ANY(list),
        "scores": ANY(list),
    }
    self.assertEqual(out, [[expected_entry], [expected_entry]])
@require_torch
def test_pipeline_assisted_generation(self):
    """Tests that we can run assisted generation in the pipeline"""
    checkpoint = "hf-internal-testing/tiny-random-MistralForCausalLM"
    # The same tiny checkpoint serves as both the main model and its assistant.
    pipe = pipeline("text-generation", model=checkpoint, assistant_model=checkpoint, max_new_tokens=2)
    prompt = "Hello world"

    # We can run the pipeline
    pipe(prompt)

    # It is running assisted generation under the hood (e.g. flags incompatible with assisted gen will crash)
    incompatible_kwargs = {"num_beams": 2}
    with self.assertRaises(ValueError):
        pipe(prompt, generate_kwargs=incompatible_kwargs)
@require_torch
def test_pipeline_skip_special_tokens(self):
    """Tests that we can use `skip_special_tokens=False` to get the special tokens in the output"""
    end_of_turn = "<end_of_turn>"
    model_id = "google/gemma-3-270m-it"
    chat = [{"role": "user", "content": "What's your name?"}]
    generator = pipeline("text-generation", model=model_id)

    # normal pipeline use
    default_output = generator(chat, max_new_tokens=20, do_sample=False)
    default_text = str(default_output[0]["generated_text"])
    self.assertNotIn(end_of_turn, default_text)

    # forcing special tokens to be included in the output
    raw_output = generator(chat, max_new_tokens=1000, do_sample=False, skip_special_tokens=False)
    raw_text = str(raw_output[0]["generated_text"])
    self.assertIn(end_of_turn, raw_text)
@require_torch
def test_forward_tokenizer_kwargs(self):
    """`tokenizer_encode_kwargs` given at call time must reach `tokenizer.apply_chat_template`."""
    checkpoint = "hf-internal-testing/tiny-gpt2-with-chatml-template"
    text_generator = pipeline("text-generation", checkpoint, max_new_tokens=5)
    tokenizer = text_generator.tokenizer
    chat = [
        {"role": "system", "content": "This is a system message."},
        {"role": "user", "content": "This is a test"},
    ]

    # `wraps=` keeps the real templating behaviour while recording how it was called.
    with patch.object(tokenizer, "apply_chat_template", wraps=tokenizer.apply_chat_template) as spy:
        text_generator(chat, tokenizer_encode_kwargs={"enable_thinking": True})
        self.assertGreater(spy.call_count, 0)
        forwarded_kwargs = spy.call_args.kwargs
        self.assertIn("enable_thinking", forwarded_kwargs)
        self.assertEqual(forwarded_kwargs["enable_thinking"], True)

View File

@@ -0,0 +1,301 @@
# Copyright 2023 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy as np
from transformers import (
MODEL_FOR_TEXT_TO_WAVEFORM_MAPPING,
AutoProcessor,
TextToAudioPipeline,
pipeline,
)
from transformers.testing_utils import (
is_pipeline_test,
require_torch,
require_torch_accelerator,
slow,
torch_device,
)
from transformers.trainer_utils import set_seed
from .test_pipelines_common import ANY
@is_pipeline_test
@require_torch
class TextToAudioPipelineTests(unittest.TestCase):
    """Tests for the `text-to-audio` pipeline on MusicGen, SeamlessM4T, Bark, VITS and CSM checkpoints.

    `get_test_pipeline` / `run_pipeline_test` are the generic hooks; the `test_*` methods each pin the
    behaviour of one concrete checkpoint or one argument-routing rule.
    """

    model_mapping = MODEL_FOR_TEXT_TO_WAVEFORM_MAPPING
    # for now only test text_to_waveform and not text_to_spectrogram

    @require_torch
    def test_small_musicgen_pt(self):
        """MusicGen returns numpy audio at 32 kHz for single, list and batched inputs."""
        music_generator = pipeline(
            task="text-to-audio", model="facebook/musicgen-small", do_sample=False, max_new_tokens=5
        )

        outputs = music_generator("This is a test")
        self.assertEqual({"audio": ANY(np.ndarray), "sampling_rate": 32000}, outputs)

        # test two examples side-by-side
        outputs = music_generator(["This is a test", "This is a second test"])
        audio = [output["audio"] for output in outputs]
        self.assertEqual([ANY(np.ndarray), ANY(np.ndarray)], audio)

        # test batching, this time with parameterization in the forward pass
        music_generator = pipeline(task="text-to-audio", model="facebook/musicgen-small")
        forward_params = {"do_sample": False, "max_new_tokens": 5}
        outputs = music_generator(
            ["This is a test", "This is a second test"], forward_params=forward_params, batch_size=2
        )
        audio = [output["audio"] for output in outputs]
        self.assertEqual([ANY(np.ndarray), ANY(np.ndarray)], audio)

    @slow
    @require_torch
    def test_medium_seamless_m4t_pt(self):
        """SeamlessM4T returns numpy audio at 16 kHz for two different sets of forward params."""
        speech_generator = pipeline(task="text-to-audio", model="facebook/hf-seamless-m4t-medium", max_new_tokens=5)

        for forward_params in [{"tgt_lang": "eng"}, {"return_intermediate_token_ids": True, "tgt_lang": "eng"}]:
            outputs = speech_generator("This is a test", forward_params=forward_params)
            self.assertEqual({"audio": ANY(np.ndarray), "sampling_rate": 16000}, outputs)

            # test two examples side-by-side
            outputs = speech_generator(["This is a test", "This is a second test"], forward_params=forward_params)
            audio = [output["audio"] for output in outputs]
            self.assertEqual([ANY(np.ndarray), ANY(np.ndarray)], audio)

            # test batching
            outputs = speech_generator(
                ["This is a test", "This is a second test"], forward_params=forward_params, batch_size=2
            )
            audio = [output["audio"] for output in outputs]
            self.assertEqual([ANY(np.ndarray), ANY(np.ndarray)], audio)

    @slow
    @require_torch
    def test_small_bark_pt(self):
        """Bark returns numpy audio at 24 kHz with greedy decoding, sampling and a voice preset."""
        speech_generator = pipeline(task="text-to-audio", model="suno/bark-small")

        forward_params = {
            # Using `do_sample=False` to force deterministic output
            "do_sample": False,
            "semantic_max_new_tokens": 5,
        }

        outputs = speech_generator("This is a test", forward_params=forward_params)
        self.assertEqual(
            {"audio": ANY(np.ndarray), "sampling_rate": 24000},
            outputs,
        )

        # test two examples side-by-side
        outputs = speech_generator(
            ["This is a test", "This is a second test"],
            forward_params=forward_params,
        )
        audio = [output["audio"] for output in outputs]
        self.assertEqual([ANY(np.ndarray), ANY(np.ndarray)], audio)

        # test other generation strategy
        forward_params = {
            "do_sample": True,
            "semantic_max_new_tokens": 5,
            "semantic_num_return_sequences": 2,
        }

        outputs = speech_generator("This is a test", forward_params=forward_params)
        audio = outputs["audio"]
        self.assertEqual(ANY(np.ndarray), audio)

        # test using a speaker embedding
        processor = AutoProcessor.from_pretrained("suno/bark-small")
        temp_inp = processor("hey, how are you?", voice_preset="v2/en_speaker_5")
        history_prompt = temp_inp["history_prompt"]
        forward_params["history_prompt"] = history_prompt

        outputs = speech_generator(
            ["This is a test", "This is a second test"],
            forward_params=forward_params,
            batch_size=2,
        )
        audio = [output["audio"] for output in outputs]
        self.assertEqual([ANY(np.ndarray), ANY(np.ndarray)], audio)

    @slow
    @require_torch_accelerator
    def test_conversion_additional_tensor(self):
        """A `history_prompt` passed through `forward_params` must work with the pipeline on `torch_device`."""
        speech_generator = pipeline(task="text-to-audio", model="suno/bark-small", device=torch_device)
        processor = AutoProcessor.from_pretrained("suno/bark-small")

        forward_params = {
            "do_sample": True,
            "semantic_max_new_tokens": 5,
        }

        # atm, must do to stay coherent with BarkProcessor
        preprocess_params = {
            "max_length": 256,
            "add_special_tokens": False,
            "return_attention_mask": True,
            "return_token_type_ids": False,
            "padding": "max_length",
        }
        # First call only checks that generation runs with these preprocess params; its result is replaced below.
        outputs = speech_generator(
            "This is a test",
            forward_params=forward_params,
            preprocess_params=preprocess_params,
        )

        temp_inp = processor("hey, how are you?", voice_preset="v2/en_speaker_5")
        history_prompt = temp_inp["history_prompt"]
        forward_params["history_prompt"] = history_prompt

        # history_prompt is a torch.Tensor passed as a forward_param
        # if generation is successful, it means that it was passed to the right device
        outputs = speech_generator(
            "This is a test", forward_params=forward_params, preprocess_params=preprocess_params
        )
        self.assertEqual(
            {"audio": ANY(np.ndarray), "sampling_rate": 24000},
            outputs,
        )

    @require_torch
    def test_vits_model_pt(self):
        """The MMS-TTS checkpoint returns numpy audio at 16 kHz for single, list and batched inputs."""
        speech_generator = pipeline(task="text-to-audio", model="facebook/mms-tts-eng")

        outputs = speech_generator("This is a test")
        self.assertEqual(outputs["sampling_rate"], 16000)

        audio = outputs["audio"]
        self.assertEqual(ANY(np.ndarray), audio)

        # test two examples side-by-side
        outputs = speech_generator(["This is a test", "This is a second test"])
        audio = [output["audio"] for output in outputs]
        self.assertEqual([ANY(np.ndarray), ANY(np.ndarray)], audio)

        # test batching
        outputs = speech_generator(["This is a test", "This is a second test"], batch_size=2)
        self.assertEqual(ANY(np.ndarray), outputs[0]["audio"])

    @require_torch
    def test_forward_model_kwargs(self):
        """A forward-only model rejects generation arguments and stays reproducible afterwards.

        Generation-only entries in `forward_params` must raise `TypeError`, and any `generate_kwargs` must
        raise `ValueError`. A final seeded run is compared against the seeded reference run.
        """
        # use vits - a forward model
        speech_generator = pipeline(task="text-to-audio", model="kakao-enterprise/vits-vctk")

        # for reproducibility
        set_seed(555)
        outputs = speech_generator("This is a test", forward_params={"speaker_id": 5})
        audio = outputs["audio"]

        with self.assertRaises(TypeError):
            # assert error if generate parameter
            speech_generator("This is a test", forward_params={"speaker_id": 5, "do_sample": True})

        with self.assertRaises(ValueError):
            # assert error if generate_kwargs with forward-only models
            speech_generator(
                "This is a test", forward_params={"speaker_id": 5}, generate_kwargs={"do_sample": True}
            )

        # Neither call above returns, so the old final assertion compared the reference output with itself.
        # Re-seed and re-run the valid call so the comparison actually checks something.
        set_seed(555)
        outputs = speech_generator("This is a test", forward_params={"speaker_id": 5})
        self.assertLess(np.abs(outputs["audio"] - audio).max(), 1e-5)

    @require_torch
    def test_generative_model_kwargs(self):
        """For a generative model, `generate_kwargs` take priority over clashing `forward_params`."""
        # use musicgen - a generative model
        music_generator = pipeline(task="text-to-audio", model="facebook/musicgen-small")

        forward_params = {
            "do_sample": True,
            "max_new_tokens": 20,
        }

        # for reproducibility
        set_seed(555)
        outputs = music_generator("This is a test", forward_params=forward_params)
        audio = outputs["audio"]
        self.assertEqual(ANY(np.ndarray), audio)

        # make sure generate kwargs get priority over forward params
        forward_params = {
            "do_sample": False,
            "max_new_tokens": 20,
        }
        generate_kwargs = {"do_sample": True}

        # for reproducibility
        set_seed(555)
        outputs = music_generator("This is a test", forward_params=forward_params, generate_kwargs=generate_kwargs)
        self.assertListEqual(outputs["audio"].tolist(), audio.tolist())

    @slow
    @require_torch
    def test_csm_model_pt(self):
        """The CSM checkpoint returns numpy audio at 24 kHz for single, list and batched inputs."""
        speech_generator = pipeline(task="text-to-audio", model="sesame/csm-1b")

        outputs = speech_generator("[0]This is a test")
        self.assertEqual(outputs["sampling_rate"], 24000)

        audio = outputs["audio"]
        self.assertEqual(ANY(np.ndarray), audio)

        # test two examples side-by-side
        outputs = speech_generator(["[0]This is a test", "[0]This is a second test"])
        audio = [output["audio"] for output in outputs]
        self.assertEqual([ANY(np.ndarray), ANY(np.ndarray)], audio)

        # test batching
        outputs = speech_generator(["[0]This is a test", "[0]This is a second test"], batch_size=2)
        self.assertEqual(ANY(np.ndarray), outputs[0]["audio"])

    def get_test_pipeline(
        self,
        model,
        tokenizer=None,
        image_processor=None,
        feature_extractor=None,
        processor=None,
        dtype="float32",
    ):
        """Build a `TextToAudioPipeline` around `model` together with two sample prompts.

        Returns:
            A `(pipeline, examples)` tuple where `examples` is a list of two strings.
        """
        model_test_kwargs = {}
        if model.can_generate():  # not all models in this pipeline can generate and, therefore, take `generate` kwargs
            model_test_kwargs["max_new_tokens"] = 5
        model.config._attn_implementation = "eager"
        speech_generator = TextToAudioPipeline(
            model=model,
            tokenizer=tokenizer,
            feature_extractor=feature_extractor,
            image_processor=image_processor,
            processor=processor,
            dtype=dtype,
            **model_test_kwargs,
        )

        return speech_generator, ["This is a test", "Another test"]

    def run_pipeline_test(self, speech_generator, _):
        """Check that single and list inputs both yield numpy audio; the examples argument is unused."""
        outputs = speech_generator("This is a test")
        self.assertEqual(ANY(np.ndarray), outputs["audio"])

        # Sampling arguments are only passed to models that can generate.
        forward_params = (
            {"num_return_sequences": 2, "do_sample": True} if speech_generator.model.can_generate() else {}
        )
        outputs = speech_generator(["This is great !", "Something else"], forward_params=forward_params)
        audio = [output["audio"] for output in outputs]
        self.assertEqual([ANY(np.ndarray), ANY(np.ndarray)], audio)

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,167 @@
# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import pytest
from transformers import (
MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING,
MBart50TokenizerFast,
MBartConfig,
MBartForConditionalGeneration,
TranslationPipeline,
pipeline,
)
from transformers.testing_utils import is_pipeline_test, require_torch, slow
from .test_pipelines_common import ANY
@is_pipeline_test
class TranslationPipelineTests(unittest.TestCase):
    """Generic and tiny-model checks for `TranslationPipeline`."""

    model_mapping = MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING

    def get_test_pipeline(
        self,
        model,
        tokenizer=None,
        image_processor=None,
        feature_extractor=None,
        processor=None,
        dtype="float32",
    ):
        """Build a `TranslationPipeline` for `model` and return it with two sample sentences.

        MBart configurations also receive a source/target language pair, taken from the first two language
        codes the tokenizer lists.
        """
        pipeline_kwargs = {
            "model": model,
            "tokenizer": tokenizer,
            "feature_extractor": feature_extractor,
            "image_processor": image_processor,
            "processor": processor,
            "dtype": dtype,
        }
        if isinstance(model.config, MBartConfig):
            first_lang, second_lang = list(tokenizer.lang_code_to_id.keys())[:2]
            pipeline_kwargs["src_lang"] = first_lang
            pipeline_kwargs["tgt_lang"] = second_lang
        pipeline_kwargs["max_new_tokens"] = 20

        translator = TranslationPipeline(**pipeline_kwargs)
        return translator, ["Some string", "Some other text"]

    def run_pipeline_test(self, translator, _):
        """A string, a one-item list and a two-item list each give one translation dict per input."""
        one_translation = {"translation_text": ANY(str)}

        self.assertEqual(translator("Some string"), [one_translation])
        self.assertEqual(translator(["Some string"]), [one_translation])
        self.assertEqual(translator(["Some string", "other string"]), [one_translation, one_translation])

    @require_torch
    def test_small_model_pt(self):
        """Pin the exact en->ro output of the tiny random T5 checkpoint."""
        translator = pipeline("translation_en_to_ro", model="patrickvonplaten/t5-tiny-random")
        expected_text = (
            "Beide Beide Beide Beide Beide Beide Beide Beide Beide Beide Beide Beide Beide Beide Beide"
            " Beide Beide"
        )

        outputs = translator("This is a test string", max_length=20)

        self.assertEqual(outputs, [{"translation_text": expected_text}])

    @require_torch
    def test_en_to_de_pt(self):
        """Pin the exact en->de output of the tiny random T5 checkpoint."""
        translator = pipeline("translation_en_to_de", model="patrickvonplaten/t5-tiny-random")
        expected_text = (
            "monoton monoton monoton monoton monoton monoton monoton monoton monoton monoton urine urine"
            " urine urine urine urine urine urine urine"
        )

        outputs = translator("This is a test string", max_length=20)

        self.assertEqual(outputs, [{"translation_text": expected_text}])
class TranslationNewFormatPipelineTests(unittest.TestCase):
    """Checks how the translation task name and the `src_lang` / `tgt_lang` arguments are resolved."""

    @require_torch
    @slow
    def test_default_translations(self):
        """Only some language pairs come with a default model."""
        # We don't provide a default for this pair
        with self.assertRaises(ValueError):
            pipeline(task="translation_cn_to_ar")

        # but we do for this one
        translator = pipeline(task="translation_en_to_de")
        preprocess_params = translator._preprocess_params
        self.assertEqual(preprocess_params["src_lang"], "en")
        self.assertEqual(preprocess_params["tgt_lang"], "de")

    @require_torch
    @slow
    def test_multilingual_translation(self):
        """mBART-50 needs explicit languages, given per call or when the pipeline is built."""
        checkpoint = "facebook/mbart-large-50-many-to-many-mmt"
        model = MBartForConditionalGeneration.from_pretrained(checkpoint)
        tokenizer = MBart50TokenizerFast.from_pretrained(checkpoint)
        arabic = [{"translation_text": "هذا إختبار"}]
        hindi = [{"translation_text": "यह एक परीक्षण है"}]

        translator = pipeline(task="translation", model=model, tokenizer=tokenizer)
        # Missing src_lang, tgt_lang
        with self.assertRaises(ValueError):
            translator("This is a test")

        self.assertEqual(translator("This is a test", src_lang="en_XX", tgt_lang="ar_AR"), arabic)
        self.assertEqual(translator("This is a test", src_lang="en_XX", tgt_lang="hi_IN"), hindi)

        # src_lang, tgt_lang can also be passed to the `pipeline(...)` call itself
        translator = pipeline(task="translation", model=model, tokenizer=tokenizer, src_lang="en_XX", tgt_lang="ar_AR")
        self.assertEqual(translator("This is a test"), arabic)

    @require_torch
    def test_translation_on_odd_language(self):
        """A pair without a default model still works once a model is supplied explicitly."""
        translator = pipeline(task="translation_cn_to_ar", model="patrickvonplaten/t5-tiny-random")
        preprocess_params = translator._preprocess_params
        self.assertEqual(preprocess_params["src_lang"], "cn")
        self.assertEqual(preprocess_params["tgt_lang"], "ar")

    @require_torch
    def test_translation_default_language_selection(self):
        """The bare `translation` task warns and resolves to `translation_en_to_de` for this model."""
        with pytest.warns(UserWarning, match=r".*translation_en_to_de.*"):
            translator = pipeline(task="translation", model="patrickvonplaten/t5-tiny-random")

        self.assertEqual(translator.task, "translation_en_to_de")
        preprocess_params = translator._preprocess_params
        self.assertEqual(preprocess_params["src_lang"], "en")
        self.assertEqual(preprocess_params["tgt_lang"], "de")

    @require_torch
    def test_translation_with_no_language_no_model_fails(self):
        """With neither a language pair nor a model, building the pipeline raises `ValueError`."""
        with self.assertRaises(ValueError):
            pipeline(task="translation")

View File

@@ -0,0 +1,124 @@
# Copyright 2021 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from huggingface_hub import VideoClassificationOutputElement, hf_hub_download
from transformers import MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING, VideoMAEFeatureExtractor
from transformers.pipelines import VideoClassificationPipeline, pipeline
from transformers.testing_utils import (
compare_pipeline_output_to_hub_spec,
is_pipeline_test,
nested_simplify,
require_av,
require_torch,
require_vision,
)
from .test_pipelines_common import ANY
@is_pipeline_test
@require_torch
@require_vision
@require_av
class VideoClassificationPipelineTests(unittest.TestCase):
    """Generic and tiny-model checks for the `video-classification` pipeline."""

    model_mapping = MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING
    # Local path of the demo clip; filled in by `_load_dataset` on first use.
    example_video_filepath = None

    @classmethod
    def _load_dataset(cls):
        """Fetch the demo video once and remember its path on the class."""
        # Lazy loading: skip the hub call when an earlier caller already stored the path.
        if cls.example_video_filepath is not None:
            return
        cls.example_video_filepath = hf_hub_download(
            repo_id="nateraw/video-demo", filename="archery.mp4", repo_type="dataset"
        )

    def get_test_pipeline(
        self,
        model,
        tokenizer=None,
        image_processor=None,
        feature_extractor=None,
        processor=None,
        dtype="float32",
    ):
        """Build a top-2 `VideoClassificationPipeline` for `model` and return it with the example clip."""
        self._load_dataset()
        pipeline_kwargs = {
            "model": model,
            "tokenizer": tokenizer,
            "feature_extractor": feature_extractor,
            "image_processor": image_processor,
            "processor": processor,
            "dtype": dtype,
            "top_k": 2,
        }
        video_classifier = VideoClassificationPipeline(**pipeline_kwargs)
        examples = [
            self.example_video_filepath,
            # TODO: re-enable this once we have a stable hub solution for CI
            # "https://huggingface.co/datasets/nateraw/video-demo/resolve/main/archery.mp4",
        ]
        return video_classifier, examples

    def run_pipeline_test(self, video_classifier, examples):
        """Each example must give two `{score, label}` entries that match the hub output spec."""
        expected_shape = [
            {"score": ANY(float), "label": ANY(str)},
            {"score": ANY(float), "label": ANY(str)},
        ]
        for example in examples:
            outputs = video_classifier(example)

            self.assertEqual(outputs, expected_shape)
            for element in outputs:
                compare_pipeline_output_to_hub_spec(element, VideoClassificationOutputElement)

    @require_torch
    def test_small_model_pt(self):
        """Pin the top-2 scores of the tiny VideoMAE checkpoint for one clip and for a list of two."""
        small_model = "hf-internal-testing/tiny-random-VideoMAEForVideoClassification"
        small_feature_extractor = VideoMAEFeatureExtractor(
            size={"shortest_edge": 10}, crop_size={"height": 10, "width": 10}
        )
        video_classifier = pipeline(
            "video-classification", model=small_model, feature_extractor=small_feature_extractor, frame_sampling_rate=4
        )
        video_file_path = hf_hub_download(repo_id="nateraw/video-demo", filename="archery.mp4", repo_type="dataset")
        expected_top2 = [{"score": 0.5199, "label": "LABEL_0"}, {"score": 0.4801, "label": "LABEL_1"}]

        # Single clip
        output = video_classifier(video_file_path, top_k=2)
        self.assertEqual(nested_simplify(output, decimals=4), expected_top2)
        for element in output:
            compare_pipeline_output_to_hub_spec(element, VideoClassificationOutputElement)

        # Same clip twice: one result list per input
        outputs = video_classifier([video_file_path, video_file_path], top_k=2)
        self.assertEqual(nested_simplify(outputs, decimals=4), [expected_top2, expected_top2])
        for output in outputs:
            for element in output:
                compare_pipeline_output_to_hub_spec(element, VideoClassificationOutputElement)

View File

@@ -0,0 +1,247 @@
# Copyright 2022 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from datasets import load_dataset
from transformers import MODEL_FOR_VISUAL_QUESTION_ANSWERING_MAPPING, is_vision_available
from transformers.pipelines import pipeline
from transformers.testing_utils import (
is_pipeline_test,
is_torch_available,
nested_simplify,
require_torch,
require_torch_accelerator,
require_vision,
slow,
torch_device,
)
from .test_pipelines_common import ANY
if is_torch_available():
import torch
from transformers.pipelines.pt_utils import KeyDataset
if is_vision_available():
    from PIL import Image
else:

    class Image:
        """Stand-in used when the vision extras are missing, so `Image.open(...)` can still be referenced."""

        @staticmethod
        def open(*args, **kwargs):
            """Accept any arguments and return `None`."""
            return None
@is_pipeline_test
@require_torch
@require_vision
class VisualQuestionAnsweringPipelineTests(unittest.TestCase):
    """Checks for the `visual-question-answering` pipeline on ViLT and BLIP-2 checkpoints."""

    model_mapping = MODEL_FOR_VISUAL_QUESTION_ANSWERING_MAPPING

    def get_test_pipeline(
        self,
        model,
        tokenizer=None,
        image_processor=None,
        feature_extractor=None,
        processor=None,
        dtype="float32",
    ):
        """Return a tiny ViLT VQA pipeline and two examples (one opened image, one image path).

        Of the arguments, only `dtype` is used; the checkpoint is fixed.
        """
        vqa_pipeline = pipeline(
            "visual-question-answering",
            model="hf-internal-testing/tiny-vilt-random-vqa",
            dtype=dtype,
        )
        opened_image_example = {
            "image": Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"),
            "question": "How many cats are there?",
        }
        image_path_example = {
            "image": "./tests/fixtures/tests_samples/COCO/000000039769.png",
            "question": "How many cats are there?",
        }
        return vqa_pipeline, [opened_image_example, image_path_example]

    def run_pipeline_test(self, vqa_pipeline, examples):
        """With `top_k=1`, each of the two examples gives a single scored answer."""
        outputs = vqa_pipeline(examples, top_k=1)
        single_answer = [{"score": ANY(float), "answer": ANY(str)}]
        self.assertEqual(outputs, [single_answer, single_answer])

    @require_torch
    def test_small_model_pt(self):
        """Keyword and dict call styles both give two scored answers with `top_k=2`."""
        vqa_pipeline = pipeline("visual-question-answering", model="hf-internal-testing/tiny-vilt-random-vqa")
        image = "./tests/fixtures/tests_samples/COCO/000000039769.png"
        question = "How many cats are there?"
        two_answers = [{"score": ANY(float), "answer": ANY(str)}, {"score": ANY(float), "answer": ANY(str)}]

        outputs = vqa_pipeline(image=image, question="How many cats are there?", top_k=2)
        self.assertEqual(outputs, two_answers)

        outputs = vqa_pipeline({"image": image, "question": question}, top_k=2)
        self.assertEqual(outputs, two_answers)

    @require_torch
    @require_torch_accelerator
    def test_small_model_pt_blip2(self):
        """Tiny BLIP-2 answers through every call style, then again in float16 on the accelerator."""
        checkpoint = "hf-internal-testing/tiny-random-Blip2ForConditionalGeneration"
        image = "./tests/fixtures/tests_samples/COCO/000000039769.png"
        question = "How many cats are there?"
        sample = {"image": image, "question": question}

        vqa_pipeline = pipeline("visual-question-answering", model=checkpoint)

        outputs = vqa_pipeline(image=image, question=question)
        self.assertEqual(outputs, [{"answer": ANY(str)}])

        outputs = vqa_pipeline(sample)
        self.assertEqual(outputs, [{"answer": ANY(str)}])

        outputs = vqa_pipeline([sample, {"image": image, "question": question}])
        self.assertEqual(outputs, [[{"answer": ANY(str)}]] * 2)

        vqa_pipeline = pipeline(
            "visual-question-answering",
            model=checkpoint,
            model_kwargs={"dtype": torch.float16},
            device=torch_device,
        )
        loaded_model = vqa_pipeline.model
        self.assertEqual(loaded_model.device, torch.device(f"{torch_device}:0"))
        self.assertEqual(loaded_model.language_model.dtype, torch.float16)
        self.assertEqual(loaded_model.vision_model.dtype, torch.float16)

        outputs = vqa_pipeline(image=image, question=question)
        self.assertEqual(outputs, [{"answer": ANY(str)}])

    @slow
    @require_torch
    def test_large_model_pt(self):
        """Pin the top-2 answers of the fine-tuned ViLT checkpoint for all call styles."""
        vqa_pipeline = pipeline("visual-question-answering", model="dandelin/vilt-b32-finetuned-vqa")
        image = "./tests/fixtures/tests_samples/COCO/000000039769.png"
        question = "How many cats are there?"
        expected_top2 = [{"score": 0.8799, "answer": "2"}, {"score": 0.296, "answer": "1"}]

        outputs = vqa_pipeline(image=image, question=question, top_k=2)
        self.assertEqual(nested_simplify(outputs, decimals=4), expected_top2)

        outputs = vqa_pipeline({"image": image, "question": question}, top_k=2)
        self.assertEqual(nested_simplify(outputs, decimals=4), expected_top2)

        outputs = vqa_pipeline(
            [{"image": image, "question": question}, {"image": image, "question": question}], top_k=2
        )
        self.assertEqual(nested_simplify(outputs, decimals=4), [expected_top2] * 2)

    @slow
    @require_torch
    @require_torch_accelerator
    def test_large_model_pt_blip2(self):
        """BLIP-2 OPT-2.7B in float16 answers "two" for every call style."""
        vqa_pipeline = pipeline(
            "visual-question-answering",
            model="Salesforce/blip2-opt-2.7b",
            model_kwargs={"dtype": torch.float16},
            device=torch_device,
        )
        self.assertEqual(vqa_pipeline.model.device, torch.device(f"{torch_device}:0"))
        self.assertEqual(vqa_pipeline.model.language_model.dtype, torch.float16)

        image = "./tests/fixtures/tests_samples/COCO/000000039769.png"
        question = "Question: how many cats are there? Answer:"
        sample = {"image": image, "question": question}

        outputs = vqa_pipeline(image=image, question=question)
        self.assertEqual(outputs, [{"answer": "two"}])

        outputs = vqa_pipeline(sample)
        self.assertEqual(outputs, [{"answer": "two"}])

        outputs = vqa_pipeline([sample, {"image": image, "question": question}])
        self.assertEqual(outputs, [[{"answer": "two"}]] * 2)

    @require_torch
    def test_small_model_pt_image_list(self):
        """Two images with one question give one answer list per image."""
        vqa_pipeline = pipeline("visual-question-answering", model="hf-internal-testing/tiny-vilt-random-vqa")
        images = [
            "./tests/fixtures/tests_samples/COCO/000000039769.png",
            "./tests/fixtures/tests_samples/COCO/000000004016.png",
        ]
        single_answer = [{"score": ANY(float), "answer": ANY(str)}]

        outputs = vqa_pipeline(image=images, question="How many cats are there?", top_k=1)

        self.assertEqual(outputs, [single_answer, single_answer])

    @require_torch
    def test_small_model_pt_question_list(self):
        """One image with two questions gives one answer list per question."""
        vqa_pipeline = pipeline("visual-question-answering", model="hf-internal-testing/tiny-vilt-random-vqa")
        image = "./tests/fixtures/tests_samples/COCO/000000039769.png"
        questions = ["How many cats are there?", "Are there any dogs?"]
        single_answer = [{"score": ANY(float), "answer": ANY(str)}]

        outputs = vqa_pipeline(image=image, question=questions, top_k=1)

        self.assertEqual(outputs, [single_answer, single_answer])

    @require_torch
    def test_small_model_pt_both_list(self):
        """Two images with two questions give four answer lists."""
        vqa_pipeline = pipeline("visual-question-answering", model="hf-internal-testing/tiny-vilt-random-vqa")
        images = [
            "./tests/fixtures/tests_samples/COCO/000000039769.png",
            "./tests/fixtures/tests_samples/COCO/000000004016.png",
        ]
        questions = ["How many cats are there?", "Are there any dogs?"]
        single_answer = [{"score": ANY(float), "answer": ANY(str)}]

        outputs = vqa_pipeline(image=images, question=questions, top_k=1)

        self.assertEqual(outputs, [single_answer, single_answer, single_answer, single_answer])

    @require_torch
    def test_small_model_pt_dataset(self):
        """A `KeyDataset` of two images can be used as the image input."""
        vqa_pipeline = pipeline("visual-question-answering", model="hf-internal-testing/tiny-vilt-random-vqa")
        dataset = load_dataset("hf-internal-testing/dummy_image_text_data", split="train[:2]")
        question = "What's in the image?"
        single_answer = [{"score": ANY(float), "answer": ANY(str)}]

        outputs = vqa_pipeline(image=KeyDataset(dataset, "image"), question=question, top_k=1)

        self.assertEqual(outputs, [single_answer, single_answer])

View File

@@ -0,0 +1,288 @@
# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from transformers import (
MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING,
Pipeline,
ZeroShotClassificationPipeline,
pipeline,
)
from transformers.testing_utils import (
is_pipeline_test,
is_torch_available,
nested_simplify,
require_torch,
slow,
)
from .test_pipelines_common import ANY
if is_torch_available():
import torch
# These 2 model types require different inputs than those of the usual text models.
_TO_SKIP = {"LayoutLMv2Config", "LayoutLMv3Config"}
@is_pipeline_test
class ZeroShotClassificationPipelineTests(unittest.TestCase):
model_mapping = MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING
if not hasattr(model_mapping, "is_dummy"):
model_mapping = {config: model for config, model in model_mapping.items() if config.__name__ not in _TO_SKIP}
def get_test_pipeline(
    self,
    model,
    tokenizer=None,
    image_processor=None,
    feature_extractor=None,
    processor=None,
    dtype="float32",
):
    """Build a `ZeroShotClassificationPipeline` for `model` with two default candidate labels.

    Returns:
        A `(classifier, examples)` tuple where `examples` is a list of two sample sentences.
    """
    classifier = ZeroShotClassificationPipeline(
        model=model,
        tokenizer=tokenizer,
        feature_extractor=feature_extractor,
        image_processor=image_processor,
        processor=processor,
        dtype=dtype,
        # Was misspelled "polics"; now spelled like the "politics" label used in the rest of this class.
        candidate_labels=["politics", "health"],
    )
    return classifier, ["Who are you voting for in 2020?", "My stomach hurts."]
def run_pipeline_test(self, classifier, _):
    """Exercise the supported call signatures and the input validation of the pipeline.

    Args:
        classifier: The zero-shot classification pipeline to exercise.
        _: Example inputs handed over by the caller; not used here.
    """
    question = "Who are you voting for in 2020?"

    def expected_result(num_labels):
        # Shape-only expectation: one sequence plus `num_labels` labels and scores.
        return {
            "sequence": ANY(str),
            "labels": [ANY(str) for _unused in range(num_labels)],
            "scores": [ANY(float) for _unused in range(num_labels)],
        }

    # A single label given as a keyword string.
    outputs = classifier(question, candidate_labels="politics")
    self.assertEqual(outputs, expected_result(1))

    # A single label given positionally (no kwarg).
    outputs = classifier(question, ["politics"])
    self.assertEqual(outputs, expected_result(1))

    # A single label given as a keyword list.
    outputs = classifier(question, candidate_labels=["politics"])
    self.assertEqual(outputs, expected_result(1))

    # Two labels, first as a comma-separated string and then as a list.
    for two_labels in ("politics, public health", ["politics", "public health"]):
        outputs = classifier(question, candidate_labels=two_labels)
        self.assertEqual(outputs, expected_result(2))
        self.assertAlmostEqual(sum(nested_simplify(outputs["scores"])), 1.0)

    # Custom hypothesis template.
    outputs = classifier(question, candidate_labels="politics", hypothesis_template="This text is about {}")
    self.assertEqual(outputs, expected_result(1))

    # List inputs yield one result per sequence.
    # https://github.com/huggingface/transformers/issues/13846
    for batch in (["I am happy"], ["I am happy", "I am sad"]):
        outputs = classifier(batch, ["positive", "negative"])
        self.assertEqual(outputs, [expected_result(2) for _unused in range(len(batch))])

    # Invalid inputs: (expected exception, sequence argument, keyword arguments).
    invalid_calls = (
        (ValueError, "", {"candidate_labels": "politics"}),
        (TypeError, None, {"candidate_labels": "politics"}),
        (ValueError, question, {"candidate_labels": ""}),
        (TypeError, question, {"candidate_labels": None}),
        (
            ValueError,
            question,
            {"candidate_labels": "politics", "hypothesis_template": "Not formatting template"},
        ),
        (AttributeError, question, {"candidate_labels": "politics", "hypothesis_template": None}),
    )
    for error_type, sequence, call_kwargs in invalid_calls:
        with self.assertRaises(error_type):
            classifier(sequence, **call_kwargs)

    self.run_entailment_id(classifier)
def run_entailment_id(self, zero_shot_classifier: Pipeline):
    """Check that ``entailment_id`` follows the model config's ``label2id`` mapping.

    The config's ``label2id`` is temporarily replaced by several mappings and
    the ``entailment_id`` reported by the classifier is asserted for each one.
    The original mapping is restored in a ``finally`` clause so that a failing
    assertion cannot leave the shared config in a modified state.

    Args:
        zero_shot_classifier: Pipeline exposing ``model.config.label2id`` and
            an ``entailment_id`` attribute.

    Raises:
        AssertionError: If any reported ``entailment_id`` differs from the
            expected value.
    """
    config = zero_shot_classifier.model.config
    original_label2id = config.label2id
    original_entailment = zero_shot_classifier.entailment_id

    # (label2id mapping to install, entailment_id the classifier must report)
    cases = (
        ({"LABEL_0": 0, "LABEL_1": 1, "LABEL_2": 2}, -1),
        ({"entailment": 0, "neutral": 1, "contradiction": 2}, 0),
        ({"ENTAIL": 0, "NON-ENTAIL": 1}, 0),
        ({"ENTAIL": 2, "NEUTRAL": 1, "CONTR": 0}, 2),
    )
    try:
        for label2id, expected_id in cases:
            config.label2id = label2id
            self.assertEqual(zero_shot_classifier.entailment_id, expected_id)
    finally:
        # Always undo the mutation, even when one of the assertions above failed.
        zero_shot_classifier.model.config.label2id = original_label2id

    self.assertEqual(original_entailment, zero_shot_classifier.entailment_id)
@require_torch
def test_truncation(self):
    """Call the pipeline on a very long input to guard against a past regression.

    There was a regression in 4.10 for this; see
    https://github.com/huggingface/transformers/issues/13381#issuecomment-912343499
    """
    checkpoint = "sshleifer/tiny-distilbert-base-cased-distilled-squad"
    zero_shot_classifier = pipeline("zero-shot-classification", model=checkpoint)

    # The sentence is repeated 100 times to produce the long input.
    long_text = "Who are you voting for in 2020?" * 100
    labels = ["politics", "public health", "science"]
    zero_shot_classifier(long_text, candidate_labels=labels)
@require_torch
def test_small_model_pt(self):
    """Check the simplified output of the tiny checkpoint for three candidate labels."""
    checkpoint = "sshleifer/tiny-distilbert-base-cased-distilled-squad"
    zero_shot_classifier = pipeline("zero-shot-classification", model=checkpoint)

    question = "Who are you voting for in 2020?"
    outputs = zero_shot_classifier(question, candidate_labels=["politics", "public health", "science"])

    expected = {
        "sequence": "Who are you voting for in 2020?",
        "labels": ["science", "public health", "politics"],
        "scores": [0.333, 0.333, 0.333],
    }
    self.assertEqual(nested_simplify(outputs), expected)
@require_torch
def test_small_model_pt_fp16(self):
    """Same check as the float32 small-model test, with the pipeline built in ``torch.float16``."""
    checkpoint = "sshleifer/tiny-distilbert-base-cased-distilled-squad"
    zero_shot_classifier = pipeline("zero-shot-classification", model=checkpoint, dtype=torch.float16)

    question = "Who are you voting for in 2020?"
    outputs = zero_shot_classifier(question, candidate_labels=["politics", "public health", "science"])

    expected = {
        "sequence": "Who are you voting for in 2020?",
        "labels": ["science", "public health", "politics"],
        "scores": [0.333, 0.333, 0.333],
    }
    self.assertEqual(nested_simplify(outputs), expected)
@require_torch
def test_small_model_pt_bf16(self):
    """Same check as the float32 small-model test, with the pipeline built in ``torch.bfloat16``."""
    checkpoint = "sshleifer/tiny-distilbert-base-cased-distilled-squad"
    zero_shot_classifier = pipeline("zero-shot-classification", model=checkpoint, dtype=torch.bfloat16)

    question = "Who are you voting for in 2020?"
    outputs = zero_shot_classifier(question, candidate_labels=["politics", "public health", "science"])

    expected = {
        "sequence": "Who are you voting for in 2020?",
        "labels": ["science", "public health", "politics"],
        "scores": [0.333, 0.333, 0.333],
    }
    self.assertEqual(nested_simplify(outputs), expected)
@slow
@require_torch
def test_large_model_pt(self):
    """Check exact simplified outputs of ``FacebookAI/roberta-large-mnli``.

    Covers a short question with default settings and a long abstract with
    ``multi_label=True``.
    """
    zero_shot_classifier = pipeline("zero-shot-classification", model="FacebookAI/roberta-large-mnli")

    question = "Who are you voting for in 2020?"
    outputs = zero_shot_classifier(question, candidate_labels=["politics", "public health", "science"])
    expected_single = {
        "sequence": "Who are you voting for in 2020?",
        "labels": ["politics", "public health", "science"],
        "scores": [0.976, 0.015, 0.009],
    }
    self.assertEqual(nested_simplify(outputs), expected_single)

    # The same text is used as the input and as the expected "sequence" value.
    abstract = (
        "The dominant sequence transduction models are based on complex recurrent or convolutional neural networks"
        " in an encoder-decoder configuration. The best performing models also connect the encoder and decoder"
        " through an attention mechanism. We propose a new simple network architecture, the Transformer, based"
        " solely on attention mechanisms, dispensing with recurrence and convolutions entirely. Experiments on two"
        " machine translation tasks show these models to be superior in quality while being more parallelizable"
        " and requiring significantly less time to train. Our model achieves 28.4 BLEU on the WMT 2014"
        " English-to-German translation task, improving over the existing best results, including ensembles by"
        " over 2 BLEU. On the WMT 2014 English-to-French translation task, our model establishes a new"
        " single-model state-of-the-art BLEU score of 41.8 after training for 3.5 days on eight GPUs, a small"
        " fraction of the training costs of the best models from the literature. We show that the Transformer"
        " generalizes well to other tasks by applying it successfully to English constituency parsing both with"
        " large and limited training data."
    )
    outputs = zero_shot_classifier(
        abstract,
        candidate_labels=["machine learning", "statistics", "translation", "vision"],
        multi_label=True,
    )
    expected_multi = {
        "sequence": abstract,
        "labels": ["translation", "machine learning", "vision", "statistics"],
        "scores": [0.817, 0.713, 0.018, 0.018],
    }
    self.assertEqual(nested_simplify(outputs), expected_multi)

View File

@@ -0,0 +1,92 @@
# Copyright 2023 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from datasets import load_dataset
from transformers.pipelines import pipeline
from transformers.testing_utils import is_pipeline_test, nested_simplify, require_torch, slow
@is_pipeline_test
@require_torch
class ZeroShotAudioClassificationPipelineTests(unittest.TestCase):
# Deactivating auto tests since we don't have a good MODEL_FOR_XX mapping,
# and only CLAP would be there for now.
# model_mapping = {CLAPConfig: CLAPModel}
@require_torch
def test_small_model_pt(self, dtype="float32"):
    """Check the simplified scores of the tiny CLAP checkpoint on one audio sample.

    Args:
        dtype: Forwarded to ``pipeline`` as its ``dtype`` argument.
    """
    audio_classifier = pipeline(
        task="zero-shot-audio-classification",
        model="hf-internal-testing/tiny-clap-htsat-unfused",
        dtype=dtype,
    )

    # Take the raw array of the last entry of the "train" split.
    train_audio = load_dataset("hf-internal-testing/ashraq-esc50-1-dog-example")["train"]["audio"]
    audio = train_audio[-1]["array"]

    output = audio_classifier(audio, candidate_labels=["Sound of a dog", "Sound of vaccum cleaner"])
    expected = [
        {"score": 0.501, "label": "Sound of a dog"},
        {"score": 0.499, "label": "Sound of vaccum cleaner"},
    ]
    self.assertEqual(nested_simplify(output), expected)
@require_torch
def test_small_model_pt_fp16(self):
    """Re-run the small-model test with ``dtype="float16"``."""
    half_precision = "float16"
    self.test_small_model_pt(dtype=half_precision)
@slow
@require_torch
def test_large_model_pt(self):
    """Check ``laion/clap-htsat-unfused`` on a single clip, a list of clips, and a batched list."""
    audio_classifier = pipeline(
        task="zero-shot-audio-classification",
        model="laion/clap-htsat-unfused",
    )

    # This is an audio of a dog
    dataset = load_dataset("hf-internal-testing/ashraq-esc50-1-dog-example")
    audio = dataset["train"]["audio"][-1]["array"]

    labels = ("Sound of a dog", "Sound of vaccum cleaner")
    expected_scores = [
        {"score": 1.0, "label": "Sound of a dog"},
        {"score": 0.0, "label": "Sound of vaccum cleaner"},
    ]

    # Single clip.
    output = audio_classifier(audio, candidate_labels=list(labels))
    self.assertEqual(nested_simplify(output), expected_scores)

    # Five copies of the clip passed as a list.
    output = audio_classifier([audio] * 5, candidate_labels=list(labels))
    self.assertEqual(nested_simplify(output), [expected_scores] * 5)

    # Same list again, with an explicit batch size.
    output = audio_classifier([audio] * 5, candidate_labels=list(labels), batch_size=5)
    self.assertEqual(nested_simplify(output), [expected_scores] * 5)

View File

@@ -0,0 +1,247 @@
# Copyright 2021 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from huggingface_hub import ZeroShotImageClassificationOutputElement
from transformers import is_vision_available
from transformers.pipelines import pipeline
from transformers.testing_utils import (
compare_pipeline_output_to_hub_spec,
is_pipeline_test,
nested_simplify,
require_torch,
require_vision,
slow,
)
from .test_pipelines_common import ANY
if is_vision_available():
    from PIL import Image
else:

    class Image:
        """Placeholder defined when ``is_vision_available()`` is false, so the name ``Image`` exists."""

        @staticmethod
        def open(*args, **kwargs):
            """Accept any arguments and return ``None``."""
            return None
@is_pipeline_test
@require_vision
class ZeroShotImageClassificationPipelineTests(unittest.TestCase):
# Deactivating auto tests since we don't have a good MODEL_FOR_XX mapping,
# and only CLIP would be there for now.
# model_mapping = {CLIPConfig: CLIPModel}
# def get_test_pipeline(self, model, tokenizer, processor):
# if tokenizer is None:
# # Side effect of no Fast Tokenizer class for these model, so skipping
# # But the slow tokenizer test should still run as they're quite small
# self.skipTest(reason="No tokenizer available")
# return
# # return None, None
# image_classifier = ZeroShotImageClassificationPipeline(
# model=model, tokenizer=tokenizer, feature_extractor=processor
# )
# # test with a raw image
# image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")
# image2 = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")
# return image_classifier, [image, image2]
# def run_pipeline_test(self, pipe, examples):
# image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")
# outputs = pipe(image, candidate_labels=["A", "B"])
# self.assertEqual(outputs, {"text": ANY(str)})
# # Batching
# outputs = pipe([image] * 3, batch_size=2, candidate_labels=["A", "B"])
@require_torch
def test_small_model_pt(self, dtype="float32"):
    """Check the tiny random CLIP checkpoint on a single image and on a batched list.

    Args:
        dtype: Forwarded to ``pipeline`` as its ``dtype`` argument.
    """
    checkpoint = "hf-internal-testing/tiny-random-clip-zero-shot-image-classification"
    image_classifier = pipeline(model=checkpoint, dtype=dtype)
    image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")

    output = image_classifier(image, candidate_labels=["a", "b", "c"])
    # The scores are so close that floating-point error decides the ranking, so the order is
    # not guaranteed across python and torch versions; any of these orderings is accepted.
    accepted_orderings = [
        [{"score": 0.333, "label": label} for label in ordering] for ordering in ("abc", "acb", "bac")
    ]
    self.assertIn(nested_simplify(output), accepted_orderings)

    output = image_classifier([image] * 5, candidate_labels=["A", "B", "C"], batch_size=2)
    # Pipeline outputs are supposed to be deterministic, so in theory the real labels
    # "A", "B", "C" could be asserted instead of ANY(str). In this particular case the
    # scores are so close that floating-point error makes the order unreliable under
    # batching, hence only the label type is checked.
    expected_batch = [[{"score": 0.333, "label": ANY(str)} for _rank in range(3)] for _image in range(5)]
    self.assertEqual(nested_simplify(output), expected_batch)

    for single_output in output:
        compare_pipeline_output_to_hub_spec(single_output, ZeroShotImageClassificationOutputElement)
@require_torch
def test_small_model_pt_fp16(self):
    """Re-run the small-model test with ``dtype="float16"``."""
    half_precision = "float16"
    self.test_small_model_pt(dtype=half_precision)
@slow
@require_torch
def test_large_model_pt(self):
    """Check ``openai/clip-vit-base-patch32`` on a single image and on a batched list of five."""
    image_classifier = pipeline(
        task="zero-shot-image-classification",
        model="openai/clip-vit-base-patch32",
    )
    # This is an image of 2 cats with remotes and no planes
    image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")

    labels = ("cat", "plane", "remote")
    expected_ranking = [
        {"score": 0.511, "label": "remote"},
        {"score": 0.485, "label": "cat"},
        {"score": 0.004, "label": "plane"},
    ]

    output = image_classifier(image, candidate_labels=list(labels))
    self.assertEqual(nested_simplify(output), expected_ranking)

    output = image_classifier([image] * 5, candidate_labels=list(labels), batch_size=2)
    self.assertEqual(nested_simplify(output), [expected_ranking] * 5)
@slow
@require_torch
def test_siglip_model_pt(self):
    """Check ``google/siglip-base-patch16-224`` on a single image and on a batched list of five."""
    image_classifier = pipeline(
        task="zero-shot-image-classification",
        model="google/siglip-base-patch16-224",
    )
    # This is an image of 2 cats with remotes and no planes
    image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")

    labels = ("2 cats", "a plane", "a remote")
    expected_ranking = [
        {"score": 0.198, "label": "2 cats"},
        {"score": 0.0, "label": "a remote"},
        {"score": 0.0, "label": "a plane"},
    ]

    output = image_classifier(image, candidate_labels=list(labels))
    self.assertEqual(nested_simplify(output), expected_ranking)

    output = image_classifier([image] * 5, candidate_labels=list(labels), batch_size=2)
    self.assertEqual(nested_simplify(output), [expected_ranking] * 5)
@slow
@require_torch
def test_blip2_model_pt(self):
    """Check ``Salesforce/blip2-itm-vit-g`` on a single image and on a batched list of five.

    Both calls pass ``tokenizer_kwargs={"return_token_type_ids": False}``.
    """
    image_classifier = pipeline(
        task="zero-shot-image-classification",
        model="Salesforce/blip2-itm-vit-g",
    )
    # This is an image of 2 cats with remotes and no planes
    image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")

    labels = ("2 cats", "a plane", "a remote")
    expected_ranking = [
        {"score": 0.369, "label": "2 cats"},
        {"score": 0.333, "label": "a remote"},
        {"score": 0.297, "label": "a plane"},
    ]

    output = image_classifier(
        image,
        candidate_labels=list(labels),
        tokenizer_kwargs={"return_token_type_ids": False},
    )
    self.assertEqual(nested_simplify(output), expected_ranking)

    output = image_classifier(
        [image] * 5,
        candidate_labels=list(labels),
        batch_size=2,
        tokenizer_kwargs={"return_token_type_ids": False},
    )
    self.assertEqual(nested_simplify(output), [expected_ranking] * 5)

View File

@@ -0,0 +1,235 @@
# Copyright 2021 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from transformers import (
MODEL_FOR_ZERO_SHOT_OBJECT_DETECTION_MAPPING,
ZeroShotObjectDetectionPipeline,
is_vision_available,
pipeline,
)
from transformers.testing_utils import (
is_pipeline_test,
nested_simplify,
require_torch,
require_vision,
slow,
)
from .test_pipelines_common import ANY
if is_vision_available():
    from PIL import Image
else:

    class Image:
        """Placeholder defined when ``is_vision_available()`` is false, so the name ``Image`` exists."""

        @staticmethod
        def open(*args, **kwargs):
            """Accept any arguments and return ``None``."""
            return None
@is_pipeline_test
@require_vision
@require_torch
class ZeroShotObjectDetectionPipelineTests(unittest.TestCase):
model_mapping = MODEL_FOR_ZERO_SHOT_OBJECT_DETECTION_MAPPING
def get_test_pipeline(
    self,
    model,
    tokenizer=None,
    image_processor=None,
    feature_extractor=None,
    processor=None,
    dtype="float32",
):
    """Build the zero-shot object detection pipeline under test plus one example input.

    ``feature_extractor`` is accepted but is not forwarded to the pipeline.

    Returns:
        A ``(pipeline, examples)`` tuple; ``examples`` holds a single dict
        with an ``"image"`` path and its ``"candidate_labels"``.
    """
    components = {
        "model": model,
        "processor": processor,
        "tokenizer": tokenizer,
        "image_processor": image_processor,
        "dtype": dtype,
    }
    object_detector = ZeroShotObjectDetectionPipeline(**components)

    sample = {
        "image": "./tests/fixtures/tests_samples/COCO/000000039769.png",
        "candidate_labels": ["cat", "remote", "couch"],
    }
    return object_detector, [sample]
def run_pipeline_test(self, object_detector, examples):
    """Run the detector on the first example and check the structure of every detection.

    Args:
        object_detector: The zero-shot object detection pipeline to exercise.
        examples: List of dicts; only the first one is used, through its
            ``"image"`` and ``"candidate_labels"`` entries.
    """
    first_example = examples[0]
    # The call passes threshold=0.0; at least one detection is then required.
    outputs = object_detector(
        first_example.get("image"),
        first_example.get("candidate_labels"),
        threshold=0.0,
    )

    n = len(outputs)
    self.assertGreater(n, 0)

    expected_shapes = [
        {
            "score": ANY(float),
            "label": ANY(str),
            "box": {"xmin": ANY(int), "ymin": ANY(int), "xmax": ANY(int), "ymax": ANY(int)},
        }
        for _unused in range(n)
    ]
    self.assertEqual(outputs, expected_shapes)
@require_torch
def test_small_model_pt(self):
    """Check exact detections of the tiny random OWL-ViT checkpoint at ``threshold=0.64``.

    The pipeline is called once with positional image + labels and once with a
    one-element list of ``{"image", "candidate_labels"}`` dicts.
    """
    object_detector = pipeline(
        "zero-shot-object-detection", model="hf-internal-testing/tiny-random-owlvit-object-detection"
    )

    def detection(score, label, xmin, ymin, xmax, ymax):
        # One expected pipeline result entry.
        return {"score": score, "label": label, "box": {"xmin": xmin, "ymin": ymin, "xmax": xmax, "ymax": ymax}}

    expected_detections = [
        detection(0.7235, "cat", 204, 167, 232, 190),
        detection(0.7218, "remote", 204, 167, 232, 190),
        detection(0.7184, "couch", 204, 167, 232, 190),
        detection(0.6748, "remote", 571, 83, 598, 103),
        detection(0.6656, "cat", 571, 83, 598, 103),
        detection(0.6614, "couch", 571, 83, 598, 103),
        detection(0.6456, "remote", 494, 105, 521, 127),
        detection(0.642, "remote", 67, 274, 93, 297),
        detection(0.6419, "cat", 494, 105, 521, 127),
    ]

    outputs = object_detector(
        "./tests/fixtures/tests_samples/COCO/000000039769.png",
        candidate_labels=["cat", "remote", "couch"],
        threshold=0.64,
    )
    self.assertEqual(nested_simplify(outputs, decimals=4), expected_detections)

    batch = [
        {
            "image": "./tests/fixtures/tests_samples/COCO/000000039769.png",
            "candidate_labels": ["cat", "remote", "couch"],
        }
    ]
    outputs = object_detector(batch, threshold=0.64)
    # A list input yields one list of detections per element.
    self.assertEqual(nested_simplify(outputs, decimals=4), [expected_detections])
@require_torch
@slow
def test_large_model_pt(self):
    """Check exact detections of the task's default model on a COCO image URL.

    The pipeline is called once with a single image and once with a list of
    two identical ``{"image", "candidate_labels"}`` dicts.
    """
    object_detector = pipeline("zero-shot-object-detection")

    def detection(score, label, xmin, ymin, xmax, ymax):
        # One expected pipeline result entry.
        return {"score": score, "label": label, "box": {"xmin": xmin, "ymin": ymin, "xmax": xmax, "ymax": ymax}}

    expected_detections = [
        detection(0.2868, "cat", 324, 20, 640, 373),
        detection(0.277, "remote", 40, 72, 177, 115),
        detection(0.2537, "cat", 1, 55, 315, 472),
        detection(0.1474, "remote", 335, 74, 371, 187),
        detection(0.1208, "couch", 4, 0, 642, 476),
    ]

    outputs = object_detector(
        "http://images.cocodataset.org/val2017/000000039769.jpg",
        candidate_labels=["cat", "remote", "couch"],
    )
    self.assertEqual(nested_simplify(outputs, decimals=4), expected_detections)

    batch = [
        {
            "image": "http://images.cocodataset.org/val2017/000000039769.jpg",
            "candidate_labels": ["cat", "remote", "couch"],
        },
        {
            "image": "http://images.cocodataset.org/val2017/000000039769.jpg",
            "candidate_labels": ["cat", "remote", "couch"],
        },
    ]
    outputs = object_detector(batch)
    # Both inputs are the same image, so both result lists are expected to be equal.
    self.assertEqual(nested_simplify(outputs, decimals=4), [expected_detections, expected_detections])
@require_torch
@slow
def test_threshold(self):
    """Check the detections returned by the default model when ``threshold=0.2`` is passed."""
    threshold = 0.2
    object_detector = pipeline("zero-shot-object-detection")

    outputs = object_detector(
        "http://images.cocodataset.org/val2017/000000039769.jpg",
        candidate_labels=["cat", "remote", "couch"],
        threshold=threshold,
    )

    expected_detections = [
        {"score": 0.2868, "label": "cat", "box": {"xmin": 324, "ymin": 20, "xmax": 640, "ymax": 373}},
        {"score": 0.277, "label": "remote", "box": {"xmin": 40, "ymin": 72, "xmax": 177, "ymax": 115}},
        {"score": 0.2537, "label": "cat", "box": {"xmin": 1, "ymin": 55, "xmax": 315, "ymax": 472}},
    ]
    self.assertEqual(nested_simplify(outputs, decimals=4), expected_detections)
@require_torch
@slow
def test_top_k(self):
    """Check the detections returned by the default model when ``top_k=2`` is passed."""
    top_k = 2
    object_detector = pipeline("zero-shot-object-detection")

    outputs = object_detector(
        "http://images.cocodataset.org/val2017/000000039769.jpg",
        candidate_labels=["cat", "remote", "couch"],
        top_k=top_k,
    )

    expected_detections = [
        {"score": 0.2868, "label": "cat", "box": {"xmin": 324, "ymin": 20, "xmax": 640, "ymax": 373}},
        {"score": 0.277, "label": "remote", "box": {"xmin": 40, "ymin": 72, "xmax": 177, "ymax": 115}},
    ]
    self.assertEqual(nested_simplify(outputs, decimals=4), expected_detections)