init
This commit is contained in:
0
transformers/tests/models/clvp/__init__.py
Normal file
0
transformers/tests/models/clvp/__init__.py
Normal file
240
transformers/tests/models/clvp/test_feature_extraction_clvp.py
Normal file
240
transformers/tests/models/clvp/test_feature_extraction_clvp.py
Normal file
@@ -0,0 +1,240 @@
|
||||
# Copyright 2023 HuggingFace Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import itertools
|
||||
import os
|
||||
import random
|
||||
import tempfile
|
||||
import unittest
|
||||
|
||||
import numpy as np
|
||||
from datasets import Audio, load_dataset
|
||||
|
||||
from transformers import ClvpFeatureExtractor
|
||||
from transformers.testing_utils import (
|
||||
check_json_file_has_correct_format,
|
||||
cleanup,
|
||||
require_torch,
|
||||
slow,
|
||||
torch_device,
|
||||
)
|
||||
from transformers.utils.import_utils import is_torch_available
|
||||
|
||||
from ...test_sequence_feature_extraction_common import SequenceFeatureExtractionTestMixin
|
||||
|
||||
|
||||
if is_torch_available():
|
||||
import torch
|
||||
|
||||
global_rng = random.Random()
|
||||
|
||||
|
||||
# Adapted from transformers.tests.models.whisper.test_feature_extraction_whisper.floats_list
def floats_list(shape, scale=1.0, rng=None, name=None):
    """Creates a random float32 tensor"""
    # fall back to the module-level shared generator when no rng is supplied
    if rng is None:
        rng = global_rng

    # shape[0] rows of shape[1] uniformly-distributed floats scaled by `scale`
    return [[rng.random() * scale for _ in range(shape[1])] for _ in range(shape[0])]
|
||||
|
||||
|
||||
@require_torch
class ClvpFeatureExtractionTester:
    """Builds `ClvpFeatureExtractor` kwargs and synthetic speech inputs for the common feature-extraction tests."""

    def __init__(
        self,
        parent,
        batch_size=7,
        min_seq_length=400,
        max_seq_length=2000,
        feature_size=10,
        hop_length=160,
        chunk_length=8,
        padding_value=0.0,
        sampling_rate=4_000,
        return_attention_mask=False,
    ):
        self.parent = parent
        self.batch_size = batch_size
        self.min_seq_length = min_seq_length
        self.max_seq_length = max_seq_length
        # step between consecutive input lengths so a batch spans [min_seq_length, max_seq_length)
        self.seq_length_diff = (self.max_seq_length - self.min_seq_length) // (self.batch_size - 1)
        self.padding_value = padding_value
        self.sampling_rate = sampling_rate
        self.return_attention_mask = return_attention_mask
        self.feature_size = feature_size
        self.chunk_length = chunk_length
        self.hop_length = hop_length

    def prepare_feat_extract_dict(self):
        # kwargs used by the common tests to instantiate the feature extractor under test
        return {
            "feature_size": self.feature_size,
            "hop_length": self.hop_length,
            "chunk_length": self.chunk_length,
            "padding_value": self.padding_value,
            "sampling_rate": self.sampling_rate,
            "return_attention_mask": self.return_attention_mask,
        }

    # Copied from transformers.tests.models.whisper.test_feature_extraction_whisper.WhisperFeatureExtractionTester.prepare_inputs_for_common
    def prepare_inputs_for_common(self, equal_length=False, numpify=False):
        def _flatten(list_of_lists):
            return list(itertools.chain(*list_of_lists))

        if equal_length:
            speech_inputs = [floats_list((self.max_seq_length, self.feature_size)) for _ in range(self.batch_size)]
        else:
            # make sure that inputs increase in size
            speech_inputs = [
                floats_list((x, self.feature_size))
                for x in range(self.min_seq_length, self.max_seq_length, self.seq_length_diff)
            ]
        if numpify:
            speech_inputs = [np.asarray(x) for x in speech_inputs]
        return speech_inputs
|
||||
|
||||
|
||||
@require_torch
class ClvpFeatureExtractionTest(SequenceFeatureExtractionTestMixin, unittest.TestCase):
    """Common and CLVP-specific tests for `ClvpFeatureExtractor`: save/load round-trips, batching,
    padding/truncation behavior, dtype handling, and a slow golden-value integration check."""

    feature_extraction_class = ClvpFeatureExtractor

    def setUp(self):
        self.feat_extract_tester = ClvpFeatureExtractionTester(self)

    def tearDown(self):
        super().tearDown()
        # clean-up as much as possible GPU memory occupied by PyTorch
        cleanup(torch_device)

    # Copied from transformers.tests.models.whisper.test_feature_extraction_whisper.WhisperFeatureExtractionTest.test_feat_extract_from_and_save_pretrained
    def test_feat_extract_from_and_save_pretrained(self):
        # save_pretrained/from_pretrained round-trip must preserve the config dict and the mel filter banks
        feat_extract_first = self.feature_extraction_class(**self.feat_extract_dict)

        with tempfile.TemporaryDirectory() as tmpdirname:
            saved_file = feat_extract_first.save_pretrained(tmpdirname)[0]
            check_json_file_has_correct_format(saved_file)
            feat_extract_second = self.feature_extraction_class.from_pretrained(tmpdirname)

        dict_first = feat_extract_first.to_dict()
        dict_second = feat_extract_second.to_dict()
        mel_1 = feat_extract_first.mel_filters
        mel_2 = feat_extract_second.mel_filters
        self.assertTrue(np.allclose(mel_1, mel_2))
        self.assertEqual(dict_first, dict_second)

    # Copied from transformers.tests.models.whisper.test_feature_extraction_whisper.WhisperFeatureExtractionTest.test_feat_extract_to_json_file
    def test_feat_extract_to_json_file(self):
        # to_json_file/from_json_file round-trip must preserve the config dict and the mel filter banks
        feat_extract_first = self.feature_extraction_class(**self.feat_extract_dict)

        with tempfile.TemporaryDirectory() as tmpdirname:
            json_file_path = os.path.join(tmpdirname, "feat_extract.json")
            feat_extract_first.to_json_file(json_file_path)
            feat_extract_second = self.feature_extraction_class.from_json_file(json_file_path)

        dict_first = feat_extract_first.to_dict()
        dict_second = feat_extract_second.to_dict()
        mel_1 = feat_extract_first.mel_filters
        mel_2 = feat_extract_second.mel_filters
        self.assertTrue(np.allclose(mel_1, mel_2))
        self.assertEqual(dict_first, dict_second)

    def test_call(self):
        # Tests that all call wrap to encode_plus and batch_encode_plus
        feature_extractor = self.feature_extraction_class(**self.feat_extract_tester.prepare_feat_extract_dict())
        # create three inputs of length 800, 1000, and 1200
        speech_inputs = [floats_list((1, x))[0] for x in range(800, 1400, 200)]
        np_speech_inputs = [np.asarray(speech_input) for speech_input in speech_inputs]

        # Test feature size
        input_features = feature_extractor(np_speech_inputs, padding="max_length", return_tensors="np").input_features
        self.assertTrue(input_features.ndim == 3)
        self.assertTrue(input_features.shape[-2] == feature_extractor.feature_size)

        # Test not batched input: python list and numpy array must give the same features
        encoded_sequences_1 = feature_extractor(speech_inputs[0], return_tensors="np").input_features
        encoded_sequences_2 = feature_extractor(np_speech_inputs[0], return_tensors="np").input_features
        self.assertTrue(np.allclose(encoded_sequences_1, encoded_sequences_2, atol=1e-3))

        # Test batched
        encoded_sequences_1 = feature_extractor(speech_inputs, return_tensors="np").input_features
        encoded_sequences_2 = feature_extractor(np_speech_inputs, return_tensors="np").input_features
        for enc_seq_1, enc_seq_2 in zip(encoded_sequences_1, encoded_sequences_2):
            self.assertTrue(np.allclose(enc_seq_1, enc_seq_2, atol=1e-3))

        # Test 2-D numpy arrays are batched.
        speech_inputs = [floats_list((1, x))[0] for x in (800, 800, 800)]
        np_speech_inputs = np.asarray(speech_inputs)
        encoded_sequences_1 = feature_extractor(speech_inputs, return_tensors="np").input_features
        encoded_sequences_2 = feature_extractor(np_speech_inputs, return_tensors="np").input_features
        for enc_seq_1, enc_seq_2 in zip(encoded_sequences_1, encoded_sequences_2):
            self.assertTrue(np.allclose(enc_seq_1, enc_seq_2, atol=1e-3))

        # Test truncation required: inputs longer than n_samples must match their pre-truncated versions
        speech_inputs = [floats_list((1, x))[0] for x in range(200, (feature_extractor.n_samples + 500), 200)]
        np_speech_inputs = [np.asarray(speech_input) for speech_input in speech_inputs]

        speech_inputs_truncated = [x[: feature_extractor.n_samples] for x in speech_inputs]
        np_speech_inputs_truncated = [np.asarray(speech_input) for speech_input in speech_inputs_truncated]

        encoded_sequences_1 = feature_extractor(np_speech_inputs, return_tensors="np").input_features
        encoded_sequences_2 = feature_extractor(np_speech_inputs_truncated, return_tensors="np").input_features
        for enc_seq_1, enc_seq_2 in zip(encoded_sequences_1, encoded_sequences_2):
            self.assertTrue(np.allclose(enc_seq_1, enc_seq_2, atol=1e-3))

    # Copied from transformers.tests.models.whisper.test_feature_extraction_whisper.WhisperFeatureExtractionTest.test_double_precision_pad
    def test_double_precision_pad(self):
        import torch

        # float64 inputs must be downcast to float32 by `pad`, for both numpy and torch return tensors
        feature_extractor = self.feature_extraction_class(**self.feat_extract_tester.prepare_feat_extract_dict())
        np_speech_inputs = np.random.rand(100, 32).astype(np.float64)
        py_speech_inputs = np_speech_inputs.tolist()

        for inputs in [py_speech_inputs, np_speech_inputs]:
            np_processed = feature_extractor.pad([{"input_features": inputs}], return_tensors="np")
            self.assertTrue(np_processed.input_features.dtype == np.float32)
            pt_processed = feature_extractor.pad([{"input_features": inputs}], return_tensors="pt")
            self.assertTrue(pt_processed.input_features.dtype == torch.float32)

    def _load_datasamples(self, num_samples):
        # loads the first `num_samples` clips (sorted by id, resampled to 22.05 kHz) from the dummy
        # LibriSpeech dataset; returns (list of waveforms, list of sampling rates)
        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
        ds = ds.cast_column("audio", Audio(sampling_rate=22050))
        # automatic decoding with librispeech
        speech_samples = ds.sort("id")[:num_samples]["audio"]

        return [x["array"] for x in speech_samples], [x["sampling_rate"] for x in speech_samples]

    @slow
    def test_integration(self):
        # regression test against golden feature values produced by the reference checkpoint
        # fmt: off
        EXPECTED_INPUT_FEATURES = torch.tensor(
            [
                0.9271, 1.1405, 1.4419, 1.2470, 1.2438, 1.1787, 1.0595, 1.0570, 1.1070,
                1.2205, 1.2376, 1.2997, 1.1131, 1.0843, 1.0459, 1.1858, 1.2323, 1.3582,
                1.3401, 1.3770, 1.4173, 1.3381, 1.2291, 1.0854, 1.2116, 1.1873, 1.2178,
                1.2137, 1.3001, 1.4274
            ]
        )
        # fmt: on

        input_speech, sr = self._load_datasamples(1)

        feature_extractor = ClvpFeatureExtractor.from_pretrained("susnato/clvp_dev")
        input_features = feature_extractor(input_speech, sampling_rate=sr[0], return_tensors="pt").input_features
        self.assertEqual(input_features.shape, (1, 80, 517))
        torch.testing.assert_close(input_features[0, 0, :30], EXPECTED_INPUT_FEATURES, rtol=1e-4, atol=1e-4)
|
||||
643
transformers/tests/models/clvp/test_modeling_clvp.py
Normal file
643
transformers/tests/models/clvp/test_modeling_clvp.py
Normal file
@@ -0,0 +1,643 @@
|
||||
# Copyright 2023 The HuggingFace Inc. team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Testing suite for the PyTorch Clvp model."""
|
||||
|
||||
import tempfile
|
||||
import unittest
|
||||
|
||||
import datasets
|
||||
import numpy as np
|
||||
|
||||
from transformers import ClvpConfig, ClvpDecoderConfig, ClvpEncoderConfig
|
||||
from transformers.testing_utils import (
|
||||
cleanup,
|
||||
require_torch,
|
||||
slow,
|
||||
torch_device,
|
||||
)
|
||||
from transformers.utils import is_torch_available
|
||||
|
||||
from ...generation.test_utils import GenerationTesterMixin
|
||||
from ...test_configuration_common import ConfigTester
|
||||
from ...test_modeling_common import (
|
||||
ModelTesterMixin,
|
||||
_config_zero_init,
|
||||
ids_tensor,
|
||||
random_attention_mask,
|
||||
)
|
||||
from ...test_pipeline_mixin import PipelineTesterMixin
|
||||
|
||||
|
||||
if is_torch_available():
|
||||
import torch
|
||||
|
||||
from transformers import ClvpEncoder, ClvpForCausalLM, ClvpModel, ClvpModelForConditionalGeneration
|
||||
|
||||
from transformers import ClvpFeatureExtractor, ClvpTokenizer
|
||||
|
||||
|
||||
class ClvpEncoderTester:
    """Builds `ClvpEncoderConfig`s plus synthetic token ids / attention masks for the `ClvpEncoder` tests."""

    def __init__(
        self,
        parent,
        batch_size=2,
        seq_length=7,
        is_training=False,
        use_input_mask=True,
        use_labels=True,
        vocab_size=50,
        hidden_size=128,
        projection_dim=16,
        num_hidden_layers=2,
        num_attention_heads=4,
        intermediate_size=32,
        dropout=0.1,
        attention_dropout=0.1,
        initializer_range=0.02,
        scope=None,
    ):
        self.parent = parent
        self.batch_size = batch_size
        self.seq_length = seq_length
        self.is_training = is_training
        self.use_input_mask = use_input_mask
        self.use_labels = use_labels
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.projection_dim = projection_dim
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.intermediate_size = intermediate_size
        self.dropout = dropout
        self.attention_dropout = attention_dropout
        self.initializer_range = initializer_range
        self.scope = scope
        # bos/eos share the last id of the (small) test vocabulary
        self.bos_token_id = vocab_size - 1
        self.eos_token_id = vocab_size - 1

    def get_config(self):
        """Return a `ClvpEncoderConfig` mirroring this tester's settings."""
        encoder_config = ClvpEncoderConfig(
            vocab_size=self.vocab_size,
            hidden_size=self.hidden_size,
            projection_dim=self.projection_dim,
            num_hidden_layers=self.num_hidden_layers,
            num_attention_heads=self.num_attention_heads,
            intermediate_size=self.intermediate_size,
            dropout=self.dropout,
            attention_dropout=self.attention_dropout,
            initializer_range=self.initializer_range,
            bos_token_id=self.bos_token_id,
            eos_token_id=self.eos_token_id,
        )

        return encoder_config

    def prepare_config_and_inputs(self):
        """Return `(config, input_ids, input_mask)`; the mask is 1 up to a random cut point, 0 after."""
        input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

        input_mask = None
        if self.use_input_mask:
            input_mask = random_attention_mask([self.batch_size, self.seq_length])

        if input_mask is not None:
            batch_size, seq_length = input_mask.shape
            # per-row random cut index in (0, seq_length - 1): tokens before it are kept, after it masked
            rnd_start_indices = np.random.randint(1, seq_length - 1, size=(batch_size,))
            for batch_idx, start_index in enumerate(rnd_start_indices):
                input_mask[batch_idx, :start_index] = 1
                input_mask[batch_idx, start_index:] = 0

        encoder_config = self.get_config()

        return encoder_config, input_ids, input_mask

    def prepare_config_and_inputs_for_common(self):
        """Adapt `prepare_config_and_inputs` to the `(config, inputs_dict)` shape the common tests expect."""
        config_and_inputs = self.prepare_config_and_inputs()
        speech_config, input_ids, input_mask = config_and_inputs
        inputs_dict = {"input_ids": input_ids.to(torch_device), "attention_mask": input_mask.to(torch_device)}
        return speech_config, inputs_dict

    def create_and_check_model(self, speech_config, input_ids, input_mask):
        """Run `ClvpEncoder` with both a freshly built text config and the given speech config,
        checking hidden-state and pooled-projection output shapes for each."""
        # text config: same dims as the tester but without bos/eos overrides
        text_config = ClvpEncoderConfig(
            vocab_size=self.vocab_size,
            hidden_size=self.hidden_size,
            projection_dim=self.projection_dim,
            num_hidden_layers=self.num_hidden_layers,
            num_attention_heads=self.num_attention_heads,
            intermediate_size=self.intermediate_size,
            dropout=self.dropout,
            attention_dropout=self.attention_dropout,
            initializer_range=self.initializer_range,
        )
        text_encoder_model = ClvpEncoder(config=text_config)
        text_encoder_model.to(torch_device)
        text_encoder_model.eval()
        with torch.no_grad():
            # run once with and once without the mask; shape checks use the unmasked result
            result = text_encoder_model(input_ids, attention_mask=input_mask)
            result = text_encoder_model(input_ids)
        self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))
        self.parent.assertEqual(result[0].shape, (self.batch_size, self.projection_dim))

        # now check with speech config
        speech_encoder_model = ClvpEncoder(config=speech_config)
        speech_encoder_model.to(torch_device)
        speech_encoder_model.eval()
        with torch.no_grad():
            result = speech_encoder_model(input_ids, attention_mask=input_mask)
            result = speech_encoder_model(input_ids)
        self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))
        self.parent.assertEqual(result[0].shape, (self.batch_size, self.projection_dim))
|
||||
|
||||
|
||||
@require_torch
class ClvpEncoderTest(ModelTesterMixin, unittest.TestCase):
    """Runs the common model and config tests for the standalone `ClvpEncoder`."""

    all_model_classes = (ClvpEncoder,) if is_torch_available() else ()
    # features not supported by ClvpEncoder, skipped in the common suite
    test_pruning = False
    test_head_masking = False
    test_torchscript = False

    def setUp(self):
        self.model_tester = ClvpEncoderTester(self)
        self.encoder_config_tester = ConfigTester(self, config_class=ClvpEncoderConfig, hidden_size=32)

    def tearDown(self):
        super().tearDown()
        # clean-up as much as possible GPU memory occupied by PyTorch
        cleanup(torch_device)

    def test_config(self):
        self.encoder_config_tester.run_common_tests()

    def test_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_model(*config_and_inputs)

    @unittest.skip(reason="ClvpEncoder does not output loss")
    def test_training(self):
        pass

    @unittest.skip(reason="ClvpEncoder does not output loss")
    def test_training_gradient_checkpointing(self):
        pass
|
||||
|
||||
|
||||
class ClvpDecoderTester:
    """Builds `ClvpDecoderConfig`s plus synthetic token ids / attention masks for the CLVP decoder tests."""

    def __init__(
        self,
        parent,
        batch_size=2,
        seq_length=3,
        is_training=False,
        vocab_size=300,
        max_position_embeddings=256,
        max_text_tokens=256,
        use_input_mask=True,
        hidden_size=128,
        num_hidden_layers=2,
        num_attention_heads=2,
        bos_token_id=97,
        eos_token_id=98,
        relative_attention_num_buckets=4,
        relative_attention_max_distance=16,
    ):
        # store every constructor argument (including `parent`) as an attribute of the same name
        for attr_name, attr_value in list(locals().items()):
            if attr_name != "self":
                setattr(self, attr_name, attr_value)

    def get_config(self):
        """Return a `ClvpDecoderConfig` mirroring this tester's settings."""
        return ClvpDecoderConfig(
            vocab_size=self.vocab_size,
            max_position_embeddings=self.max_position_embeddings,
            max_text_tokens=self.max_text_tokens,
            hidden_size=self.hidden_size,
            num_hidden_layers=self.num_hidden_layers,
            num_attention_heads=self.num_attention_heads,
            bos_token_id=self.bos_token_id,
            eos_token_id=self.eos_token_id,
            relative_attention_num_buckets=self.relative_attention_num_buckets,
            relative_attention_max_distance=self.relative_attention_max_distance,
        )

    def prepare_config_and_inputs(self):
        """Return `(config, input_ids, input_mask)`; the mask is 1 up to a random cut point, 0 after."""
        input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

        input_mask = random_attention_mask([self.batch_size, self.seq_length]) if self.use_input_mask else None
        if input_mask is not None:
            n_rows, n_cols = input_mask.shape
            # per-row random cut index in (0, n_cols - 1): tokens before it are kept, after it masked
            cut_points = np.random.randint(1, n_cols - 1, size=(n_rows,))
            for row, cut in enumerate(cut_points):
                input_mask[row, :cut] = 1
                input_mask[row, cut:] = 0

        return self.get_config(), input_ids, input_mask

    def create_and_check_model(self, config, input_ids, attention_mask):
        """Run `ClvpForCausalLM` in eval mode and verify the logits shape."""
        lm = ClvpForCausalLM(config).to(torch_device).eval()
        with torch.no_grad():
            outputs = lm(input_ids=input_ids, attention_mask=attention_mask)

        self.parent.assertEqual(outputs[0].shape, (self.batch_size, self.seq_length, self.vocab_size))

    def prepare_config_and_inputs_for_common(self):
        """Adapt `prepare_config_and_inputs` to the `(config, inputs_dict)` shape the common tests expect."""
        config, input_ids, attention_mask = self.prepare_config_and_inputs()
        return config, {
            "input_ids": input_ids.to(torch_device),
            "attention_mask": attention_mask.to(torch_device),
        }
|
||||
|
||||
|
||||
@require_torch
class ClvpDecoderTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, unittest.TestCase):
    """Common model / generation / pipeline tests for the CLVP decoder models
    (`ClvpModel`, `ClvpForCausalLM`)."""

    all_model_classes = (ClvpModel, ClvpForCausalLM) if is_torch_available() else ()
    pipeline_model_mapping = {"feature-extraction": ClvpModelForConditionalGeneration} if is_torch_available() else {}

    # pruning is not supported, skipped in the common suite
    test_pruning = False

    def setUp(self):
        self.model_tester = ClvpDecoderTester(self)
        self.decoder_config_tester = ConfigTester(self, config_class=ClvpDecoderConfig, hidden_size=32)

    def tearDown(self):
        super().tearDown()
        # clean-up as much as possible GPU memory occupied by PyTorch
        cleanup(torch_device)

    def test_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_model(*config_and_inputs)

    def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
        # only ClvpForCausalLM accepts labels; all-zero labels are enough to produce a loss
        if return_labels and model_class == ClvpForCausalLM:
            inputs_dict["labels"] = torch.zeros(
                [self.model_tester.batch_size, self.model_tester.seq_length], device=torch_device
            ).long()

        return inputs_dict

    def test_training(self):
        # we will only test the ClvpForCausalLM since it outputs loss
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        config.return_dict = True

        model = ClvpForCausalLM(config)
        model.to(torch_device)
        model.train()
        inputs = self._prepare_for_class(inputs_dict, ClvpForCausalLM, return_labels=True)
        loss = model(**inputs).loss
        loss.backward()

    def test_training_gradient_checkpointing(self):
        # we will only test the ClvpForCausalLM since it outputs loss
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        # gradient checkpointing is incompatible with the KV cache
        config.use_cache = False
        config.return_dict = True

        model = ClvpForCausalLM(config)
        model.to(torch_device)
        model.gradient_checkpointing_enable()
        model.train()
        inputs = self._prepare_for_class(inputs_dict, ClvpForCausalLM, return_labels=True)

        loss = model(**inputs).loss
        loss.backward()

    @unittest.skip(reason="Clvp `prepare_inputs_for_generation` function doesn't have cache position.")
    def test_generate_continue_from_inputs_embeds(self):
        pass
|
||||
|
||||
|
||||
class ClvpModelForConditionalGenerationTester:
    """Builds a full `ClvpConfig` (text encoder + speech encoder + decoder) and real audio features
    for the composite `ClvpModelForConditionalGeneration` tests."""

    def __init__(self, parent, is_training=False):
        self.parent = parent
        # text/speech encoder dims are delegated to the encoder tester
        self.clvp_encoder_tester = ClvpEncoderTester(parent)
        self.is_training = is_training
        self.batch_size = self.clvp_encoder_tester.batch_size  # need bs for batching_equivalence test

    def get_config(self):
        """Return a `ClvpConfig` composed of two encoder configs (text + speech) and a small decoder config."""
        decoder_config = ClvpDecoderConfig(
            vocab_size=50,
            max_position_embeddings=30,
            max_text_tokens=30,
            hidden_size=128,
            num_hidden_layers=1,
            num_attention_heads=2,
            bos_token_id=97,
            eos_token_id=98,
            relative_attention_num_buckets=4,
            relative_attention_max_distance=16,
        )
        text_config = self.clvp_encoder_tester.get_config()
        speech_config = self.clvp_encoder_tester.get_config()
        # speech tokens use a larger vocabulary than text tokens
        speech_config.vocab_size = 300

        return ClvpConfig.from_sub_model_configs(
            text_config,
            speech_config,
            decoder_config,
            projection_dim=16,
        )

    def prepare_config_and_inputs(self):
        """Return `(config, input_ids, attention_mask, input_features)`, extracting the features
        from one real clip of the dummy LibriSpeech dataset."""
        _, input_ids, attention_mask = self.clvp_encoder_tester.prepare_config_and_inputs()

        ds = datasets.load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
        ds = ds.cast_column("audio", datasets.Audio(sampling_rate=22050))
        audio = ds.sort("id")[0]["audio"]
        audio_sample = audio["array"]
        sr = audio["sampling_rate"]

        feature_extractor = ClvpFeatureExtractor()
        input_features = feature_extractor(raw_speech=audio_sample, sampling_rate=sr, return_tensors="pt")[
            "input_features"
        ].to(torch_device)

        config = self.get_config()

        return config, input_ids, attention_mask, input_features

    def create_and_check_model(self, config, input_ids, attention_mask, input_features):
        """Run the composite model in eval mode and check the contrastive logits shapes
        (one audio clip against `batch_size` texts, and vice versa)."""
        model = ClvpModelForConditionalGeneration(config).to(torch_device).eval()
        with torch.no_grad():
            result = model(input_ids=input_ids, input_features=input_features, attention_mask=attention_mask)

        self.parent.assertEqual(result.logits_per_speech.shape, (2, self.clvp_encoder_tester.batch_size))
        self.parent.assertEqual(result.logits_per_text.shape, (self.clvp_encoder_tester.batch_size, 2))

    def prepare_config_and_inputs_for_common(self):
        """Adapt `prepare_config_and_inputs` to the `(config, inputs_dict)` shape the common tests expect."""
        config_and_inputs = self.prepare_config_and_inputs()
        config, input_ids, attention_mask, input_features = config_and_inputs
        inputs_dict = {
            "input_ids": input_ids.to(torch_device),
            "attention_mask": attention_mask.to(torch_device),
            "input_features": input_features.to(torch_device),
            "return_loss": False,
        }
        return config, inputs_dict
|
||||
|
||||
|
||||
@require_torch
|
||||
class ClvpModelForConditionalGenerationTest(ModelTesterMixin, unittest.TestCase):
|
||||
all_model_classes = (ClvpModelForConditionalGeneration,) if is_torch_available() else ()
|
||||
# Doesn't run generation tests. There are interface mismatches when using `generate` -- TODO @gante
|
||||
all_generative_model_classes = ()
|
||||
|
||||
test_head_masking = False
|
||||
test_pruning = False
|
||||
test_resize_embeddings = False
|
||||
test_attention_outputs = False
|
||||
test_torchscript = False
|
||||
|
||||
def setUp(self):
|
||||
self.model_tester = ClvpModelForConditionalGenerationTester(self)
|
||||
common_properties = ["projection_dim", "logit_scale_init_value"]
|
||||
self.clvp_config_tester = ConfigTester(
|
||||
self, config_class=ClvpConfig, has_text_modality=False, common_properties=common_properties, hidden_size=32
|
||||
)
|
||||
|
||||
def test_config(self):
|
||||
self.clvp_config_tester.run_common_tests()
|
||||
|
||||
def tearDown(self):
|
||||
super().tearDown()
|
||||
# clean-up as much as possible GPU memory occupied by PyTorch
|
||||
cleanup(torch_device)
|
||||
|
||||
def test_model(self):
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||
self.model_tester.create_and_check_model(*config_and_inputs)
|
||||
|
||||
def test_hidden_states_output(self):
|
||||
def check_hidden_states_output(inputs_dict, config, model_class):
|
||||
model = model_class(config)
|
||||
model.to(torch_device)
|
||||
model.eval()
|
||||
|
||||
with torch.no_grad():
|
||||
outputs = model(**self._prepare_for_class(inputs_dict, model_class))
|
||||
|
||||
# check for decoder model, text encoder model and speech encoder model hidden states
|
||||
decoder_hidden_states = outputs.decoder_hidden_states
|
||||
text_encoder_hidden_states = outputs.text_encoder_hidden_states
|
||||
speech_encoder_hidden_states = outputs.speech_encoder_hidden_states
|
||||
|
||||
# check length of the hidden states
|
||||
expected_decoder_num_layers = config.decoder_config.num_hidden_layers + 1
|
||||
self.assertEqual(len(decoder_hidden_states), expected_decoder_num_layers)
|
||||
|
||||
expected_speech_encoder_num_layers = config.text_config.num_hidden_layers + 1
|
||||
self.assertEqual(len(text_encoder_hidden_states), expected_speech_encoder_num_layers)
|
||||
|
||||
expected_text_encoder_num_layers = config.speech_config.num_hidden_layers + 1
|
||||
self.assertEqual(len(speech_encoder_hidden_states), expected_text_encoder_num_layers)
|
||||
|
||||
# check shapes of each hidden state
|
||||
|
||||
# for the decoder model we will only test the dimension because the ClvpConditioningEncoder could increase
|
||||
# the sequence lengths.
|
||||
self.assertEqual(decoder_hidden_states[0].shape[-1], config.decoder_config.hidden_size)
|
||||
|
||||
# the testing for text encoder stays standard because we just pass the text tokens here.
|
||||
self.assertListEqual(
|
||||
list(text_encoder_hidden_states[0].shape[-2:]),
|
||||
[self.model_tester.clvp_encoder_tester.seq_length, config.text_config.hidden_size],
|
||||
)
|
||||
|
||||
# for the decoder model we will only test the dimension because the fix_decoder_outputs method could increase
|
||||
# the sequence lengths by adding `decoder_fixing_codes` tokens at the end.
|
||||
self.assertEqual(speech_encoder_hidden_states[0].shape[-1], config.speech_config.hidden_size)
|
||||
|
||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||
|
||||
for model_class in self.all_model_classes:
|
||||
inputs_dict["output_hidden_states"] = True
|
||||
check_hidden_states_output(inputs_dict, config, model_class)
|
||||
|
||||
# check that output_hidden_states also work using config
|
||||
del inputs_dict["output_hidden_states"]
|
||||
config.output_hidden_states = True
|
||||
|
||||
check_hidden_states_output(inputs_dict, config, model_class)
|
||||
|
||||
@unittest.skip(reason="Retain_grad is tested in individual model tests")
|
||||
def test_retain_grad_hidden_states_attentions(self):
|
||||
pass
|
||||
|
||||
@unittest.skip(reason="ClvpModelForConditionalGeneration does not have get_input_embeddings")
|
||||
def test_inputs_embeds(self):
|
||||
pass
|
||||
|
||||
@unittest.skip(reason="ClvpModelForConditionalGeneration does not have get_input_embeddings")
|
||||
def test_model_get_set_embeddings(self):
|
||||
pass
|
||||
|
||||
# override as the `logit_scale` parameter initialization is different for Clvp
def test_initialization(self):
    """Check weight initialization under `_config_zero_init`.

    `logit_scale` must equal ln(1 / 0.07) (the constant referenced by the
    original implementation); every other trainable parameter, with all init
    ranges zeroed out, must have a mean of exactly 0.0 or 1.0 once tiny float
    noise is rounded away.
    """
    config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

    # `_config_zero_init` collapses init std/ranges to ~0 so that properly
    # initialized weights become exactly 0.0 (or 1.0, e.g. norm scales).
    configs_no_init = _config_zero_init(config)
    for model_class in self.all_model_classes:
        model = model_class(config=configs_no_init)
        for name, param in model.named_parameters():
            if param.requires_grad:
                # check if `logit_scale` is initialized as per the original implementation
                if name == "logit_scale":
                    expected_value = np.log(1 / 0.07)
                    returned_value = param.data.item()

                    self.assertAlmostEqual(
                        returned_value,
                        expected_value,
                        delta=1e-3,
                        msg=f"Parameter {name} of model {model_class} seems not properly initialized",
                    )
                else:
                    # round to 9 decimal places to strip float noise before the
                    # exact membership check against {0.0, 1.0}
                    expected_range = [0.0, 1.0]
                    returned_range = ((param.data.mean() * 1e9).round() / 1e9).item()

                    self.assertIn(
                        returned_range,
                        expected_range,
                        msg=f"Parameter {name} of model {model_class} seems not properly initialized",
                    )
|
||||
|
||||
def test_load_speech_text_decoder_config(self):
    """Check that the text-encoder and decoder sub-configs survive a
    save/load round-trip of the composite ClvpConfig."""
    config, _ = self.model_tester.prepare_config_and_inputs_for_common()

    # (sub-config class to reload, expected sub-config held by the composite config)
    round_trips = [
        (ClvpEncoderConfig, config.text_config),
        (ClvpDecoderConfig, config.decoder_config),
    ]
    for sub_config_cls, expected_sub_config in round_trips:
        with tempfile.TemporaryDirectory() as saved_dir:
            config.save_pretrained(saved_dir)
            reloaded_sub_config = sub_config_cls.from_pretrained(saved_dir)
            self.assertDictEqual(expected_sub_config.to_dict(), reloaded_sub_config.to_dict())
|
||||
|
||||
@slow
def test_model_from_pretrained(self):
    """Smoke-test that the public dev checkpoint loads from the Hub."""
    loaded_model = ClvpModelForConditionalGeneration.from_pretrained("susnato/clvp_dev")
    self.assertIsNotNone(loaded_model)
|
||||
|
||||
|
||||
# Since Clvp has a lot of different models connected with each other it's better to test each of them individually along
# with a test_full_model_integration. If the model breaks in the future, this makes it much easier to identify the broken part.
|
||||
|
||||
|
||||
@slow
@require_torch
class ClvpIntegrationTest(unittest.TestCase):
    """Slow integration tests running the real `susnato/clvp_dev` checkpoint.

    `setUp` prepares one text prompt and one audio clip that all tests share;
    each test then exercises a single sub-model (conditioning encoder, speech
    decoder, text/speech encoders) or the full `generate` pipeline against
    recorded expected values (presumably captured from a reference run of this
    checkpoint — confirm before updating them).
    """

    def setUp(self):
        self.text = "This is an example text."
        # one validation clip from the dummy LibriSpeech set, resampled to 22.05 kHz
        ds = datasets.load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
        ds = ds.cast_column("audio", datasets.Audio(sampling_rate=22050))
        audio = ds.sort("id")["audio"][0]
        self.speech_samples, self.sr = audio["array"], audio["sampling_rate"]

        self.model = ClvpModelForConditionalGeneration.from_pretrained("susnato/clvp_dev").to(torch_device)
        self.model.eval()
        tokenizer = ClvpTokenizer.from_pretrained("susnato/clvp_dev")
        feature_extractor = ClvpFeatureExtractor.from_pretrained("susnato/clvp_dev")

        # pre-compute the token ids and audio features every test consumes
        tokenizer_output = tokenizer(self.text, return_tensors="pt")
        self.text_tokens = tokenizer_output["input_ids"].to(torch_device)
        self.input_features = feature_extractor(
            raw_speech=self.speech_samples, sampling_rate=self.sr, return_tensors="pt"
        )["input_features"].to(torch_device)

    def tearDown(self):
        super().tearDown()
        # clean-up as much as possible GPU memory occupied by PyTorch
        cleanup(torch_device, gc_collect=True)

    def test_conditional_encoder(self):
        """Run the conditioning encoder on audio features + text tokens and
        compare shape and a 3x3 output slice against recorded values."""
        with torch.no_grad():
            conditioning_encoder_outputs = self.model.conditioning_encoder(
                input_features=self.input_features, input_ids=self.text_tokens
            ).to("cpu")

        # 18 is the sequence length produced for this particular input
        # (recorded value — revisit if the preprocessing changes)
        self.assertEqual(
            conditioning_encoder_outputs.shape,
            torch.Size((self.input_features.shape[0], 18, self.model.config.decoder_config.hidden_size)),
        )

        EXPECTED_OUTPUTS = torch.tensor(
            [[-0.8582, 0.5228, 1.9944], [-0.0465, -1.1017, -0.0093], [-0.0466, -0.6030, -0.1280]]
        )

        torch.testing.assert_close(conditioning_encoder_outputs[0, :3, :3], EXPECTED_OUTPUTS, rtol=1e-4, atol=1e-4)

    def test_decoder_model_generate(self):
        """Generate from the speech decoder alone and compare the produced
        token ids exactly against the recorded sequence (generation config is
        presumably deterministic for this checkpoint — confirm if this flakes)."""
        autoregressive_model_output = self.model.speech_decoder_model.generate(input_ids=self.text_tokens).cpu()

        EXPECTED_OUTPUTS = torch.tensor([[147, 2, 54, 2, 43, 2, 169, 122, 29, 64, 2, 136, 37, 33, 9, 8193]])

        torch.testing.assert_close(autoregressive_model_output, EXPECTED_OUTPUTS)

    def test_text_and_speech_encoder_models(self):
        """Compare the first 20 embedding values of both CLVP encoders against
        recorded values."""
        # check for text embeds
        text_embeds = self.model.text_encoder_model(input_ids=self.text_tokens, return_dict=True)[0].cpu()

        # fmt: off
        EXPECTED_TEXT_EMBEDS = torch.tensor([1.4798, -2.0005, 2.3902, -0.5042, 1.6401, -2.4135, -1.4800, 3.0118, -2.4422, 1.3266, 2.2339, 1.4761, -4.8983, -1.3592, 6.0251, 6.7364, 2.2576, 3.7229, -10.0436, 4.6676])
        # fmt: on

        torch.testing.assert_close(text_embeds[0, :20], EXPECTED_TEXT_EMBEDS, rtol=1e-4, atol=1e-4)

        # check for speech embeds
        # NOTE(review): the speech encoder is also fed the *text* token ids here;
        # this matches the recorded expectations but verify it is intentional.
        speech_embeds = self.model.speech_encoder_model(input_ids=self.text_tokens, return_dict=True)[0].cpu()

        # fmt: off
        EXPECTED_SPEECH_EMBEDS = torch.tensor([3.1202, -3.1183, -1.4264, -6.1339, 1.8885, -0.1983, 0.9461, -1.7414, 0.3320, -3.8400, -1.5715, 1.5096, -1.7576, 0.2387, 4.9758, 5.8450, -6.2534, 2.8587, -5.5816, 4.7821])
        # fmt: on

        torch.testing.assert_close(speech_embeds[0, :20], EXPECTED_SPEECH_EMBEDS, rtol=1e-4, atol=1e-4)

    def test_full_model_integration(self):
        """Full pipeline: beam-search generation over text + audio, checking
        both the produced speech ids and the text/speech similarity scores."""
        full_model_output = self.model.generate(
            input_ids=self.text_tokens,
            input_features=self.input_features,
            do_sample=False,
            num_beams=4,
            num_return_sequences=4,
            max_new_tokens=10,
        )

        EXPECTED_SPEECH_IDS = torch.tensor([[1953, 1080, 612], [1953, 612, 493], [1953, 612, 716]])
        EXPECTED_SIMILARITY_SCORES = torch.tensor([[14.7660, 14.4569, 13.6472, 13.5683]])

        torch.testing.assert_close(full_model_output.speech_ids.cpu()[-3:, -3:], EXPECTED_SPEECH_IDS)
        torch.testing.assert_close(full_model_output.logits_per_text.cpu(), EXPECTED_SIMILARITY_SCORES)
|
||||
124
transformers/tests/models/clvp/test_processing_clvp.py
Normal file
124
transformers/tests/models/clvp/test_processing_clvp.py
Normal file
@@ -0,0 +1,124 @@
|
||||
# Copyright 2023 The HuggingFace Team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
import gc
|
||||
import shutil
|
||||
import tempfile
|
||||
import unittest
|
||||
|
||||
from transformers import ClvpFeatureExtractor, ClvpProcessor, ClvpTokenizer
|
||||
from transformers.testing_utils import require_torch
|
||||
|
||||
from .test_feature_extraction_clvp import floats_list
|
||||
|
||||
|
||||
@require_torch
class ClvpProcessorTest(unittest.TestCase):
    """Tests for `ClvpProcessor`, which bundles a `ClvpTokenizer` and a
    `ClvpFeatureExtractor` behind one save/load/encode/decode interface.

    Most cases are pinned copies of the Whisper processor tests (see the
    `# Copied from` markers); keep those bodies byte-identical to their sources
    so the repo consistency check passes.
    """

    def setUp(self):
        # real Hub checkpoint the tokenizer / feature extractor load from,
        # plus a scratch dir for save/load round-trips
        self.checkpoint = "susnato/clvp_dev"
        self.tmpdirname = tempfile.mkdtemp()

    def tearDown(self):
        super().tearDown()
        # remove the scratch dir and force a GC pass between tests
        shutil.rmtree(self.tmpdirname)
        gc.collect()

    # Copied from transformers.tests.models.whisper.test_processing_whisper.WhisperProcessorTest.get_tokenizer with Whisper->Clvp
    def get_tokenizer(self, **kwargs):
        return ClvpTokenizer.from_pretrained(self.checkpoint, **kwargs)

    # Copied from transformers.tests.models.whisper.test_processing_whisper.WhisperProcessorTest.get_feature_extractor with Whisper->Clvp
    def get_feature_extractor(self, **kwargs):
        return ClvpFeatureExtractor.from_pretrained(self.checkpoint, **kwargs)

    # Copied from transformers.tests.models.whisper.test_processing_whisper.WhisperProcessorTest.test_save_load_pretrained_default with Whisper->Clvp
    def test_save_load_pretrained_default(self):
        tokenizer = self.get_tokenizer()
        feature_extractor = self.get_feature_extractor()

        processor = ClvpProcessor(tokenizer=tokenizer, feature_extractor=feature_extractor)

        processor.save_pretrained(self.tmpdirname)
        processor = ClvpProcessor.from_pretrained(self.tmpdirname)

        self.assertEqual(processor.tokenizer.get_vocab(), tokenizer.get_vocab())
        self.assertIsInstance(processor.tokenizer, ClvpTokenizer)

        self.assertEqual(processor.feature_extractor.to_json_string(), feature_extractor.to_json_string())
        self.assertIsInstance(processor.feature_extractor, ClvpFeatureExtractor)

    # Copied from transformers.tests.models.whisper.test_processing_whisper.WhisperProcessorTest.test_feature_extractor with Whisper->Clvp,processor(raw_speech->processor(raw_speech=raw_speech
    def test_feature_extractor(self):
        feature_extractor = self.get_feature_extractor()
        tokenizer = self.get_tokenizer()

        processor = ClvpProcessor(tokenizer=tokenizer, feature_extractor=feature_extractor)

        raw_speech = floats_list((3, 1000))

        input_feat_extract = feature_extractor(raw_speech, return_tensors="np")
        input_processor = processor(raw_speech=raw_speech, return_tensors="np")

        for key in input_feat_extract:
            self.assertAlmostEqual(input_feat_extract[key].sum(), input_processor[key].sum(), delta=1e-2)

    # Copied from transformers.tests.models.whisper.test_processing_whisper.WhisperProcessorTest.test_tokenizer with Whisper->Clvp
    def test_tokenizer(self):
        feature_extractor = self.get_feature_extractor()
        tokenizer = self.get_tokenizer()

        processor = ClvpProcessor(tokenizer=tokenizer, feature_extractor=feature_extractor)

        input_str = "This is a test string"

        encoded_processor = processor(text=input_str)

        encoded_tok = tokenizer(input_str)

        for key in encoded_tok:
            self.assertListEqual(encoded_tok[key], encoded_processor[key])

    # Copied from transformers.tests.models.whisper.test_processing_whisper.WhisperProcessorTest.test_tokenizer_decode with Whisper->Clvp
    def test_tokenizer_decode(self):
        feature_extractor = self.get_feature_extractor()
        tokenizer = self.get_tokenizer()

        processor = ClvpProcessor(tokenizer=tokenizer, feature_extractor=feature_extractor)

        predicted_ids = [[1, 4, 5, 8, 1, 0, 8], [3, 4, 3, 1, 1, 8, 9]]

        decoded_processor = processor.batch_decode(predicted_ids)
        decoded_tok = tokenizer.batch_decode(predicted_ids)

        self.assertListEqual(decoded_tok, decoded_processor)

    # Not a Whisper copy: checks that kwargs given to `from_pretrained`
    # override the saved tokenizer / feature-extractor attributes.
    def test_save_load_pretrained_additional_features(self):
        processor = ClvpProcessor(tokenizer=self.get_tokenizer(), feature_extractor=self.get_feature_extractor())
        processor.save_pretrained(self.tmpdirname)

        tokenizer_add_kwargs = self.get_tokenizer(pad_token="(PAD)")
        feature_extractor_add_kwargs = self.get_feature_extractor(sampling_rate=16000)

        processor = ClvpProcessor.from_pretrained(
            self.tmpdirname,
            pad_token="(PAD)",
            sampling_rate=16000,
        )

        self.assertEqual(processor.tokenizer.get_vocab(), tokenizer_add_kwargs.get_vocab())
        self.assertIsInstance(processor.tokenizer, ClvpTokenizer)

        self.assertEqual(processor.feature_extractor.to_json_string(), feature_extractor_add_kwargs.to_json_string())
        self.assertIsInstance(processor.feature_extractor, ClvpFeatureExtractor)
|
||||
314
transformers/tests/models/clvp/test_tokenization_clvp.py
Normal file
314
transformers/tests/models/clvp/test_tokenization_clvp.py
Normal file
@@ -0,0 +1,314 @@
|
||||
# Copyright 2023 The HuggingFace Team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
import json
|
||||
import os
|
||||
import unittest
|
||||
|
||||
from transformers import ClvpTokenizer
|
||||
|
||||
from ...test_tokenization_common import TokenizerTesterMixin, slow
|
||||
|
||||
|
||||
class ClvpTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
    """Tokenizer tests for `ClvpTokenizer`, a GPT2-style BPE tokenizer whose
    output represents spaces with a literal `[SPACE]` token (see
    `get_input_output_texts` / `test_full_tokenizer`).

    Generic cases come from `TokenizerTesterMixin`; several methods are pinned
    copies from the GPT2 / LayoutXLM suites (see the `# Copied from` markers) —
    keep those bodies byte-identical to their sources.
    """

    from_pretrained_id = "susnato/clvp_dev"
    tokenizer_class = ClvpTokenizer
    test_rust_tokenizer = False
    from_pretrained_kwargs = {"add_prefix_space": True}
    test_seq2seq = False
    test_sentencepiece_ignore_case = True

    @classmethod
    def setUpClass(cls):
        """Write a tiny BPE vocab/merges pair to disk so the local-tokenizer
        tests do not need a remote checkpoint."""
        super().setUpClass()

        # Adapted from Sennrich et al. 2015 and https://github.com/rsennrich/subword-nmt
        vocab = [
            "l",
            "o",
            "w",
            "e",
            "r",
            "s",
            "t",
            "i",
            "d",
            "n",
            "\u0120",
            "\u0120l",
            "\u0120n",
            "\u0120lo",
            "\u0120low",
            "er",
            "\u0120lowest",
            "\u0120newer",
            "\u0120wider",
            "<unk>",
            "<|endoftext|>",
            "[SPACE]",
        ]
        vocab_tokens = dict(zip(vocab, range(len(vocab))))
        merges = ["#version: 0.2", "\u0120 l", "\u0120l o", "\u0120lo w", "e r", ""]
        cls.special_tokens_map = {"unk_token": "<unk>"}

        cls.vocab_file = os.path.join(cls.tmpdirname, "vocab.json")
        cls.merges_file = os.path.join(cls.tmpdirname, "merges.txt")
        with open(cls.vocab_file, "w", encoding="utf-8") as fp:
            fp.write(json.dumps(vocab_tokens) + "\n")
        with open(cls.merges_file, "w", encoding="utf-8") as fp:
            fp.write("\n".join(merges))

    # Copied from transformers.tests.models.gpt2.test_tokenization_gpt2.GPT2TokenizationTest.get_tokenizer with GPT2->Clvp
    @classmethod
    def get_tokenizer(cls, pretrained_name=None, **kwargs):
        kwargs.update(cls.special_tokens_map)
        pretrained_name = pretrained_name or cls.tmpdirname
        return ClvpTokenizer.from_pretrained(pretrained_name, **kwargs)

    # Copied from transformers.tests.models.gpt2.test_tokenization_gpt2.GPT2TokenizationTest.get_input_output_texts
    def get_input_output_texts(self, tokenizer):
        input_text = "lower newer"
        output_text = "lower[SPACE]newer"
        return input_text, output_text

    # Copied from transformers.tests.models.layoutxlm.test_tokenization_layoutxlm.LayoutXLMTokenizationTest.test_add_special_tokens
    def test_add_special_tokens(self):
        tokenizers: list[ClvpTokenizer] = self.get_tokenizers(do_lower_case=False)
        for tokenizer in tokenizers:
            with self.subTest(f"{tokenizer.__class__.__name__}"):
                special_token = "[SPECIAL_TOKEN]"
                special_token_box = [1000, 1000, 1000, 1000]

                tokenizer.add_special_tokens({"cls_token": special_token})
                encoded_special_token = tokenizer.encode(
                    [special_token], boxes=[special_token_box], add_special_tokens=False
                )
                self.assertEqual(len(encoded_special_token), 1)

                decoded = tokenizer.decode(encoded_special_token, skip_special_tokens=True)
                self.assertTrue(special_token not in decoded)

    # Copied from transformers.tests.models.gpt2.test_tokenization_gpt2.GPT2TokenizationTest.test_rust_and_python_full_tokenizers
    def test_rust_and_python_full_tokenizers(self):
        if not self.test_rust_tokenizer:
            self.skipTest(reason="test_rust_tokenizer is set to False")

        tokenizer = self.get_tokenizer()
        rust_tokenizer = self.get_rust_tokenizer(add_prefix_space=True)

        sequence = "lower newer"

        # Testing tokenization
        tokens = tokenizer.tokenize(sequence, add_prefix_space=True)
        rust_tokens = rust_tokenizer.tokenize(sequence)
        self.assertListEqual(tokens, rust_tokens)

        # Testing conversion to ids without special tokens
        ids = tokenizer.encode(sequence, add_special_tokens=False, add_prefix_space=True)
        rust_ids = rust_tokenizer.encode(sequence, add_special_tokens=False)
        self.assertListEqual(ids, rust_ids)

        # Testing conversion to ids with special tokens
        rust_tokenizer = self.get_rust_tokenizer(add_prefix_space=True)
        ids = tokenizer.encode(sequence, add_prefix_space=True)
        rust_ids = rust_tokenizer.encode(sequence)
        self.assertListEqual(ids, rust_ids)

        # Testing the unknown token
        input_tokens = tokens + [rust_tokenizer.unk_token]
        input_bpe_tokens = [14, 15, 10, 9, 3, 2, 15, 19]
        self.assertListEqual(rust_tokenizer.convert_tokens_to_ids(input_tokens), input_bpe_tokens)

    # Copied from transformers.tests.models.gpt2.test_tokenization_gpt2.GPT2TokenizationTest.test_padding
    def test_padding(self, max_length=15):
        for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
            with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
                tokenizer_r = self.get_rust_tokenizer(pretrained_name, **kwargs)

                # Simple input
                s = "This is a simple input"
                s2 = ["This is a simple input 1", "This is a simple input 2"]
                p = ("This is a simple input", "This is a pair")
                p2 = [
                    ("This is a simple input 1", "This is a simple input 2"),
                    ("This is a simple pair 1", "This is a simple pair 2"),
                ]

                # Simple input tests
                self.assertRaises(ValueError, tokenizer_r.encode, s, max_length=max_length, padding="max_length")

                # Simple input
                self.assertRaises(ValueError, tokenizer_r.encode_plus, s, max_length=max_length, padding="max_length")

                # Simple input
                self.assertRaises(
                    ValueError,
                    tokenizer_r.batch_encode_plus,
                    s2,
                    max_length=max_length,
                    padding="max_length",
                )

                # Pair input
                self.assertRaises(ValueError, tokenizer_r.encode, p, max_length=max_length, padding="max_length")

                # Pair input
                self.assertRaises(ValueError, tokenizer_r.encode_plus, p, max_length=max_length, padding="max_length")

                # Pair input
                self.assertRaises(
                    ValueError,
                    tokenizer_r.batch_encode_plus,
                    p2,
                    max_length=max_length,
                    padding="max_length",
                )

    # Copied from transformers.tests.models.gpt2.test_tokenization_gpt2.GPT2TokenizationTest.test_padding_if_pad_token_set_slow
    def test_padding_if_pad_token_set_slow(self):
        tokenizer = ClvpTokenizer.from_pretrained(self.tmpdirname, pad_token="<pad>")

        # Simple input
        s = "This is a simple input"
        s2 = ["This is a simple input looooooooong", "This is a simple input"]
        p = ("This is a simple input", "This is a pair")
        p2 = [
            ("This is a simple input loooooong", "This is a simple input"),
            ("This is a simple pair loooooong", "This is a simple pair"),
        ]

        pad_token_id = tokenizer.pad_token_id

        out_s = tokenizer(s, padding="max_length", max_length=30, return_tensors="np")
        out_s2 = tokenizer(s2, padding=True, truncate=True, return_tensors="np")
        out_p = tokenizer(*p, padding="max_length", max_length=60, return_tensors="np")
        out_p2 = tokenizer(p2, padding=True, truncate=True, return_tensors="np")

        # s
        # test single string max_length padding
        self.assertEqual(out_s["input_ids"].shape[-1], 30)
        self.assertTrue(pad_token_id in out_s["input_ids"])
        self.assertTrue(0 in out_s["attention_mask"])

        # s2
        # test automatic padding
        self.assertEqual(out_s2["input_ids"].shape[-1], 33)
        # long slice doesn't have padding
        self.assertFalse(pad_token_id in out_s2["input_ids"][0])
        self.assertFalse(0 in out_s2["attention_mask"][0])
        # short slice does have padding
        self.assertTrue(pad_token_id in out_s2["input_ids"][1])
        self.assertTrue(0 in out_s2["attention_mask"][1])

        # p
        # test single pair max_length padding
        self.assertEqual(out_p["input_ids"].shape[-1], 60)
        self.assertTrue(pad_token_id in out_p["input_ids"])
        self.assertTrue(0 in out_p["attention_mask"])

        # p2
        # test automatic padding pair
        self.assertEqual(out_p2["input_ids"].shape[-1], 52)
        # long slice pair doesn't have padding
        self.assertFalse(pad_token_id in out_p2["input_ids"][0])
        self.assertFalse(0 in out_p2["attention_mask"][0])
        # short slice pair does have padding
        self.assertTrue(pad_token_id in out_p2["input_ids"][1])
        self.assertTrue(0 in out_p2["attention_mask"][1])

    # Copied from transformers.tests.models.gpt2.test_tokenization_gpt2.GPT2TokenizationTest.test_special_tokens_mask_input_pairs_and_bos_token
    def test_special_tokens_mask_input_pairs_and_bos_token(self):
        # TODO: change to self.get_tokenizers() when the fast version is implemented
        tokenizers = [self.get_tokenizer(do_lower_case=False, add_bos_token=True)]
        for tokenizer in tokenizers:
            with self.subTest(f"{tokenizer.__class__.__name__}"):
                sequence_0 = "Encode this."
                sequence_1 = "This one too please."
                encoded_sequence = tokenizer.encode(sequence_0, add_special_tokens=False)
                encoded_sequence += tokenizer.encode(sequence_1, add_special_tokens=False)
                encoded_sequence_dict = tokenizer.encode_plus(
                    sequence_0,
                    sequence_1,
                    add_special_tokens=True,
                    return_special_tokens_mask=True,
                )
                encoded_sequence_w_special = encoded_sequence_dict["input_ids"]
                special_tokens_mask = encoded_sequence_dict["special_tokens_mask"]
                self.assertEqual(len(special_tokens_mask), len(encoded_sequence_w_special))

                filtered_sequence = [
                    (x if not special_tokens_mask[i] else None) for i, x in enumerate(encoded_sequence_w_special)
                ]
                filtered_sequence = [x for x in filtered_sequence if x is not None]
                self.assertEqual(encoded_sequence, filtered_sequence)

    def test_token_type_ids(self):
        """The tokenizer must emit token_type_ids containing 0 when asked for them."""
        tokenizer = self.get_tokenizer()
        seq_0 = "Test this method."

        # We want to have sequence 0 and sequence 1 are tagged
        # respectively with 0 and 1 token_ids
        # (regardless of whether the model use token type ids)
        # We use this assumption in the QA pipeline among other place
        output = tokenizer(seq_0, return_token_type_ids=True, add_special_tokens=True)
        self.assertIn(0, output["token_type_ids"])

    def test_full_tokenizer(self):
        """BPE-tokenize against the tiny local vocab; the space between the two
        words must become the `[SPACE]` token."""
        tokenizer = ClvpTokenizer(self.vocab_file, self.merges_file, **self.special_tokens_map)
        text = "lower newer"
        bpe_tokens = ["l", "o", "w", "er", "[SPACE]", "n", "e", "w", "er"]
        tokens = tokenizer.tokenize(text, add_prefix_space=False)
        self.assertListEqual(tokens, bpe_tokens)

        input_tokens = tokens + [tokenizer.unk_token]
        # indices into the setUpClass vocab: l=0, o=1, w=2, er=15, [SPACE]=21,
        # n=9, e=3, w=2, er=15, <unk>=19
        input_bpe_tokens = [0, 1, 2, 15, 21, 9, 3, 2, 15, 19]
        self.assertListEqual(tokenizer.convert_tokens_to_ids(input_tokens), input_bpe_tokens)

    @slow
    def test_outputs_with_numbers(self):
        """Pinned encoding of a sentence containing digits/currency, using the
        real checkpoint (presumably numbers get verbalized — confirm against the
        tokenizer implementation before editing the expected ids)."""
        text = "hello and this is an example text and I have $1000. my lucky number is 12345."
        tokenizer = ClvpTokenizer.from_pretrained("susnato/clvp_dev")

        # fmt: off
        EXPECTED_OUTPUT = [62, 84, 28, 2, 53, 2,147, 2, 54, 2, 43, 2, 169, 122, 29, 64, 2, 136, 37, 33, 2, 53, 2, 22,
        2, 148, 2, 110, 2, 40, 206, 53, 2, 134, 84, 59, 32, 9, 2, 125, 2, 25, 34, 197, 38, 2, 27,
        231, 15, 44, 2, 54, 2, 33, 100, 25, 76, 2, 40, 206, 53, 7, 2, 40, 46, 18, 2, 21, 97, 17,
        219, 2, 87, 210, 8, 19, 22, 76, 9,
        ]
        # fmt: on

        self.assertListEqual(tokenizer.encode(text, add_special_tokens=False), EXPECTED_OUTPUT)

    @slow
    def test_tokenizer_integration(self):
        """Run the shared integration util against pinned padded encodings of
        three reference sentences from the real checkpoint."""
        sequences = [
            "Transformers (formerly known as pytorch-transformers and pytorch-pretrained-bert) provides "
            "general-purpose architectures (BERT, RoBERTa, XLM, DistilBert, XLNet...) for Natural "
            "Language Understanding (NLU) and Natural Language Generation (NLG) with over multiple pretrained "
            "models and deep interoperability between Jax, PyTorch and TensorFlow.",
            "BERT is designed to pre-train deep bidirectional representations from unlabeled text by jointly "
            "conditioning on both left and right context in all layers.",
            "The quick brown fox jumps over the lazy dog.",
        ]

        # fmt: off
        expected_encoding = {'input_ids': [[144, 43, 32, 87, 26, 173, 2, 5, 87, 26, 44, 70, 2, 209, 27, 2, 55, 2, 29, 38, 51, 31, 71, 8, 144, 43, 32, 87, 26, 173, 2, 53, 2, 29, 38, 51, 31, 71, 8, 29, 46, 144, 137, 49, 8, 15, 44, 33, 6, 2, 187, 35, 83, 61, 2, 20, 50, 44, 56, 8, 29, 121, 139, 66, 2, 59, 71, 60, 18, 16, 33, 34, 175, 2, 5, 15, 44, 33, 7, 2, 89, 15, 44, 33, 14, 7, 2, 37, 25, 26, 7, 2, 17, 54, 78, 25, 15, 44, 33, 7, 2, 37, 25, 111, 33, 9, 9, 9, 6, 2, 87, 2, 27, 48, 121, 56, 2, 25, 43, 20, 34, 14, 112, 2, 97, 234, 63, 53, 52, 2, 5, 27, 25, 34, 6, 2, 53, 2, 27, 48, 121, 56, 2, 25, 43, 20, 34, 14, 112, 2, 20, 50, 44, 158, 2, 5, 27, 25, 20, 6, 2, 103, 2, 253, 2, 26, 167, 78, 29, 64, 2, 29, 46, 144, 137, 49, 2, 115, 126, 25, 32, 2, 53, 2, 126, 18, 29, 2, 41, 114, 161, 44, 109, 151, 240, 2, 67, 33, 100, 50, 2, 23, 14, 37, 7, 2, 29, 38, 51, 31, 71, 2, 53, 2, 33, 50, 32, 57, 19, 25, 69, 9], [ 15, 44, 33, 2, 54, 2, 17, 61, 22, 20, 27, 49, 2, 51, 2, 29, 46, 8, 144, 137, 2, 126, 18, 29, 2, 15, 83, 22, 46, 16, 181, 56, 2, 46, 29, 175, 86, 158, 32, 2, 154, 2, 97, 25, 14, 67, 25, 49, 2, 136, 37, 33, 2, 185, 2, 23, 28, 41, 33, 70, 2, 135, 17, 60, 107, 52, 2, 47, 2, 165, 40, 2, 64, 19, 33, 2, 53, 2, 101, 104, 2, 135, 136, 37, 33, 2, 41, 2, 108, 2, 25, 88, 173, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [ 42, 2, 194, 91, 24, 2, 243, 190, 2, 182, 37, 2, 23, 231, 29, 32, 2, 253, 2, 42, 2, 25, 14, 39, 38, 2, 134, 20, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], # noqa: E501
        'attention_mask': [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], # noqa: E501
        }
        # fmt: on

        self.tokenizer_integration_test_util(
            sequences=sequences, expected_encoding=expected_encoding, model_name="susnato/clvp_dev", padding=True
        )
|
||||
Reference in New Issue
Block a user