init

transformers/tests/pipelines/test_pipelines_text_generation.py (new file, 558 lines)

@@ -0,0 +1,558 @@
# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest
from unittest.mock import patch

from transformers import (
    MODEL_FOR_CAUSAL_LM_MAPPING,
    TextGenerationPipeline,
    logging,
    pipeline,
)
from transformers.testing_utils import (
    CaptureLogger,
    is_pipeline_test,
    require_accelerate,
    require_torch,
    require_torch_accelerator,
    torch_device,
)

from .test_pipelines_common import ANY
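# `ANY(obj_type)` is a small helper from the shared pipeline test utilities: it compares equal
# to any object of the given type(s), so the assertions below can check output structure
# without pinning exact values.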


@is_pipeline_test
@require_torch
class TextGenerationPipelineTests(unittest.TestCase):
    model_mapping = MODEL_FOR_CAUSAL_LM_MAPPING

    @require_torch
    def test_small_model_pt(self):
        text_generator = pipeline(
            task="text-generation",
            model="hf-internal-testing/tiny-random-LlamaForCausalLM",
            max_new_tokens=10,
        )
        # Using `do_sample=False` to force deterministic output
        outputs = text_generator("This is a test", do_sample=False)
        self.assertEqual(outputs, [{"generated_text": "This is a testкт MéxicoWSAnimImportдели pip letscosatur"}])

        outputs = text_generator(["This is a test", "This is a second test"], do_sample=False)
        self.assertEqual(
            outputs,
            [
                [{"generated_text": "This is a testкт MéxicoWSAnimImportдели pip letscosatur"}],
                [{"generated_text": "This is a second testкт MéxicoWSAnimImportдели Düsseld bootstrap learn user"}],
            ],
        )

        outputs = text_generator("This is a test", do_sample=True, num_return_sequences=2, return_tensors=True)
        self.assertEqual(
            outputs,
            [
                {"generated_token_ids": ANY(list)},
                {"generated_token_ids": ANY(list)},
            ],
        )
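
    # Chat inputs (lists of {"role", "content"} dicts) go through the model's chat template;
    # the pipeline then returns the full conversation with the assistant reply appended.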
    @require_torch
    def test_small_chat_model_pt(self):
        text_generator = pipeline(
            task="text-generation",
            model="hf-internal-testing/tiny-gpt2-with-chatml-template",
        )
        # Using `do_sample=False` to force deterministic output
        chat1 = [
            {"role": "system", "content": "This is a system message."},
            {"role": "user", "content": "This is a test"},
        ]
        chat2 = [
            {"role": "system", "content": "This is a system message."},
            {"role": "user", "content": "This is a second test"},
        ]
        outputs = text_generator(chat1, do_sample=False, max_new_tokens=10)
        expected_chat1 = chat1 + [
            {
                "role": "assistant",
                "content": " factors factors factors factors factors factors factors factors factors factors",
            }
        ]
        self.assertEqual(
            outputs,
            [
                {"generated_text": expected_chat1},
            ],
        )

        outputs = text_generator([chat1, chat2], do_sample=False, max_new_tokens=10)
        expected_chat2 = chat2 + [
            {
                "role": "assistant",
                "content": " stairs stairs stairs stairs stairs stairs stairs stairs stairs stairs",
            }
        ]

        self.assertEqual(
            outputs,
            [
                [{"generated_text": expected_chat1}],
                [{"generated_text": expected_chat2}],
            ],
        )

    @require_torch
    def test_small_chat_model_continue_final_message(self):
        # Here we check that passing a chat that ends in an assistant message is handled correctly
        # by continuing the final message rather than starting a new one
        text_generator = pipeline(
            task="text-generation",
            model="hf-internal-testing/tiny-gpt2-with-chatml-template",
        )
        # Using `do_sample=False` to force deterministic output
        chat1 = [
            {"role": "system", "content": "This is a system message."},
            {"role": "user", "content": "This is a test"},
            {"role": "assistant", "content": "This is"},
        ]
        outputs = text_generator(chat1, do_sample=False, max_new_tokens=10)

        # Assert that we continued the last message and there isn't a sneaky <|im_end|>
        self.assertEqual(
            outputs,
            [
                {
                    "generated_text": [
                        {"role": "system", "content": "This is a system message."},
                        {"role": "user", "content": "This is a test"},
                        {
                            "role": "assistant",
                            "content": "This is stairs stairs stairs stairs stairs stairs stairs stairs stairs stairs",
                        },
                    ]
                }
            ],
        )

    @require_torch
    def test_small_chat_model_continue_final_message_override(self):
        # Here we check that `continue_final_message=True` forces the pipeline to keep extending
        # the final message of the chat, even though the chat does not end in an assistant message
        text_generator = pipeline(
            task="text-generation",
            model="hf-internal-testing/tiny-gpt2-with-chatml-template",
        )
        # Using `do_sample=False` to force deterministic output
        chat1 = [
            {"role": "system", "content": "This is a system message."},
            {"role": "user", "content": "This is a test"},
        ]
        outputs = text_generator(chat1, do_sample=False, max_new_tokens=10, continue_final_message=True)

        # Assert that we continued the last message and there isn't a sneaky <|im_end|>
        self.assertEqual(
            outputs,
            [
                {
                    "generated_text": [
                        {"role": "system", "content": "This is a system message."},
                        {
                            "role": "user",
                            "content": "This is a test stairs stairs stairs stairs stairs stairs stairs stairs stairs stairs",
                        },
                    ]
                }
            ],
        )

    @require_torch
    def test_small_chat_model_with_dataset_pt(self):
        from torch.utils.data import Dataset

        from transformers.pipelines.pt_utils import KeyDataset

        class MyDataset(Dataset):
            data = [
                [
                    {"role": "system", "content": "This is a system message."},
                    {"role": "user", "content": "This is a test"},
                ],
            ]

            def __len__(self):
                return 1

            def __getitem__(self, i):
                return {"text": self.data[i]}

        text_generator = pipeline(
            task="text-generation",
            model="hf-internal-testing/tiny-gpt2-with-chatml-template",
        )

        dataset = MyDataset()
        key_dataset = KeyDataset(dataset, "text")

        for outputs in text_generator(key_dataset, do_sample=False, max_new_tokens=10):
            expected_chat = dataset.data[0] + [
                {
                    "role": "assistant",
                    "content": " factors factors factors factors factors factors factors factors factors factors",
                }
            ]
            self.assertEqual(
                outputs,
                [
                    {"generated_text": expected_chat},
                ],
            )

    @require_torch
    def test_small_chat_model_with_iterator_pt(self):
        from transformers.pipelines.pt_utils import PipelineIterator

        text_generator = pipeline(
            task="text-generation",
            model="hf-internal-testing/tiny-gpt2-with-chatml-template",
        )

        # Using `do_sample=False` to force deterministic output
        chat1 = [
            {"role": "system", "content": "This is a system message."},
            {"role": "user", "content": "This is a test"},
        ]
        chat2 = [
            {"role": "system", "content": "This is a system message."},
            {"role": "user", "content": "This is a second test"},
        ]
        expected_chat1 = chat1 + [
            {
                "role": "assistant",
                "content": " factors factors factors factors factors factors factors factors factors factors",
            }
        ]
        expected_chat2 = chat2 + [
            {
                "role": "assistant",
                "content": " stairs stairs stairs stairs stairs stairs stairs stairs stairs stairs",
            }
        ]

        def data():
            yield from [chat1, chat2]

        outputs = text_generator(data(), do_sample=False, max_new_tokens=10)
        assert isinstance(outputs, PipelineIterator)
        outputs = list(outputs)
        self.assertEqual(
            outputs,
            [
                [{"generated_text": expected_chat1}],
                [{"generated_text": expected_chat2}],
            ],
        )
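
    # `get_test_pipeline` and `run_pipeline_test` below are the hooks used by the shared
    # pipeline test harness (see `@is_pipeline_test` / `model_mapping` above): presumably the
    # harness builds a pipeline for each mapped architecture and runs the generic checks on it.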
    def get_test_pipeline(
        self,
        model,
        tokenizer=None,
        image_processor=None,
        feature_extractor=None,
        processor=None,
        dtype="float32",
    ):
        text_generator = TextGenerationPipeline(
            model=model,
            tokenizer=tokenizer,
            feature_extractor=feature_extractor,
            image_processor=image_processor,
            processor=processor,
            dtype=dtype,
            max_new_tokens=5,
        )
        return text_generator, ["This is a test", "Another test"]
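
    # `stop_sequence` should cut generation at the first occurrence of the given string.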
    def test_stop_sequence_stopping_criteria(self):
        prompt = """Hello I believe in"""
        text_generator = pipeline(
            "text-generation", model="hf-internal-testing/tiny-random-gpt2", max_new_tokens=5, do_sample=False
        )
        output = text_generator(prompt)
        self.assertEqual(
            output,
            [{"generated_text": "Hello I believe in fe fe fe fe fe"}],
        )

        output = text_generator(prompt, stop_sequence=" fe")
        self.assertEqual(output, [{"generated_text": "Hello I believe in fe"}])

    def run_pipeline_test(self, text_generator, _):
        model = text_generator.model
        tokenizer = text_generator.tokenizer

        outputs = text_generator("This is a test")
        self.assertEqual(outputs, [{"generated_text": ANY(str)}])
        self.assertTrue(outputs[0]["generated_text"].startswith("This is a test"))

        outputs = text_generator("This is a test", return_full_text=False)
        self.assertEqual(outputs, [{"generated_text": ANY(str)}])
        self.assertNotIn("This is a test", outputs[0]["generated_text"])

        text_generator = pipeline(
            task="text-generation", model=model, tokenizer=tokenizer, return_full_text=False, max_new_tokens=5
        )
        outputs = text_generator("This is a test")
        self.assertEqual(outputs, [{"generated_text": ANY(str)}])
        self.assertNotIn("This is a test", outputs[0]["generated_text"])

        outputs = text_generator("This is a test", return_full_text=True)
        self.assertEqual(outputs, [{"generated_text": ANY(str)}])
        self.assertTrue(outputs[0]["generated_text"].startswith("This is a test"))

        outputs = text_generator(["This is great !", "Something else"], num_return_sequences=2, do_sample=True)
        self.assertEqual(
            outputs,
            [
                [{"generated_text": ANY(str)}, {"generated_text": ANY(str)}],
                [{"generated_text": ANY(str)}, {"generated_text": ANY(str)}],
            ],
        )

        if text_generator.tokenizer.pad_token is not None:
            outputs = text_generator(
                ["This is great !", "Something else"], num_return_sequences=2, batch_size=2, do_sample=True
            )
            self.assertEqual(
                outputs,
                [
                    [{"generated_text": ANY(str)}, {"generated_text": ANY(str)}],
                    [{"generated_text": ANY(str)}, {"generated_text": ANY(str)}],
                ],
            )

        with self.assertRaises(ValueError):
            outputs = text_generator("test", return_full_text=True, return_text=True)
        with self.assertRaises(ValueError):
            outputs = text_generator("test", return_full_text=True, return_tensors=True)
        with self.assertRaises(ValueError):
            outputs = text_generator("test", return_text=True, return_tensors=True)

        # Empty prompt is slightly special
        # it requires BOS token to exist.
        # Special case for Pegasus which will always append EOS so will
        # work even without BOS.
        if (
            text_generator.tokenizer.bos_token_id is not None
            or "Pegasus" in tokenizer.__class__.__name__
            or "Git" in model.__class__.__name__
        ):
            outputs = text_generator("")
            self.assertEqual(outputs, [{"generated_text": ANY(str)}])
        else:
            with self.assertRaises((ValueError, AssertionError)):
                outputs = text_generator("", add_special_tokens=False)

        # We don't care about infinite range models.
        # They already work.
        # Skip this test for XGLM, since it uses sinusoidal positional embeddings which are resized on-the-fly.
        EXTRA_MODELS_CAN_HANDLE_LONG_INPUTS = [
            "RwkvForCausalLM",
            "XGLMForCausalLM",
            "GPTNeoXForCausalLM",
            "GPTNeoXJapaneseForCausalLM",
            "FuyuForCausalLM",
            "LlamaForCausalLM",
        ]
        if (
            tokenizer.model_max_length < 10000
            and text_generator.model.__class__.__name__ not in EXTRA_MODELS_CAN_HANDLE_LONG_INPUTS
        ):
            # Handling of large generations
            if str(text_generator.device) == "cpu":
                with self.assertRaises((RuntimeError, IndexError, ValueError, AssertionError)):
                    text_generator("This is a test" * 500, max_new_tokens=5)

            outputs = text_generator("This is a test" * 500, handle_long_generation="hole", max_new_tokens=5)
            # Hole strategy cannot work
            if str(text_generator.device) == "cpu":
                with self.assertRaises(ValueError):
                    text_generator(
                        "This is a test" * 500,
                        handle_long_generation="hole",
                        max_new_tokens=tokenizer.model_max_length + 10,
                    )

    @require_torch
    @require_accelerate
    @require_torch_accelerator
    def test_small_model_pt_bloom_accelerate(self):
        import torch

        # Classic `model_kwargs`
        pipe = pipeline(
            model="hf-internal-testing/tiny-random-bloom",
            model_kwargs={"device_map": "auto", "dtype": torch.bfloat16},
            max_new_tokens=5,
            do_sample=False,
        )
        self.assertEqual(pipe.model.lm_head.weight.dtype, torch.bfloat16)
        out = pipe("This is a test")
        self.assertEqual(
            out,
            [{"generated_text": ("This is a test test test test test test")}],
        )

        # Upgraded those two to real pipeline arguments (they just get sent to the model as they're unlikely to mean anything else.)
        pipe = pipeline(
            model="hf-internal-testing/tiny-random-bloom",
            device_map="auto",
            dtype=torch.bfloat16,
            max_new_tokens=5,
            do_sample=False,
        )
        self.assertEqual(pipe.model.lm_head.weight.dtype, torch.bfloat16)
        out = pipe("This is a test")
        self.assertEqual(
            out,
            [{"generated_text": ("This is a test test test test test test")}],
        )

        # dtype will be automatically set to torch.bfloat16 if not provided - check: https://github.com/huggingface/transformers/pull/38882
        pipe = pipeline(
            model="hf-internal-testing/tiny-random-bloom", device_map="auto", max_new_tokens=5, do_sample=False
        )
        self.assertEqual(pipe.model.lm_head.weight.dtype, torch.bfloat16)
        out = pipe("This is a test")
        self.assertEqual(
            out,
            [{"generated_text": ("This is a test test test test test test")}],
        )

    @require_torch
    @require_torch_accelerator
    def test_small_model_fp16(self):
        import torch

        pipe = pipeline(
            model="hf-internal-testing/tiny-random-bloom",
            device=torch_device,
            dtype=torch.float16,
            max_new_tokens=3,
        )
        pipe("This is a test")

    @require_torch
    @require_accelerate
    @require_torch_accelerator
    def test_pipeline_accelerate_top_p(self):
        import torch

        pipe = pipeline(
            model="hf-internal-testing/tiny-random-bloom",
            device_map=torch_device,
            dtype=torch.float16,
            max_new_tokens=3,
        )
        pipe("This is a test", do_sample=True, top_p=0.5)

    def test_pipeline_length_setting_warning(self):
        prompt = """Hello world"""
        text_generator = pipeline("text-generation", model="hf-internal-testing/tiny-random-gpt2", max_new_tokens=5)
        logger = logging.get_logger("transformers.generation.utils")
        logger_msg = "Both `max_new_tokens`"  # The beginning of the message to be checked in this test

        # Both are set by the user -> log warning
        with CaptureLogger(logger) as cl:
            _ = text_generator(prompt, max_length=10, max_new_tokens=1)
        self.assertIn(logger_msg, cl.out)

        # The user only sets one -> no warning
        with CaptureLogger(logger) as cl:
            _ = text_generator(prompt, max_new_tokens=1)
        self.assertNotIn(logger_msg, cl.out)

        with CaptureLogger(logger) as cl:
            _ = text_generator(prompt, max_length=10, max_new_tokens=None)
        self.assertNotIn(logger_msg, cl.out)
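
    # With `return_dict_in_generate=True` plus `output_logits`/`output_scores`, each returned
    # dict also carries per-step "logits" and "scores" lists alongside "generated_text".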
    def test_return_dict_in_generate(self):
        text_generator = pipeline("text-generation", model="hf-internal-testing/tiny-random-gpt2", max_new_tokens=2)
        out = text_generator(
            ["This is great !", "Something else"], return_dict_in_generate=True, output_logits=True, output_scores=True
        )
        self.assertEqual(
            out,
            [
                [
                    {
                        "generated_text": ANY(str),
                        "logits": ANY(list),
                        "scores": ANY(list),
                    },
                ],
                [
                    {
                        "generated_text": ANY(str),
                        "logits": ANY(list),
                        "scores": ANY(list),
                    },
                ],
            ],
        )

    @require_torch
    def test_pipeline_assisted_generation(self):
        """Tests that we can run assisted generation in the pipeline"""
        model = "hf-internal-testing/tiny-random-MistralForCausalLM"
        pipe = pipeline("text-generation", model=model, assistant_model=model, max_new_tokens=2)

        # We can run the pipeline
        prompt = "Hello world"
        _ = pipe(prompt)

        # It is running assisted generation under the hood (e.g. flags incompatible with assisted gen will crash)
        with self.assertRaises(ValueError):
            _ = pipe(prompt, generate_kwargs={"num_beams": 2})

    @require_torch
    def test_pipeline_skip_special_tokens(self):
        """Tests that we can use `skip_special_tokens=False` to get the special tokens in the output"""
        model_id = "google/gemma-3-270m-it"
        chat = [{"role": "user", "content": "What's your name?"}]
        generator = pipeline("text-generation", model=model_id)

        # normal pipeline use
        output = generator(chat, max_new_tokens=20, do_sample=False)
        self.assertNotIn("<end_of_turn>", str(output[0]["generated_text"]))

        # forcing special tokens to be included in the output
        output = generator(chat, max_new_tokens=1000, do_sample=False, skip_special_tokens=False)
        self.assertIn("<end_of_turn>", str(output[0]["generated_text"]))
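
    # `tokenizer_encode_kwargs` should be forwarded to `tokenizer.apply_chat_template` during
    # preprocessing; the mock below checks that the kwarg actually reaches that call.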
    @require_torch
    def test_forward_tokenizer_kwargs(self):
        chat = [
            {"role": "system", "content": "This is a system message."},
            {"role": "user", "content": "This is a test"},
        ]
        model = "hf-internal-testing/tiny-gpt2-with-chatml-template"
        text_generator = pipeline("text-generation", model, max_new_tokens=5)
        tokenizer = text_generator.tokenizer

        with patch.object(tokenizer, "apply_chat_template", wraps=tokenizer.apply_chat_template) as mock:
            text_generator(chat, tokenizer_encode_kwargs={"enable_thinking": True})
            self.assertGreater(mock.call_count, 0)
            kw_call_args = mock.call_args[1]
            self.assertIn("enable_thinking", kw_call_args)
            self.assertEqual(kw_call_args["enable_thinking"], True)