init

2025-10-09 16:47:16 +08:00
parent c8feb4deb5
commit e27e3f16bb
5248 changed files with 1778505 additions and 0 deletions
--- a/transformers/tests/models/phimoe/init.py
+++ b/transformers/tests/models/phimoe/init.py
--- a/transformers/tests/models/phimoe/test_modeling_phimoe.py
+++ b/transformers/tests/models/phimoe/test_modeling_phimoe.py
@@ -0,0 +1,216 @@
+# Copyright 2024 Microsoft and the HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Testing suite for the PyTorch PhiMoE model."""
+
+import unittest
+
+from parameterized import parameterized
+
+from transformers import PhimoeConfig, StaticCache, is_torch_available
+from transformers.testing_utils import (
+    cleanup,
+    require_torch,
+    slow,
+    torch_device,
+)
+
+from ...causal_lm_tester import CausalLMModelTest, CausalLMModelTester
+
+
+if is_torch_available():
+    import torch
+
+    from transformers import (
+        AutoTokenizer,
+        PhimoeForCausalLM,
+        PhimoeForSequenceClassification,
+        PhimoeModel,
+    )
+
+    end_of_text_token = 32000
+
+    class PhimoeMiniWithStaticCache(torch.nn.Module):
+        def __init__(self, model: PhimoeForCausalLM, batch_size: int, max_seq_len: int):
+            super().__init__()
+            self.model = model
+            self.cache = StaticCache(config=model.config, max_cache_len=max_seq_len)
+
+        def forward(
+            self,
+            input_ids: torch.LongTensor = None,
+        ) -> torch.FloatTensor:
+            return self.model.forward(
+                input_ids=input_ids,
+                use_cache=True,
+                return_dict=True,
+                past_key_values=self.cache,
+            ).logits
+
+        @torch.no_grad()
+        @staticmethod
+        def generate(model: PhimoeForCausalLM, prompt_tokens: torch.LongTensor, max_seq_len: int) -> list[int]:
+            model = PhimoeMiniWithStaticCache(model, 1, max_seq_len + prompt_tokens.shape[-1])
+
+            response_tokens = []
+
+            for input_pos in range(prompt_tokens.shape[-1]):
+                result = model.forward(
+                    input_ids=prompt_tokens[:, input_pos : input_pos + 1],
+                )
+                response_tokens.append(prompt_tokens[0][input_pos].item())
+
+            current_token = torch.argmax(result[:, -1, :], dim=-1).item()
+            response_tokens.append(current_token)
+
+            while current_token != end_of_text_token and len(response_tokens) < max_seq_len:
+                result = model.forward(
+                    input_ids=torch.tensor([[current_token]], dtype=torch.long),
+                )
+                current_token = torch.argmax(result[:, -1, :], dim=-1).item()
+                response_tokens.append(current_token)
+
+            return response_tokens
+
+
+class PhimoeModelTester(CausalLMModelTester):
+    """Binds the PhiMoE config and model classes to the shared causal-LM tester."""
+
+    # The model classes are only imported when torch is available, so the
+    # attributes are assigned under the same guard.
+    if is_torch_available():
+        config_class = PhimoeConfig
+        base_model_class = PhimoeModel
+        causal_lm_class = PhimoeForCausalLM
+        sequence_class = PhimoeForSequenceClassification
+
+
+@require_torch
+class PhimoeModelTest(CausalLMModelTest, unittest.TestCase):
+    """Runs the shared causal-LM test suite against the PhiMoE model classes."""
+
+    # Empty tuple when torch is missing, because the model classes are not imported then.
+    all_model_classes = (
+        (PhimoeModel, PhimoeForCausalLM, PhimoeForSequenceClassification) if is_torch_available() else ()
+    )
+
+    # Switches read by the shared test suite; all three are turned off for this model.
+    test_headmasking = False
+    test_pruning = False
+    test_all_params_have_gradient = False
+    model_tester_class = PhimoeModelTester
+    # Pipeline task name -> model class; empty when torch is missing.
+    pipeline_model_mapping = (
+        {
+            "feature-extraction": PhimoeModel,
+            "text-classification": PhimoeForSequenceClassification,
+            "text-generation": PhimoeForCausalLM,
+            "zero-shot": PhimoeForSequenceClassification,
+        }
+        if is_torch_available()
+        else {}
+    )
+
+    # TODO (ydshieh): Check this. See https://app.circleci.com/pipelines/github/huggingface/transformers/79292/workflows/fa2ba644-8953-44a6-8f67-ccd69ca6a476/jobs/1012905
+    def is_pipeline_test_to_skip(
+        self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
+    ):
+        # Unconditionally returns True, i.e. every pipeline test is skipped for this
+        # model regardless of the arguments (see the TODO above).
+        return True
+
+    # Intentionally empty stub: the skip decorator disables the test of this name.
+    @unittest.skip("PhiMoE's RoPE has custom parameterization")
+    def test_model_rope_scaling_frequencies(self):
+        pass
+
+    # Same as above, for each of the three parameterized scaling types.
+    @parameterized.expand([("linear",), ("dynamic",), ("yarn",)])
+    @unittest.skip("PhiMoE's RoPE has custom parameterization")
+    def test_model_rope_scaling_from_config(self, scaling_type):
+        pass
+
+
+@slow
+@require_torch
+class PhimoeIntegrationTest(unittest.TestCase):
+    model = None
+
+    @classmethod
+    def get_model(cls):
+        if cls.model is None:
+            cls.model = PhimoeForCausalLM.from_pretrained(
+                "microsoft/Phi-3.5-MoE-instruct", dtype="auto", device_map="auto"
+            )
+        return cls.model
+
+    @classmethod
+    def tearDownClass(cls):
+        del cls.model
+        cleanup(torch_device, gc_collect=True)
+
+    def setUp(self):
+        cleanup(torch_device, gc_collect=True)
+
+    def tearDown(self):
+        cleanup(torch_device, gc_collect=True)
+
+    def test_model_phimoe_instruct_logits(self):
+        input_ids = {"input_ids": torch.tensor([[1212, 318, 281, 1672]], dtype=torch.long, device=torch_device)}
+
+        model = self.get_model()
+        model.eval()
+
+        with torch.no_grad():
+            output = model(**input_ids).logits
+
+        EXPECTED_OUTPUT = torch.tensor(
+            [
+                    [-3.4844, -2.4531, -1.1719, 0.6055, -0.4922, -0.1001, 0.8086, -0.2422, 0.3477, -1.0078],
+                    [-0.9766, 0.1631, -0.5508, 2.3594, 0.7031, 3.1719, 0.4141, 0.2305, 0.6055, -2.1250],
+            ]
+        ).to(device=torch_device, dtype=output.dtype)  # fmt: skip
+
+        torch.testing.assert_close(output[0, :2, :10], EXPECTED_OUTPUT, rtol=1e-4, atol=1e-4)
+
+    def test_phimoe_instruct_generation(self):
+        model = self.get_model()
+        tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3.5-MoE-instruct")
+
+        messages = [
+            {
+                "role": "system",
+                "content": "You are a helpful digital assistant. Please provide safe, ethical and accurate information to the user.",
+            },
+            {"role": "user", "content": "Can you provide ways to eat combinations of bananas and dragonfruits?"},
+        ]
+        inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt")
+
+        outputs = model.generate(inputs, max_new_tokens=30)
+        output_text = tokenizer.batch_decode(outputs)
+
+        EXPECTED_OUTPUT = [
+            "<|system|> You are a helpful digital assistant. Please provide safe, ethical and accurate information to the user.<|end|><|user|> Can you provide ways to eat combinations of bananas and dragonfruits?<|end|><|assistant|> Certainly! Bananas and dragonfruits are both delicious and nutritious fruits that can be combined in various ways to create",
+        ]
+        self.assertListEqual(output_text, EXPECTED_OUTPUT)
+
+    def test_phimoe_instruct_with_static_cache(self):
+        model = self.get_model()
+        tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3.5-MoE-instruct")
+
+        messages = [
+            {
+                "role": "system",
+                "content": "You are a helpful digital assistant. Please provide safe, ethical and accurate information to the user.",
+            },
+            {"role": "user", "content": "Can you provide ways to eat combinations of bananas and dragonfruits?"},
+        ]
+        inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(
+            torch_device
+        )
+
+        response_tokens = PhimoeMiniWithStaticCache.generate(model, inputs, max_seq_len=30)
+        output_text = tokenizer.batch_decode(torch.tensor([response_tokens], dtype=torch.long, device=torch_device))
+
+        EXPECTED_OUTPUT = [
+            "<|system|> You are a helpful digital assistant. Please provide safe, ethical and accurate information to the user.<|end|><|user|> Can you provide ways to eat combinations of bananas and dragonfruits?<|end|><|assistant|> C"
+        ]
+        self.assertListEqual(output_text, EXPECTED_OUTPUT)