Sync from v0.13

2026-01-19 10:38:50 +08:00
parent b2ef04d792
commit 5aef6c175a
3714 changed files with 854317 additions and 89342 deletions

@@ -1,32 +1,102 @@
-import torch
-
-from vllm import LLM, ModelRegistry, SamplingParams
-from vllm.model_executor.models.opt import OPTForCausalLM
-from vllm.model_executor.sampling_metadata import SamplingMetadata
-
-
-class MyOPTForCausalLM(OPTForCausalLM):
-
-    def compute_logits(self, hidden_states: torch.Tensor,
-                       sampling_metadata: SamplingMetadata) -> torch.Tensor:
-        # this dummy model always predicts the first token
-        logits = super().compute_logits(hidden_states, sampling_metadata)
-        logits.zero_()
-        logits[:, 0] += 1.0
-        return logits
-
-
-def test_oot_registration():
-    # register our dummy model
-    ModelRegistry.register_model("OPTForCausalLM", MyOPTForCausalLM)
-    prompts = ["Hello, my name is", "The text does not matter"]
-    sampling_params = SamplingParams(temperature=0)
-    llm = LLM(model="facebook/opt-125m")
-    first_token = llm.get_tokenizer().decode(0)
-    outputs = llm.generate(prompts, sampling_params)
-
-    for output in outputs:
-        generated_text = output.outputs[0].text
-        # make sure only the first token is generated
-        rest = generated_text.replace(first_token, "")
-        assert rest == ""
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import pytest
+
+from vllm import LLM, SamplingParams
+from vllm.assets.image import ImageAsset
+from vllm.multimodal.image import convert_image_mode
+
+from ..utils import create_new_process_for_each_test
+
+
+@create_new_process_for_each_test()
+def test_plugin(
+    monkeypatch: pytest.MonkeyPatch,
+    dummy_opt_path: str,
+):
+    with monkeypatch.context() as m:
+        m.setenv("VLLM_PLUGINS", "")
+
+        with pytest.raises(ValueError, match="are not supported for now"):
+            LLM(model=dummy_opt_path, load_format="dummy")
+
+
+@create_new_process_for_each_test()
+def test_oot_registration_text_generation(
+    monkeypatch: pytest.MonkeyPatch,
+    dummy_opt_path: str,
+):
+    with monkeypatch.context() as m:
+        m.setenv("VLLM_PLUGINS", "register_dummy_model")
+        prompts = ["Hello, my name is", "The text does not matter"]
+        sampling_params = SamplingParams(temperature=0)
+        llm = LLM(model=dummy_opt_path, load_format="dummy")
+        first_token = llm.get_tokenizer().decode(0)
+        outputs = llm.generate(prompts, sampling_params)
+
+        for output in outputs:
+            generated_text = output.outputs[0].text
+            # make sure only the first token is generated
+            rest = generated_text.replace(first_token, "")
+            assert rest == ""
+
+
+@create_new_process_for_each_test()
+def test_oot_registration_embedding(
+    monkeypatch: pytest.MonkeyPatch,
+    dummy_gemma2_embedding_path: str,
+):
+    with monkeypatch.context() as m:
+        m.setenv("VLLM_PLUGINS", "register_dummy_model")
+        prompts = ["Hello, my name is", "The text does not matter"]
+        llm = LLM(
+            model=dummy_gemma2_embedding_path, load_format="dummy", max_model_len=2048
+        )
+        outputs = llm.embed(prompts)
+
+        for output in outputs:
+            assert all(v == 0 for v in output.outputs.embedding)
+
+
+image = convert_image_mode(ImageAsset("cherry_blossom").pil_image, "RGB")
+
+
+@create_new_process_for_each_test()
+def test_oot_registration_multimodal(
+    monkeypatch: pytest.MonkeyPatch,
+    dummy_llava_path: str,
+):
+    with monkeypatch.context() as m:
+        m.setenv("VLLM_PLUGINS", "register_dummy_model")
+        prompts = [
+            {
+                "prompt": "What's in the image?<image>",
+                "multi_modal_data": {"image": image},
+            },
+            {
+                "prompt": "Describe the image<image>",
+                "multi_modal_data": {"image": image},
+            },
+        ]
+        sampling_params = SamplingParams(temperature=0)
+        llm = LLM(
+            model=dummy_llava_path,
+            load_format="dummy",
+            max_num_seqs=1,
+            trust_remote_code=True,
+            gpu_memory_utilization=0.98,
+            max_model_len=4096,
+            enforce_eager=True,
+            limit_mm_per_prompt={"image": 1},
+        )
+        first_token = llm.get_tokenizer().decode(0)
+        outputs = llm.generate(prompts, sampling_params)
+
+        for output in outputs:
+            generated_text = output.outputs[0].text
+            # make sure only the first token is generated
+            rest = generated_text.replace(first_token, "")
+            assert rest == ""