Sync from v0.13
@@ -1,32 +1,102 @@
-import torch
-
-from vllm import LLM, ModelRegistry, SamplingParams
-from vllm.model_executor.models.opt import OPTForCausalLM
-from vllm.model_executor.sampling_metadata import SamplingMetadata
-
-
-class MyOPTForCausalLM(OPTForCausalLM):
-
-    def compute_logits(self, hidden_states: torch.Tensor,
-                       sampling_metadata: SamplingMetadata) -> torch.Tensor:
-        # this dummy model always predicts the first token
-        logits = super().compute_logits(hidden_states, sampling_metadata)
-        logits.zero_()
-        logits[:, 0] += 1.0
-        return logits
-
-
-def test_oot_registration():
-    # register our dummy model
-    ModelRegistry.register_model("OPTForCausalLM", MyOPTForCausalLM)
-    prompts = ["Hello, my name is", "The text does not matter"]
-    sampling_params = SamplingParams(temperature=0)
-    llm = LLM(model="facebook/opt-125m")
-    first_token = llm.get_tokenizer().decode(0)
-    outputs = llm.generate(prompts, sampling_params)
-
-    for output in outputs:
-        generated_text = output.outputs[0].text
-        # make sure only the first token is generated
-        rest = generated_text.replace(first_token, "")
-        assert rest == ""
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import pytest
+
+from vllm import LLM, SamplingParams
+from vllm.assets.image import ImageAsset
+from vllm.multimodal.image import convert_image_mode
+
+from ..utils import create_new_process_for_each_test
+
+
+@create_new_process_for_each_test()
+def test_plugin(
+    monkeypatch: pytest.MonkeyPatch,
+    dummy_opt_path: str,
+):
+    with monkeypatch.context() as m:
+        m.setenv("VLLM_PLUGINS", "")
+
+        with pytest.raises(ValueError, match="are not supported for now"):
+            LLM(model=dummy_opt_path, load_format="dummy")
+
+
+@create_new_process_for_each_test()
+def test_oot_registration_text_generation(
+    monkeypatch: pytest.MonkeyPatch,
+    dummy_opt_path: str,
+):
+    with monkeypatch.context() as m:
+        m.setenv("VLLM_PLUGINS", "register_dummy_model")
+        prompts = ["Hello, my name is", "The text does not matter"]
+        sampling_params = SamplingParams(temperature=0)
+        llm = LLM(model=dummy_opt_path, load_format="dummy")
+        first_token = llm.get_tokenizer().decode(0)
+        outputs = llm.generate(prompts, sampling_params)
+
+        for output in outputs:
+            generated_text = output.outputs[0].text
+            # make sure only the first token is generated
+            rest = generated_text.replace(first_token, "")
+            assert rest == ""
+
+
+@create_new_process_for_each_test()
+def test_oot_registration_embedding(
+    monkeypatch: pytest.MonkeyPatch,
+    dummy_gemma2_embedding_path: str,
+):
+    with monkeypatch.context() as m:
+        m.setenv("VLLM_PLUGINS", "register_dummy_model")
+        prompts = ["Hello, my name is", "The text does not matter"]
+        llm = LLM(
+            model=dummy_gemma2_embedding_path, load_format="dummy", max_model_len=2048
+        )
+        outputs = llm.embed(prompts)
+
+        for output in outputs:
+            assert all(v == 0 for v in output.outputs.embedding)
+
+
+image = convert_image_mode(ImageAsset("cherry_blossom").pil_image, "RGB")
+
+
+@create_new_process_for_each_test()
+def test_oot_registration_multimodal(
+    monkeypatch: pytest.MonkeyPatch,
+    dummy_llava_path: str,
+):
+    with monkeypatch.context() as m:
+        m.setenv("VLLM_PLUGINS", "register_dummy_model")
+        prompts = [
+            {
+                "prompt": "What's in the image?<image>",
+                "multi_modal_data": {"image": image},
+            },
+            {
+                "prompt": "Describe the image<image>",
+                "multi_modal_data": {"image": image},
+            },
+        ]
+
+        sampling_params = SamplingParams(temperature=0)
+        llm = LLM(
+            model=dummy_llava_path,
+            load_format="dummy",
+            max_num_seqs=1,
+            trust_remote_code=True,
+            gpu_memory_utilization=0.98,
+            max_model_len=4096,
+            enforce_eager=True,
+            limit_mm_per_prompt={"image": 1},
+        )
+
+        first_token = llm.get_tokenizer().decode(0)
+        outputs = llm.generate(prompts, sampling_params)
+
+        for output in outputs:
+            generated_text = output.outputs[0].text
+            # make sure only the first token is generated
+            rest = generated_text.replace(first_token, "")
+            assert rest == ""