[model] support MiniCPM-V 4.0 (#8747)

Signed-off-by: tc-mb <caitianchi@modelbest.cn>
Co-authored-by: Xinyuan Tong <115166877+JustinTong0323@users.noreply.github.com>
This commit is contained in:
tc-mb
2025-09-03 06:33:03 +08:00
committed by GitHub
parent 11dcabc545
commit 03dbf1aa8e
4 changed files with 246 additions and 6 deletions

View File

@@ -161,7 +161,7 @@ class VisionLLMLogitsBase(unittest.IsolatedAsyncioTestCase):
return self.model_runner.model
class TestMiniCPMVLogits(VisionLLMLogitsBase):
class TestMiniCPMV2_6Logits(VisionLLMLogitsBase):
@classmethod
def setUpClass(cls):
super().setUpClass()
@@ -265,3 +265,60 @@ class TestMiniCPMVLogits(VisionLLMLogitsBase):
)
self.compare_outputs(sglang_output, hf_output)
class TestMiniCPMV4Logits(VisionLLMLogitsBase):
    """Embedding comparison test for the ``openbmb/MiniCPM-V-4`` checkpoint.

    The Hugging Face model is loaded once per class and its embedding
    output is compared against the output of ``self.vlm_func`` run on the
    model returned by ``self.get_model()``.
    """

    @classmethod
    def setUpClass(cls):
        """Load tokenizer, processor and the HF reference model once.

        The reference model is loaded in bfloat16, switched to eval mode
        and moved to CUDA when available (CPU otherwise).
        ``init_embedding_cache()`` (defined elsewhere) is called last.
        """
        super().setUpClass()
        cls.model_path = "openbmb/MiniCPM-V-4"
        cls.tokenizer = AutoTokenizer.from_pretrained(
            cls.model_path, trust_remote_code=True
        )
        cls.processor = AutoProcessor.from_pretrained(
            cls.model_path, trust_remote_code=True
        )
        cls.chat_template = "minicpmv"

        cls.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        cls.hf_model = (
            AutoModel.from_pretrained(
                cls.model_path, torch_dtype=torch.bfloat16, trust_remote_code=True
            )
            .eval()
            .to(cls.device)
        )
        init_embedding_cache()

    async def test_vlm_embedding_output(self):
        """
        Compares the embedding output of vlm
        """
        inputs = self.get_processor_output()

        with torch.no_grad():
            # hf
            # The reference is the plain token-embedding lookup on input_ids.
            # NOTE(review): image_bound / pixel_values / tgt_sizes are not fed
            # to the HF side here (a dict holding them used to be built but
            # was never used) -- confirm that this is the intended reference.
            # The ids are moved to the model's device, mirroring the sglang
            # call below; this is a no-op when they already live there.
            hf_output = self.hf_model.get_input_embeddings()(
                inputs.input_ids.to(self.device)
            )

            # sglang
            model = self.get_model()
            sglang_output = self.vlm_func(
                model,
                input_ids=inputs.input_ids.to(self.device),
                pixel_values=inputs.pixel_values,
                image_bound=inputs.image_bound.to(self.device),
                tgt_sizes=inputs.tgt_sizes.to(self.device),
                input_embedding=model.get_input_embeddings(),
                multimodal_model=model,
                placeholder_tokens={
                    # The tokenizer's unk token id is passed as the image
                    # placeholder for this model.
                    Modality.IMAGE: self.processor.tokenizer.unk_token_id,
                },
            )

        self.compare_outputs(sglang_output, hf_output)