[model] support MiniCPM-V 4.0 (#8747)
Signed-off-by: tc-mb <caitianchi@modelbest.cn> Co-authored-by: Xinyuan Tong <115166877+JustinTong0323@users.noreply.github.com>
This commit is contained in:
@@ -39,7 +39,7 @@ class TestCompressedTensorsLlama3FP8(CustomTestCase):
|
||||
)
|
||||
metrics = run_eval(args)
|
||||
print(f"{metrics=}")
|
||||
self.assertGreater(metrics["accuracy"], 0.45)
|
||||
self.assertGreaterEqual(metrics["accuracy"], 0.45)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -165,6 +165,27 @@ class TestMinicpmvServer(ImageOpenAITestMixin):
|
||||
cls.base_url += "/v1"
|
||||
|
||||
|
||||
class TestMinicpmv4Server(ImageOpenAITestMixin):
    # Runs the OpenAI-compatible image-chat test cases from ImageOpenAITestMixin
    # against a locally launched server serving openbmb/MiniCPM-V-4.

    @classmethod
    def setUpClass(cls):
        """Launch one sglang server for the whole test class.

        Mirrors the sibling TestMinicpmvServer setup; only the model id
        (and hence the weights being served) differs.
        """
        cls.model = "openbmb/MiniCPM-V-4"
        cls.base_url = DEFAULT_URL_FOR_TEST
        # Dummy key: the server is launched locally for the test run, so any
        # non-empty key the client and server agree on works.
        cls.api_key = "sk-123456"
        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
            other_args=[
                # Required: MiniCPM-V ships custom modeling code on the Hub.
                "--trust-remote-code",
                # Keep static memory small so CI GPUs can co-host other jobs.
                "--mem-fraction-static",
                "0.35",
                "--cuda-graph-max-bs",
                "4",
            ],
        )
        # The mixin's clients speak the OpenAI-style API under /v1.
        cls.base_url += "/v1"
|
||||
|
||||
|
||||
class TestInternVL2_5Server(ImageOpenAITestMixin):
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
@@ -184,7 +205,7 @@ class TestInternVL2_5Server(ImageOpenAITestMixin):
|
||||
cls.base_url += "/v1"
|
||||
|
||||
|
||||
class TestMinicpmoServer(ImageOpenAITestMixin, AudioOpenAITestMixin):
|
||||
class TestMinicpmo2_6Server(ImageOpenAITestMixin, AudioOpenAITestMixin):
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
cls.model = "openbmb/MiniCPM-o-2_6"
|
||||
|
||||
@@ -161,7 +161,7 @@ class VisionLLMLogitsBase(unittest.IsolatedAsyncioTestCase):
|
||||
return self.model_runner.model
|
||||
|
||||
|
||||
class TestMiniCPMVLogits(VisionLLMLogitsBase):
|
||||
class TestMiniCPMV2_6Logits(VisionLLMLogitsBase):
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
super().setUpClass()
|
||||
@@ -265,3 +265,60 @@ class TestMiniCPMVLogits(VisionLLMLogitsBase):
|
||||
)
|
||||
|
||||
self.compare_outputs(sglang_output, hf_output)
|
||||
|
||||
|
||||
class TestMiniCPMV4Logits(VisionLLMLogitsBase):
    # Compares sglang's multimodal input-embedding path for MiniCPM-V-4
    # against the Hugging Face reference model's text embedding table.

    @classmethod
    def setUpClass(cls):
        """Load tokenizer/processor and the HF reference model once per class.

        NOTE(review): assumes VisionLLMLogitsBase.setUpClass prepares the
        sglang model runner used by get_model()/vlm_func — confirm in base.
        """
        super().setUpClass()
        cls.model_path = "openbmb/MiniCPM-V-4"
        # trust_remote_code is required: MiniCPM-V ships custom code on the Hub.
        cls.tokenizer = AutoTokenizer.from_pretrained(
            cls.model_path, trust_remote_code=True
        )
        cls.processor = AutoProcessor.from_pretrained(
            cls.model_path, trust_remote_code=True
        )
        cls.chat_template = "minicpmv"

        cls.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        cls.hf_model = (
            AutoModel.from_pretrained(
                cls.model_path, torch_dtype=torch.bfloat16, trust_remote_code=True
            )
            .eval()
            .to(cls.device)
        )
        init_embedding_cache()

    async def test_vlm_embedding_output(self):
        """Compare sglang's VLM embedding output with the HF reference.

        The HF side only needs the text embedding table applied to input_ids;
        the sglang side runs the full multimodal embedding path (pixel values,
        image bounds, target sizes) through vlm_func.
        """
        inputs = self.get_processor_output()

        with torch.no_grad():
            # HF reference: plain token embeddings for the same input_ids.
            # (Fix: a dict of model inputs was previously built here but never
            # used — the reference path needs only input_ids.)
            hf_output = self.hf_model.get_input_embeddings()(inputs.input_ids)

            # sglang path under test.
            model = self.get_model()
            sglang_output = self.vlm_func(
                model,
                input_ids=inputs.input_ids.to(self.device),
                pixel_values=inputs.pixel_values,
                image_bound=inputs.image_bound.to(self.device),
                tgt_sizes=inputs.tgt_sizes.to(self.device),
                input_embedding=model.get_input_embeddings(),
                multimodal_model=model,
                placeholder_tokens={
                    # MiniCPM-V uses the tokenizer's unk token as the image
                    # placeholder id.
                    Modality.IMAGE: self.processor.tokenizer.unk_token_id,
                },
            )

        self.compare_outputs(sglang_output, hf_output)
|
||||
|
||||
Reference in New Issue
Block a user