bring back kimi vl ci (#8537)

This commit is contained in:
Stefan He
2025-07-29 13:14:18 -07:00
committed by GitHub
parent a4c3b121d8
commit c0fd77e839
2 changed files with 43 additions and 45 deletions

View File

@@ -189,32 +189,31 @@ class TestGemmaUnderstandsImage(VLMInputTestBase, unittest.IsolatedAsyncioTestCa
)
# commented out before https://huggingface.co/moonshotai/Kimi-VL-A3B-Instruct/discussions/27 get fixed
# class TestKimiVLImageUnderstandsImage(
# VLMInputTestBase, unittest.IsolatedAsyncioTestCase
# ):
# model_path = "moonshotai/Kimi-VL-A3B-Instruct"
# chat_template = "kimi-vl"
class TestKimiVLImageUnderstandsImage(
VLMInputTestBase, unittest.IsolatedAsyncioTestCase
):
model_path = "moonshotai/Kimi-VL-A3B-Instruct"
chat_template = "kimi-vl"
# @classmethod
# def _init_visual(cls):
# model = AutoModel.from_pretrained(cls.model_path, trust_remote_code=True)
# cls.vision_tower = model.vision_tower.eval().to(cls.device)
# cls.mm_projector = model.multi_modal_projector.eval().to(cls.device)
@classmethod
def _init_visual(cls):
model = AutoModel.from_pretrained(cls.model_path, trust_remote_code=True)
cls.vision_tower = model.vision_tower.eval().to(cls.device)
cls.mm_projector = model.multi_modal_projector.eval().to(cls.device)
# cls.visual = lambda tokenizer_output: cls.mm_projector(
# cls.vision_tower(
# pixel_values=tokenizer_output["pixel_values"],
# grid_hws=tokenizer_output["image_grid_hws"],
# )
# )
cls.visual = lambda tokenizer_output: cls.mm_projector(
cls.vision_tower(
pixel_values=tokenizer_output["pixel_values"],
grid_hws=tokenizer_output["image_grid_hws"],
)
)
# def _pixel_values_image_data(self, processor_output):
# return dict(
# modality="IMAGE",
# pixel_values=processor_output["pixel_values"],
# image_grid_hws=processor_output["image_grid_hws"],
# )
def _pixel_values_image_data(self, processor_output):
return dict(
modality="IMAGE",
pixel_values=processor_output["pixel_values"],
image_grid_hws=processor_output["image_grid_hws"],
)
# not for CI: too large