[Refactor] simplify multimodal data processing (#8107)

Signed-off-by: Xinyuan Tong <justinning0323@outlook.com>
This commit is contained in:
Xinyuan Tong
2025-07-20 21:43:09 -07:00
committed by GitHub
parent c9e8613c97
commit 8430bfe3e9
30 changed files with 297 additions and 421 deletions

View File

@@ -104,15 +104,15 @@ class VLMInputTestBase:
)
self.verify_response(output)
async def test_understands_precomputed_features(self):
async def test_understands_precomputed_embeddings(self):
req = self.get_completion_request()
processor_output = self.get_processor_output(req=req)
with torch.inference_mode():
precomputed_features = self.__class__.visual(processor_output)
precomputed_embeddings = self.__class__.visual(processor_output)
output = await self.engine.async_generate(
input_ids=processor_output["input_ids"][0].detach().cpu().tolist(),
image_data=[
self._precomputed_image_data(processor_output, precomputed_features)
self._precomputed_image_data(processor_output, precomputed_embeddings)
],
sampling_params=dict(temperature=0.0),
)
@@ -128,11 +128,11 @@ class VLMInputTestBase:
)
self.verify_response(output)
def _precomputed_image_data(self, processor_output, precomputed_features):
def _precomputed_image_data(self, processor_output, precomputed_embeddings):
"""This should not be overridden."""
return dict(
modality="IMAGE",
precomputed_features=precomputed_features,
precomputed_embeddings=precomputed_embeddings,
)
def _pixel_values_image_data(self, processor_output):