diff --git a/vllm_kunlun/models/qwen2_vl.py b/vllm_kunlun/models/qwen2_vl.py index 18f854c..2f16db1 100644 --- a/vllm_kunlun/models/qwen2_vl.py +++ b/vllm_kunlun/models/qwen2_vl.py @@ -1068,7 +1068,8 @@ class Qwen2VLMultiModalProcessor(BaseMultiModalProcessor[Qwen2VLProcessingInfo] merge_length = image_processor.merge_size**2 def get_replacement_qwen2vl(item_idx: int, modality: str): - grid_thw = out_mm_kwargs[f"{modality}_grid_thw"][item_idx] + out_item = out_mm_kwargs[modality][item_idx] + grid_thw = out_item[f"{modality}_grid_thw"].data assert isinstance(grid_thw, torch.Tensor) num_tokens = int(grid_thw.prod()) // merge_length