vlm: enable radix cache for qwen-vl models (#5349)

Co-authored-by: Xinyuan Tong <justinning0323@outlook.com>
This commit is contained in:
Mick
2025-04-24 12:35:05 +09:00
committed by GitHub
parent 7d0edf3cae
commit c998d04b46
26 changed files with 429 additions and 331 deletions

View File

@@ -229,9 +229,9 @@ class TestMiniCPMVLogits(VisionLLMLogitsBase):
input_ids=input_ids,
input_embedding=model.get_input_embeddings(),
image_data_embedding_func=model.get_image_feature,
-placeholder_token_ids=[
-self.processor.tokenizer.unk_token_id,
-],
+placeholder_tokens={
+Modality.IMAGE: self.processor.tokenizer.unk_token_id,
+},
)
self.compare_outputs(sglang_output, hf_output)