import json
import os
import requests
import base64
import time


def model_infer(vlm_url, messages):
    """POST an OpenAI-style chat request to a VLM endpoint and time it.

    Args:
        vlm_url: Full URL of the inference endpoint.
        messages: Chat ``messages`` list (OpenAI chat-completions format).

    Returns:
        On success: ``{"vlm_url": ..., "data": <reply text>, "elapsed": <seconds>}``.
        On any failure (connection error, non-2xx status, malformed payload):
        ``{"error": <message>, "vlm_url": vlm_url}``. Never raises — callers
        must check for the ``"error"`` key.
    """
    try:
        param_dict = {"model": "vlm", "messages": messages}
        start_time = time.time()
        response = requests.post(vlm_url, json=param_dict, timeout=300)
        end_time = time.time()
        # Fail fast on non-2xx so the error dict carries the HTTP status
        # instead of an opaque KeyError from parsing an error body.
        response.raise_for_status()
        payload = response.json()
        response_content = payload['choices'][0]['message']['content']
        return {
            "vlm_url": vlm_url,
            "data": response_content,
            "elapsed": end_time - start_time,
        }
    except Exception as e:
        # Deliberate best-effort boundary: report the failure as data so a
        # benchmark sweep over many images survives individual errors.
        return {"error": str(e), "vlm_url": vlm_url}


def bench_one_image(image_path,
                    vlm_url="http://localhost:8080/generate",
                    question="图片有什么?一句话描述"):
    """Send one image plus a text prompt to the VLM and return the result.

    Args:
        image_path: Path to an image file (sent with a JPEG data-URI header).
        vlm_url: Inference endpoint; default preserves the original hard-coded URL.
        question: Prompt text; default preserves the original hard-coded prompt.

    Returns:
        The dict produced by ``model_infer`` (success payload or error dict).
    """
    with open(image_path, "rb") as f:
        image_base64 = base64.b64encode(f.read()).decode()
    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"},
                },
                {"type": "text", "text": question},
            ],
        }
    ]
    result = model_infer(vlm_url, messages)
    print(result)
    return result


def main():
    """Benchmark a single sample image against the local VLM server."""
    bench_one_image('./cars.jpg')


if __name__ == "__main__":
    main()