"""Small client script: POST an inference request to a VLM HTTP service."""

import requests


def model_infer(vlm_url: str, payload: dict) -> None:
    """POST *payload* to the service's ``/infer`` endpoint and print the outcome.

    Args:
        vlm_url: Base URL of the VLM service, without a trailing slash
            (the ``/infer`` path is appended here).
        payload: JSON-serializable request body.

    Side effects:
        Prints the parsed JSON response on HTTP 200, otherwise the status
        code and raw error text; network-level failures are caught and
        printed rather than raised.
    """
    try:
        # requests has NO default timeout — without one, a hung server
        # blocks this call forever.
        response = requests.post(vlm_url + "/infer", json=payload, timeout=60)
        if response.status_code == 200:
            print("Succeed!")
            print("Response:", response.json())
        else:
            print(f"Failed,code: {response.status_code}")
            print("Error detail:", response.text)
    except requests.exceptions.RequestException as e:
        # Covers connection errors, timeouts, and invalid responses.
        print("request error:", str(e))


if __name__ == "__main__":
    # Guarded so importing this module does not fire a network request.
    payload = {
        "prompt": "图片有什么?详细描述",
        "generation": {
            "max_new_tokens": 64,
            "temperature": 0.7,
            "top_p": 0.9,
            "do_sample": True,
        },
        "dtype": "auto",
        "warmup_runs": 0,
        "measure_token_times": False,
    }
    url = "http://127.0.0.1:10055"
    model_infer(url, payload)