import torch
import time
import os
from PIL import Image
from transformers import AutoImageProcessor, AutoModelForImageClassification
from flask import Flask, request, jsonify
from io import BytesIO

# Cap CPU thread pools (affects only CPU-side work such as preprocessing;
# it does not influence GPU inference).
os.environ["OMP_NUM_THREADS"] = "4"
os.environ["MKL_NUM_THREADS"] = "4"
os.environ["NUMEXPR_NUM_THREADS"] = "4"
os.environ["OPENBLAS_NUM_THREADS"] = "4"
os.environ["VECLIB_MAXIMUM_THREADS"] = "4"
torch.set_num_threads(4)

# Device selection — this service targets GPU inference only; without CUDA,
# predictions return an error payload instead of running on CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"当前设备: {device}")
print(f"CPU核心数设置: {torch.get_num_threads()}")


class ImageClassifier:
    """Wraps a HuggingFace image-classification model for GPU-only inference."""

    def __init__(self, model_path: str):
        self.processor = AutoImageProcessor.from_pretrained(model_path)
        # Load the model onto the GPU only; without CUDA the model stays None
        # and every prediction reports an error instead of crashing.
        if device.type == "cuda":
            self.model = AutoModelForImageClassification.from_pretrained(model_path).to(device)
            # BUGFIX: switch to inference mode so dropout/batch-norm layers
            # behave deterministically (the original never called eval()).
            self.model.eval()
        else:
            self.model = None
        # id -> human-readable label mapping from the model config
        # (None when no model is loaded).
        self.id2label = self.model.config.id2label if self.model else None

    def _predict_with_model(self, image) -> dict:
        """Run a single GPU forward pass on one PIL image.

        Returns a dict with class_id / class_name / confidence /
        device_used / processing_time. On any failure (no CUDA,
        preprocessing or model error) a dict with class_id == -1 and an
        "error" key is returned instead of raising.
        """
        try:
            # Guard: GPU model must be available.
            if not self.model or device.type != "cuda":
                return {
                    "class_id": -1,
                    "class_name": "error",
                    "confidence": 0.0,
                    "device_used": str(device),
                    "processing_time": 0.0,
                    "error": "CUDA设备不可用或模型未加载",
                }

            start_time = time.perf_counter()

            # Preprocess the image and move the tensors to the GPU.
            inputs = self.processor(images=image, return_tensors="pt").to(device)
            with torch.no_grad():
                # BUGFIX: removed leftover benchmarking code that ran the
                # model 1000 extra times per request and printed
                # 'mr100 T1'/'mr100 T2' debug timings; it also inflated the
                # processing_time reported to the client.
                outputs = self.model(**inputs)
                logits = outputs.logits
                probs = torch.nn.functional.softmax(logits, dim=1)
                max_prob, max_idx = probs.max(dim=1)
                class_idx = max_idx.item()

            processing_time = round(time.perf_counter() - start_time, 6)
            return {
                "class_id": class_idx,
                "class_name": self.id2label[class_idx],
                "confidence": float(max_prob.item()),
                "device_used": str(device),
                "processing_time": processing_time,
            }
        except Exception as e:
            # Service boundary: report the failure in the response payload
            # rather than letting the exception propagate to Flask.
            return {
                "class_id": -1,
                "class_name": "error",
                "confidence": 0.0,
                "device_used": str(device),
                "processing_time": 0.0,
                "error": str(e),
            }

    def predict_single_image(self, image) -> dict:
        """Predict one image on the GPU and wrap the result with a status."""
        return {"status": "success", "prediction": self._predict_with_model(image)}


# ---- Service wiring -------------------------------------------------------
app = Flask(__name__)
MODEL_PATH = os.environ.get("MODEL_PATH", "/model")  # model path (env var or default)
classifier = ImageClassifier(MODEL_PATH)


@app.route('/v1/private/s782b4996', methods=['POST'])
def predict_single():
    """Accept one uploaded image ('image' form field) and return the GPU prediction."""
    if 'image' not in request.files:
        return jsonify({
            "status": "error",
            "prediction": {
                "class_id": -1,
                "class_name": "error",
                "confidence": 0.0,
                "device_used": str(device),
                "processing_time": 0.0,
                "error": "请求中未包含图片",
            },
        }), 400
    image_file = request.files['image']
    try:
        image = Image.open(BytesIO(image_file.read())).convert("RGB")
        result = classifier.predict_single_image(image)
        return jsonify(result)
    except Exception as e:
        # Covers unreadable/corrupt uploads as well as unexpected errors.
        return jsonify({
            "status": "error",
            "prediction": {
                "class_id": -1,
                "class_name": "error",
                "confidence": 0.0,
                "device_used": str(device),
                "processing_time": 0.0,
                "error": str(e),
            },
        }), 500


@app.route('/health', methods=['GET'])
def health_check():
    """Liveness probe: reports CUDA availability and CPU thread settings."""
    return jsonify({
        "status": "healthy",
        "cuda_available": device.type == "cuda",
        "device_used": str(device),
        "cpu_threads": torch.get_num_threads(),
    }), 200


if __name__ == "__main__":
    app.run(host='0.0.0.0', port=80, debug=False)