# enginex-r200-vc-cnn/model_test_caltech_http_kunlunxin.py

import glob
import json
import os
import shutil
import subprocess
import time
from io import BytesIO

import requests
import torch
from flask import Flask, request, jsonify
from PIL import Image
from transformers import AutoImageProcessor, AutoModelForImageClassification

class ImageClassifier:
    """Top-1 image classifier backed by a local Hugging Face checkpoint.

    The processor and model are loaded from ``model_path`` with
    ``from_pretrained`` and the model is moved to ``device``. When more than
    one CUDA device is visible the model is wrapped in ``DataParallel``.
    """

    # Any one of these files counts as "weights are present". The original
    # check only accepted ``pytorch_model.bin`` and therefore rejected
    # checkpoints stored as safetensors or as sharded weights.
    _WEIGHT_FILES = (
        "pytorch_model.bin",
        "model.safetensors",
        "pytorch_model.bin.index.json",
        "model.safetensors.index.json",
    )

    @staticmethod
    def _missing_model_files(model_path: str) -> list:
        """Return the names of required checkpoint files absent from ``model_path``."""
        missing = []
        if not os.path.exists(os.path.join(model_path, "config.json")):
            missing.append("config.json")
        has_weights = any(
            os.path.exists(os.path.join(model_path, name))
            for name in ImageClassifier._WEIGHT_FILES
        )
        if not has_weights:
            # Report every accepted alternative so the operator knows the options.
            missing.append(" | ".join(ImageClassifier._WEIGHT_FILES))
        return missing

    def __init__(self, model_path: str, device: torch.device):
        """Load the checkpoint at ``model_path`` and place it on ``device``.

        Args:
            model_path: Directory containing ``config.json`` and a weights file.
            device: Torch device the model and inputs are moved to.

        Raises:
            ValueError: If ``model_path`` does not exist, is not a directory,
                or lacks ``config.json`` / a recognised weights file.
        """
        # Validate the model path before handing it to transformers.
        if not os.path.exists(model_path):
            raise ValueError(f"模型路径不存在: {model_path}")
        if not os.path.isdir(model_path):
            raise ValueError(f"模型路径不是目录: {model_path}")

        # Make sure the files needed by from_pretrained are present.
        missing_files = self._missing_model_files(model_path)
        if missing_files:
            raise ValueError(f"模型路径缺少必要文件: {missing_files}")

        self.processor = AutoImageProcessor.from_pretrained(model_path)
        self.model = AutoModelForImageClassification.from_pretrained(model_path)

        # Move the model to the requested device.
        self.model = self.model.to(device)
        self.device = device

        # Report where the model ended up.
        if device.type == "cuda":
            if is_kunlunxin_gpu():
                print(f"模型是否在昆仑芯GPU上: {next(self.model.parameters()).is_cuda}")
                print("使用符号重写技术（CUDA兼容模式）")
            else:
                print(f"模型是否在NVIDIA GPU上: {next(self.model.parameters()).is_cuda}")
        else:
            print(f"模型在 {device.type.upper()} 上运行")

        # Multi-device handling.
        if device.type == "cuda" and torch.cuda.device_count() > 1:
            self.model = torch.nn.DataParallel(self.model)

        # DataParallel hides the wrapped model (and its config) behind ``.module``.
        self.id2label = self.model.module.config.id2label if hasattr(self.model, 'module') else self.model.config.id2label

    def predict_single_image(self, image: "Image.Image") -> dict:
        """Classify one PIL image and return the top-1 prediction.

        Args:
            image: The PIL image to classify.

        Returns:
            A dict with ``class_id``, ``class_name``, ``confidence``,
            ``device_used``, ``processing_time`` and ``hardware_info``. On any
            failure the dict instead carries ``class_id`` -1, ``class_name``
            "error" and an ``error`` message; no exception propagates.
        """
        try:
            # Preprocess.
            inputs = self.processor(images=image, return_tensors="pt")

            # Move the input tensors to the model's device.
            inputs = inputs.to(self.device)

            # Inference.
            start_time = time.time()
            with torch.no_grad():
                # T1: wall time of the first forward pass for this request.
                ts = time.time()
                outputs = self.model(** inputs)
                print('kunlunxin T1', time.time() - ts, flush=True)

                # T2: wall time of 1000 further forward passes on the same
                # input (benchmark loop; only the last output is used).
                # NOTE(review): no device synchronisation is done before
                # reading the clock - confirm the timings are meaningful on
                # an asynchronous backend.
                ts = time.time()
                for _ in range(1000):
                    outputs = self.model(**inputs)
                print('kunlunxin T2', time.time() - ts, flush=True)

            # Covers the first pass plus the 1000-pass benchmark loop.
            processing_time = time.time() - start_time

            # Keep only the highest-confidence class.
            logits = outputs.logits
            probs = torch.nn.functional.softmax(logits, dim=1)
            top_probs, top_indices = probs.topk(1, dim=1)

            # Assemble the result.
            class_idx = top_indices[0, 0].item()
            confidence = top_probs[0, 0].item()

            return {
                "class_id": class_idx,
                "class_name": self.id2label[class_idx],
                "confidence": confidence,
                "device_used": str(self.device),
                "processing_time": processing_time,
                "hardware_info": get_hardware_info()
            }

        except Exception as e:
            print(f"处理图片时出错: {e}")
            return {
                "class_id": -1,
                "class_name": "error",
                "confidence": 0.0,
                "device_used": str(self.device),
                "processing_time": 0.0,
                "error": str(e),
                "hardware_info": get_hardware_info()
            }

def is_kunlunxin_gpu():
    """Return True if the host shows any sign of a Kunlunxin XPU.

    Four independent, best-effort probes are tried; the first hit wins:
    an XPU runtime named in ``LD_PRELOAD``, ``xpu-smi`` on ``PATH``,
    ``/dev/xpu*`` device nodes, and a Kunlun/R200 entry in ``lspci`` output.
    A probe that cannot run (e.g. ``lspci`` not installed) counts as a miss
    and never raises.

    Returns:
        bool: True when at least one probe matches, False otherwise.
    """
    # XPU runtime library injected through LD_PRELOAD.
    preload = os.environ.get('LD_PRELOAD', '')
    if 'XPURT' in preload or 'libxpurt' in preload:
        return True

    # xpu-smi management tool available on PATH.
    if shutil.which('xpu-smi'):
        return True

    # XPU device nodes. Expand the wildcard in Python: an argument list given
    # to subprocess is not shell-expanded, so `ls /dev/xpu*` never matched.
    if glob.glob('/dev/xpu*'):
        return True

    # Kunlunxin signature among the PCI devices.
    try:
        result = subprocess.run(['lspci'], capture_output=True, text=True)
    except (OSError, subprocess.SubprocessError, UnicodeError):
        # lspci missing or unreadable output: treat as "not detected".
        return False
    pci_listing = result.stdout
    return 'Kunlun' in pci_listing or 'kunlun' in pci_listing or 'R200' in pci_listing

def check_kunlunxin_available():
    """Return True if a Kunlunxin GPU is usable through the CUDA interface.

    Requires ``torch.cuda.is_available()``. The device is then treated as
    Kunlunxin when the name of CUDA device 0 contains ``R200`` or ``8F``, or,
    failing that, when ``is_kunlunxin_gpu()`` detects one on the host.

    Returns:
        bool: True when a Kunlunxin device is detected, False otherwise.
    """
    if not torch.cuda.is_available():
        return False

    # Check the reported device name first.
    try:
        if torch.cuda.device_count() > 0:
            device_name = torch.cuda.get_device_name(0)
            if 'R200' in device_name or '8F' in device_name:
                return True
    except Exception:
        # Deliberate best-effort: a failed name query must not prevent the
        # host-level fallback below from running.
        pass

    # Otherwise rely on host-level signs of the symbol-rewriting setup.
    return is_kunlunxin_gpu()

def get_hardware_info():
    """Collect a small hardware description for API responses.

    Returns:
        dict: Always contains ``device_type`` and ``backend``. When CUDA is
        available with at least one device, ``device_name`` (device 0),
        ``device_count`` and ``cuda_available`` are added as well.
    """
    # NOTE(review): device_type/backend are fixed labels and are reported
    # even when the service runs on CPU or another GPU - confirm intended.
    info = {
        "device_type": "昆仑芯GPU",
        "backend": "符号重写模式 (CUDA兼容)"
    }

    try:
        cuda_available = torch.cuda.is_available()
        if cuda_available and torch.cuda.device_count() > 0:
            info.update({
                "device_name": torch.cuda.get_device_name(0),
                "device_count": torch.cuda.device_count(),
                "cuda_available": cuda_available
            })
    except Exception:
        # Best-effort: if the device query fails, return the base info only.
        pass

    return info

def get_device():
    """Pick the torch device to run on, preferring a Kunlunxin GPU.

    Order of preference: Kunlunxin (exposed as ``cuda:0``), then any other
    CUDA device (``cuda:0``), then CPU. The choice is printed.

    Returns:
        torch.device: The selected device.
    """
    # Kunlunxin first (reached through the CUDA-compatible interface).
    if check_kunlunxin_available():
        print("检测到昆仑芯GPU可用（符号重写模式）")
        try:
            xpu_device = torch.device("cuda:0")
            print(f"使用昆仑芯设备（CUDA兼容模式）: {xpu_device}")
        except Exception as err:
            print(f"设置昆仑芯设备时出错: {err}")
            return torch.device("cpu")
        return xpu_device

    # No accelerator at all: fall back to CPU.
    if not torch.cuda.is_available():
        print("未检测到加速设备，使用CPU")
        return torch.device("cpu")

    # CUDA is available but no Kunlunxin device was detected.
    print("检测到NVIDIA GPU可用")
    return torch.device("cuda:0")

def setup_kunlunxin_environment():
    """Prepare process environment variables for a Kunlunxin GPU.

    Does nothing unless ``check_kunlunxin_available()`` is true. Otherwise
    sets ``XPU_VISIBLE_DEVICES`` to ``'0'`` and prints progress messages.
    """
    if not check_kunlunxin_available():
        return

    print("正在设置昆仑芯GPU环境...")
    try:
        # Select the first Kunlunxin card.
        os.environ['XPU_VISIBLE_DEVICES'] = '0'
        print("昆仑芯GPU环境设置完成（符号重写模式）")
    except Exception as err:
        print(f"设置昆仑芯环境时出错: {err}")

# Service initialisation: create the Flask application.
app = Flask(__name__)
MODEL_PATH = os.environ.get("MODEL_PATH", "/model")  # Model directory: MODEL_PATH env var, defaulting to /model

# Set up the Kunlunxin environment, choose the device and load the classifier.
# These run at import time, so importing this module loads the model.
setup_kunlunxin_environment()
device = get_device()
classifier = ImageClassifier(MODEL_PATH, device)

def _error_response(message, status_code):
    """Build the JSON error envelope shared by the prediction route's failure paths."""
    payload = {
        "prediction": {
            "class_id": -1,
            "class_name": "error",
            "confidence": 0.0,
            "device_used": str(device),
            "processing_time": 0.0,
            "error": message,
            "hardware_info": get_hardware_info()
        },
        "status": "error"
    }
    return jsonify(payload), status_code


@app.route('/v1/private/s782b4996', methods=['POST'])
def predict_single():
    """Classify one uploaded image and return the prediction as JSON.

    Expects a multipart upload with the file under the ``image`` field.

    Returns:
        * 200 with ``status`` "success" and the prediction on success.
        * 400 with ``status`` "error" when no ``image`` file was sent.
        * 500 with ``status`` "error" when decoding or inference fails.
    """
    if 'image' not in request.files:
        return _error_response("请求中未包含图片", 400)

    image_file = request.files['image']
    try:
        image = Image.open(BytesIO(image_file.read())).convert("RGB")

        # Run the classifier.
        prediction_result = classifier.predict_single_image(image)

        # The classifier reports failures through an "error" entry instead of
        # raising; surface those as errors rather than as "success".
        if "error" in prediction_result:
            return jsonify({
                "prediction": prediction_result,
                "status": "error"
            }), 500

        # Build the response.
        response = {
            "prediction": prediction_result,
            "status": "success"
        }

        return jsonify(response)

    except Exception as e:
        return _error_response(str(e), 500)

@app.route('/health', methods=['GET'])
def health_check():
    """Health-check endpoint.

    Returns HTTP 200 with a JSON body describing the service: a fixed
    "healthy" status, whether a Kunlunxin device is detected, the device in
    use, the hardware info, and a constant ``model_loaded`` flag.
    """
    hw = get_hardware_info()
    kunlunxin_ok = check_kunlunxin_available()

    body = {
        "status": "healthy",
        "kunlunxin_available": kunlunxin_ok,
        "device_used": str(device),
        "hardware_info": hw,
        "model_loaded": True,
        "service": "昆仑芯GPU图像分类服务"
    }
    return jsonify(body), 200

@app.route('/device-info', methods=['GET'])
def device_info():
    """Device-information endpoint: return the hardware info dict as JSON."""
    hardware = get_hardware_info()
    return jsonify(hardware)

if __name__ == "__main__":
    # Print the startup banner.
    print("=== 昆仑芯GPU图像分类服务启动 ===")
    print(f"模型路径: {MODEL_PATH}")
    print(f"使用设备: {device}")

    # Probe once and reuse the answer: the check may spawn subprocesses, and
    # a single result keeps the two messages below consistent.
    kunlunxin_available = check_kunlunxin_available()
    print(f"昆仑芯可用: {kunlunxin_available}")

    if kunlunxin_available:
        print("✅ 服务将在昆仑芯GPU上运行（符号重写模式）")
    elif torch.cuda.is_available():
        print("⚠️  服务在NVIDIA GPU上运行")
    else:
        print("⚠️  服务在CPU上运行")

    # Serve on all interfaces, port 80, with Flask debug mode off.
    print("服务启动完成，监听端口 80")
    app.run(host='0.0.0.0', port=80, debug=False)