commit 6756e0f47f7e0a150138efe8cf9a92ff540ec1fd
Author: zhousha <736730048@qq.com>
Date:   Fri Sep 19 14:32:49 2025 +0800

    update

diff --git a/026_0010.jpg b/026_0010.jpg
new file mode 100755
index 0000000..5571278
Binary files /dev/null and b/026_0010.jpg differ
diff --git a/Dockerfile_kunlunxin b/Dockerfile_kunlunxin
new file mode 100644
index 0000000..c049615
--- /dev/null
+++ b/Dockerfile_kunlunxin
@@ -0,0 +1,18 @@
+FROM r200-8f_xmlir-ubuntu_2004_x86_64:v0.27
+
+WORKDIR /workspace/
+COPY ./model_test_caltech_http_kunlunxin.py /workspace/
+COPY ./microsoft_beit_base_patch16_224_pt22k_ft22k /model
+
+
+# 安装transformers 4.46.3
+RUN /root/miniconda/envs/python38_torch201_cuda/bin/python3 -m pip install --upgrade pip
+RUN /root/miniconda/envs/python38_torch201_cuda/bin/python3 -m pip install --no-cache-dir -i https://pypi.tuna.tsinghua.edu.cn/simple transformers==4.46.3
+RUN /root/miniconda/envs/python38_torch201_cuda/bin/python3 -m pip install flask
+
+EXPOSE 80
+
+ENTRYPOINT ["/root/miniconda/envs/python38_torch201_cuda/bin/python3", "model_test_caltech_http_kunlunxin.py"]
+
+
+
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..579a0e9
--- /dev/null
+++ b/README.md
@@ -0,0 +1,35 @@
+# enginex-r200-vc-cnn
+
+运行于昆仑芯 R200 算力卡的【视觉分类】引擎,基于 CNN 架构,支持 BEiT、MobileViT 等流行模型
+
+## QuickStart
+
+1、从 modelscope 上下载视觉分类的模型,例如 microsoft/beit-base-patch16-224
+```bash
+modelscope download --model microsoft/beit-base-patch16-224
+```
+
+2、使用 Dockerfile 生成镜像
+```bash
+docker build -f Dockerfile_kunlunxin -t kunlunxin-my:v1 .
+``` +其中基础镜像 r200-8f_xmlir-ubuntu_2004_x86_64:v0.27 通过联系昆仑芯厂商技术支持可获取 +注意 Dockerfile 中已预先将模型 microsoft_beit_base_patch16_224_pt22k_ft22k 放在了 /model 下面 + +3、启动docker +```python +docker run -it --rm \ + --privileged \ + -p 10086:80 \ + --device /dev/xpu0 \ + --device /dev/xpuctrl \ + --name kunlun-my-v4 \ + kunlunxin-my:v1 +``` + +4、测试服务 +```python +curl -X POST http://localhost:10086/v1/private/s782b4996 \ +> -F "image=@/home/zhoushasha/models/026_0010.jpg" +``` diff --git a/model_test_caltech_http_kunlunxin.py b/model_test_caltech_http_kunlunxin.py new file mode 100644 index 0000000..0fae836 --- /dev/null +++ b/model_test_caltech_http_kunlunxin.py @@ -0,0 +1,287 @@ +import requests +import json +import torch +from PIL import Image +from io import BytesIO +from transformers import AutoImageProcessor, AutoModelForImageClassification +import os +import time +import subprocess +from flask import Flask, request, jsonify + +class ImageClassifier: + def __init__(self, model_path: str, device: torch.device): + """初始化图像分类器,指定设备""" + # 模型路径有效性校验 + if not os.path.exists(model_path): + raise ValueError(f"模型路径不存在: {model_path}") + if not os.path.isdir(model_path): + raise ValueError(f"模型路径不是目录: {model_path}") + + # 检查模型必要文件 + required_files = ["config.json", "pytorch_model.bin"] + missing_files = [f for f in required_files if not os.path.exists(os.path.join(model_path, f))] + if missing_files: + raise ValueError(f"模型路径缺少必要文件: {missing_files}") + + self.processor = AutoImageProcessor.from_pretrained(model_path) + self.model = AutoModelForImageClassification.from_pretrained(model_path) + + # 将模型移动到指定设备 + self.model = self.model.to(device) + self.device = device + + # 检查设备类型并打印信息 + if device.type == "cuda": + if is_kunlunxin_gpu(): + print(f"模型是否在昆仑芯GPU上: {next(self.model.parameters()).is_cuda}") + print("使用符号重写技术(CUDA兼容模式)") + else: + print(f"模型是否在NVIDIA GPU上: {next(self.model.parameters()).is_cuda}") + else: + print(f"模型在 {device.type.upper()} 上运行") + + # 多卡处理 + if device.type == 
"cuda" and torch.cuda.device_count() > 1: + self.model = torch.nn.DataParallel(self.model) + + self.id2label = self.model.module.config.id2label if hasattr(self.model, 'module') else self.model.config.id2label + + def predict_single_image(self, image: Image.Image) -> dict: + """预测单张PIL图片""" + try: + # 预处理 + inputs = self.processor(images=image, return_tensors="pt") + + # 将输入数据移动到设备 + inputs = inputs.to(self.device) + + # 模型推理 + start_time = time.time() + with torch.no_grad(): + ts = time.time() + outputs = self.model(** inputs) + print('kunlunxin T1', time.time() - ts, flush=True) + + ts = time.time() + for i in range(1000): + outputs = self.model(**inputs) + print('kunlunxin T2', time.time() - ts, flush=True) + + processing_time = time.time() - start_time + + # 获取预测结果(只取置信度最高的一个) + logits = outputs.logits + probs = torch.nn.functional.softmax(logits, dim=1) + top_probs, top_indices = probs.topk(1, dim=1) + + # 整理结果 + class_idx = top_indices[0, 0].item() + confidence = top_probs[0, 0].item() + + return { + "class_id": class_idx, + "class_name": self.id2label[class_idx], + "confidence": confidence, + "device_used": str(self.device), # 修改为使用 str(device) + "processing_time": processing_time, + "hardware_info": get_hardware_info() + } + + except Exception as e: + print(f"处理图片时出错: {e}") + return { + "class_id": -1, + "class_name": "error", + "confidence": 0.0, + "device_used": str(self.device), # 修改为使用 str(device) + "processing_time": 0.0, + "error": str(e), + "hardware_info": get_hardware_info() + } + +def is_kunlunxin_gpu(): + """检查是否为昆仑芯GPU""" + try: + # 检查xpu-smi命令是否存在 + result = subprocess.run(['which', 'xpu-smi'], capture_output=True, text=True) + if result.returncode == 0: + return True + + # 检查PCI设备中是否有昆仑芯特征 + result = subprocess.run(['lspci'], capture_output=True, text=True) + if 'Kunlun' in result.stdout or 'kunlun' in result.stdout or 'R200' in result.stdout: + return True + + # 检查/dev目录下是否有xpu设备 + result = subprocess.run(['ls', '/dev/xpu*'], 
capture_output=True, text=True) + if result.returncode == 0 and 'xpu' in result.stdout: + return True + + # 检查是否有昆仑芯相关的环境变量或库加载 + if 'XPURT' in os.environ.get('LD_PRELOAD', '') or 'libxpurt' in os.environ.get('LD_PRELOAD', ''): + return True + + except: + pass + return False + +def check_kunlunxin_available(): + """检查昆仑芯GPU是否可用(通过CUDA接口和符号重写)""" + if torch.cuda.is_available(): + try: + # 检查设备名称 + if torch.cuda.device_count() > 0: + device_name = torch.cuda.get_device_name(0) + if 'R200' in device_name or '8F' in device_name: + return True + # 如果有符号重写的迹象,也认为是昆仑芯 + if is_kunlunxin_gpu(): + return True + except: + pass + return False + +def get_hardware_info(): + """获取硬件信息""" + info = { + "device_type": "昆仑芯GPU", + "backend": "符号重写模式 (CUDA兼容)" + } + + try: + if torch.cuda.is_available() and torch.cuda.device_count() > 0: + info.update({ + "device_name": torch.cuda.get_device_name(0), + "device_count": torch.cuda.device_count(), + "cuda_available": torch.cuda.is_available() + }) + except: + pass + + return info + +def get_device(): + """获取最佳可用设备(优先昆仑芯GPU)""" + # 首先检查昆仑芯GPU(通过符号重写) + if check_kunlunxin_available(): + print("检测到昆仑芯GPU可用(符号重写模式)") + try: + device = torch.device("cuda:0") + print(f"使用昆仑芯设备(CUDA兼容模式): {device}") + return device + except Exception as e: + print(f"设置昆仑芯设备时出错: {e}") + return torch.device("cpu") + + # 然后检查NVIDIA GPU + elif torch.cuda.is_available(): + print("检测到NVIDIA GPU可用") + return torch.device("cuda:0") + + # 最后使用CPU + else: + print("未检测到加速设备,使用CPU") + return torch.device("cpu") + +def setup_kunlunxin_environment(): + """设置昆仑芯GPU环境""" + if check_kunlunxin_available(): + print("正在设置昆仑芯GPU环境...") + try: + # 设置昆仑芯相关的环境变量 + os.environ['XPU_VISIBLE_DEVICES'] = '0' # 使用第一张昆仑芯卡 + print("昆仑芯GPU环境设置完成(符号重写模式)") + except Exception as e: + print(f"设置昆仑芯环境时出错: {e}") + +# 初始化服务 +app = Flask(__name__) +MODEL_PATH = os.environ.get("MODEL_PATH", "/model") # 模型路径(环境变量或默认路径) + +# 设置昆仑芯环境并初始化分类器 +setup_kunlunxin_environment() +device = get_device() 
+classifier = ImageClassifier(MODEL_PATH, device) + +@app.route('/v1/private/s782b4996', methods=['POST']) +def predict_single(): + """接收单张图片并返回预测结果""" + if 'image' not in request.files: + return jsonify({ + "prediction": { + "class_id": -1, + "class_name": "error", + "confidence": 0.0, + "device_used": str(device), + "processing_time": 0.0, + "error": "请求中未包含图片", + "hardware_info": get_hardware_info() + }, + "status": "error" + }), 400 + + image_file = request.files['image'] + try: + image = Image.open(BytesIO(image_file.read())).convert("RGB") + + # 获取预测结果 + prediction_result = classifier.predict_single_image(image) + + # 构建响应 + response = { + "prediction": prediction_result, + "status": "success" + } + + return jsonify(response) + + except Exception as e: + return jsonify({ + "prediction": { + "class_id": -1, + "class_name": "error", + "confidence": 0.0, + "device_used": str(device), + "processing_time": 0.0, + "error": str(e), + "hardware_info": get_hardware_info() + }, + "status": "error" + }), 500 + +@app.route('/health', methods=['GET']) +def health_check(): + """健康检查接口""" + hardware_info = get_hardware_info() + + return jsonify({ + "status": "healthy", + "kunlunxin_available": check_kunlunxin_available(), + "device_used": str(device), + "hardware_info": hardware_info, + "model_loaded": True, + "service": "昆仑芯GPU图像分类服务" + }), 200 + +@app.route('/device-info', methods=['GET']) +def device_info(): + """设备信息接口""" + return jsonify(get_hardware_info()) + +if __name__ == "__main__": + # 打印启动信息 + print("=== 昆仑芯GPU图像分类服务启动 ===") + print(f"模型路径: {MODEL_PATH}") + print(f"使用设备: {device}") + print(f"昆仑芯可用: {check_kunlunxin_available()}") + + if check_kunlunxin_available(): + print("✅ 服务将在昆仑芯GPU上运行(符号重写模式)") + elif torch.cuda.is_available(): + print("⚠️ 服务在NVIDIA GPU上运行") + else: + print("⚠️ 服务在CPU上运行") + + print("服务启动完成,监听端口 80") + app.run(host='0.0.0.0', port=80, debug=False) \ No newline at end of file