This commit is contained in:
zhousha
2025-09-19 14:32:49 +08:00
commit 6756e0f47f
4 changed files with 340 additions and 0 deletions

BIN
026_0010.jpg Executable file

Binary file not shown.

After

Width:  |  Height:  |  Size: 26 KiB

18
Dockerfile_kunlunxin Normal file
View File

@@ -0,0 +1,18 @@
FROM r200-8f_xmlir-ubuntu_2004_x86_64:v0.27
WORKDIR /workspace/
COPY ./model_test_caltech_http_kunlunxin.py /workspace/
# Bake the BEiT model into the image at /model (the service's default MODEL_PATH).
COPY ./microsoft_beit_base_patch16_224_pt22k_ft22k /model
# Install transformers 4.46.3 and flask into the vendor-provided conda env.
RUN /root/miniconda/envs/python38_torch201_cuda/bin/python3 -m pip install --upgrade pip
RUN /root/miniconda/envs/python38_torch201_cuda/bin/python3 -m pip install --no-cache-dir -i https://pypi.tuna.tsinghua.edu.cn/simple transformers==4.46.3
# Consistency: use the same PyPI mirror and --no-cache-dir as the line above.
RUN /root/miniconda/envs/python38_torch201_cuda/bin/python3 -m pip install --no-cache-dir -i https://pypi.tuna.tsinghua.edu.cn/simple flask
EXPOSE 80
ENTRYPOINT ["/root/miniconda/envs/python38_torch201_cuda/bin/python3", "model_test_caltech_http_kunlunxin.py"]

35
README.md Normal file
View File

@@ -0,0 +1,35 @@
# enginex-r200-vc-cnn
运行于昆仑芯 R200 算力卡的【视觉分类】引擎,基于 CNN 架构,支持 BEiT、MobileViT 等流行模型
## QuickStart
1、从 modelscope上下载视觉分类的模型例如 microsoft/beit-base-patch16-224
```bash
modelscope download --model microsoft/beit-base-patch16-224
```
2、使用 Dockerfile 生成镜像
```bash
docker build -f Dockerfile_kunlunxin -t kunlunxin-my:v1 .
```
其中基础镜像 r200-8f_xmlir-ubuntu_2004_x86_64:v0.27 通过联系昆仑芯厂商技术支持可获取
注意 Dockerfile 中已预先将模型 microsoft_beit_base_patch16_224_pt22k_ft22k 放在了 /model 下面
3、启动docker
```bash
docker run -it --rm \
--privileged \
-p 10086:80 \
--device /dev/xpu0 \
--device /dev/xpuctrl \
--name kunlun-my-v4 \
kunlunxin-my:v1
```
4、测试服务
```bash
curl -X POST http://localhost:10086/v1/private/s782b4996 \
  -F "image=@/home/zhoushasha/models/026_0010.jpg"
```

View File

@@ -0,0 +1,287 @@
import requests
import json
import torch
from PIL import Image
from io import BytesIO
from transformers import AutoImageProcessor, AutoModelForImageClassification
import os
import time
import subprocess
from flask import Flask, request, jsonify
class ImageClassifier:
    """Image classifier backed by a HuggingFace AutoModelForImageClassification.

    Loads a local model directory onto the given torch device and exposes
    single-image top-1 prediction. Works on a Kunlunxin XPU (exposed through
    the CUDA API via symbol rewriting), an NVIDIA GPU, or the CPU.
    """

    def __init__(self, model_path: str, device: torch.device):
        """Load processor and model from *model_path* and move them to *device*.

        Raises:
            ValueError: if the path is missing, not a directory, or lacks the
                required model files.
        """
        # Validate the model directory up front so we fail with a clear
        # message instead of a deep library traceback.
        if not os.path.exists(model_path):
            raise ValueError(f"模型路径不存在: {model_path}")
        if not os.path.isdir(model_path):
            raise ValueError(f"模型路径不是目录: {model_path}")
        # config.json is always required; weights may ship as either the
        # legacy pytorch_model.bin or the newer model.safetensors format.
        missing_files = []
        if not os.path.exists(os.path.join(model_path, "config.json")):
            missing_files.append("config.json")
        weight_candidates = ("pytorch_model.bin", "model.safetensors")
        if not any(os.path.exists(os.path.join(model_path, f)) for f in weight_candidates):
            missing_files.append("pytorch_model.bin")
        if missing_files:
            raise ValueError(f"模型路径缺少必要文件: {missing_files}")
        self.processor = AutoImageProcessor.from_pretrained(model_path)
        self.model = AutoModelForImageClassification.from_pretrained(model_path)
        # Move the model to the requested device.
        self.model = self.model.to(device)
        self.device = device
        # Report placement; Kunlunxin XPUs masquerade as CUDA devices through
        # symbol rewriting, so .is_cuda is True for them as well.
        if device.type == "cuda":
            if is_kunlunxin_gpu():
                print(f"模型是否在昆仑芯GPU上: {next(self.model.parameters()).is_cuda}")
                print("使用符号重写技术CUDA兼容模式")
            else:
                print(f"模型是否在NVIDIA GPU上: {next(self.model.parameters()).is_cuda}")
        else:
            print(f"模型在 {device.type.upper()} 上运行")
        # Wrap in DataParallel when several CUDA devices are visible.
        if device.type == "cuda" and torch.cuda.device_count() > 1:
            self.model = torch.nn.DataParallel(self.model)
        # DataParallel hides the underlying config behind .module.
        self.id2label = self.model.module.config.id2label if hasattr(self.model, 'module') else self.model.config.id2label

    def predict_single_image(self, image: Image.Image) -> dict:
        """Classify a single PIL image and return the top-1 prediction.

        Returns:
            dict with class_id / class_name / confidence plus timing and
            hardware info; on failure, class_id is -1 and "error" holds the
            exception message.
        """
        try:
            # Preprocess and move the batch onto the model's device.
            inputs = self.processor(images=image, return_tensors="pt")
            inputs = inputs.to(self.device)
            # Single timed forward pass. BUGFIX: a leftover benchmark loop
            # used to run 1000 extra inferences (with debug timing prints)
            # on every request; it has been removed.
            start_time = time.time()
            with torch.no_grad():
                outputs = self.model(**inputs)
            processing_time = time.time() - start_time
            # Take the top-1 class from the softmax over the logits.
            logits = outputs.logits
            probs = torch.nn.functional.softmax(logits, dim=1)
            top_probs, top_indices = probs.topk(1, dim=1)
            class_idx = top_indices[0, 0].item()
            confidence = top_probs[0, 0].item()
            return {
                "class_id": class_idx,
                "class_name": self.id2label[class_idx],
                "confidence": confidence,
                "device_used": str(self.device),
                "processing_time": processing_time,
                "hardware_info": get_hardware_info()
            }
        except Exception as e:
            # Service boundary: report the failure in the payload instead of
            # letting the request handler crash.
            print(f"处理图片时出错: {e}")
            return {
                "class_id": -1,
                "class_name": "error",
                "confidence": 0.0,
                "device_used": str(self.device),
                "processing_time": 0.0,
                "error": str(e),
                "hardware_info": get_hardware_info()
            }
def is_kunlunxin_gpu():
    """Best-effort detection of Kunlunxin (XPU) hardware on this host.

    Probes, in order: the `xpu-smi` tool on PATH, `lspci` output, /dev/xpu*
    device nodes, and XPU runtime hints in LD_PRELOAD. Any probe failure is
    treated as "not found" rather than an error.

    Returns:
        bool: True if any probe indicates Kunlunxin hardware.
    """
    try:
        # xpu-smi being installed is a strong signal.
        result = subprocess.run(['which', 'xpu-smi'], capture_output=True, text=True)
        if result.returncode == 0:
            return True
        # Kunlunxin boards show up in lspci output.
        result = subprocess.run(['lspci'], capture_output=True, text=True)
        if 'Kunlun' in result.stdout or 'kunlun' in result.stdout or 'R200' in result.stdout:
            return True
        # BUGFIX: subprocess with a list does not expand shell globs, so the
        # original `ls /dev/xpu*` searched for a literal file named
        # "/dev/xpu*" and could never match. Scan /dev directly instead.
        if any(name.startswith('xpu') for name in os.listdir('/dev')):
            return True
        # Symbol-rewriting runtime preloaded via LD_PRELOAD.
        ld_preload = os.environ.get('LD_PRELOAD', '')
        if 'XPURT' in ld_preload or 'libxpurt' in ld_preload:
            return True
    except (OSError, subprocess.SubprocessError):
        # Missing tools or a non-Linux host simply mean "no Kunlunxin found".
        pass
    return False
def check_kunlunxin_available():
    """Check whether a Kunlunxin GPU is usable through the CUDA-compat layer.

    The Kunlunxin stack rewrites CUDA symbols, so the card appears to torch
    as a regular CUDA device; we inspect the reported device name and fall
    back to the host-level probes in `is_kunlunxin_gpu`.

    Returns:
        bool: True when a Kunlunxin device is reachable via torch.cuda.
    """
    # Without a CUDA-compatible runtime there is nothing to check.
    if not torch.cuda.is_available():
        return False
    try:
        if torch.cuda.device_count() > 0:
            device_name = torch.cuda.get_device_name(0)
            # "R200" / "8F" are Kunlunxin product identifiers.
            if 'R200' in device_name or '8F' in device_name:
                return True
        # Evidence of symbol rewriting also counts as Kunlunxin.
        if is_kunlunxin_gpu():
            return True
    except Exception:
        # Device queries can fail on partially initialized runtimes; treat
        # that as "not available" instead of crashing startup.
        pass
    return False
def get_hardware_info():
    """Return a dict describing the accelerator this service targets.

    The Kunlunxin labels are always present (this service is built for
    Kunlunxin first); CUDA device details are added when torch can see at
    least one device.
    """
    info = {
        "device_type": "昆仑芯GPU",
        "backend": "符号重写模式 (CUDA兼容)"
    }
    try:
        if torch.cuda.is_available() and torch.cuda.device_count() > 0:
            info.update({
                "device_name": torch.cuda.get_device_name(0),
                "device_count": torch.cuda.device_count(),
                "cuda_available": torch.cuda.is_available()
            })
    except Exception:
        # Device queries may fail mid-initialization; the base info suffices.
        pass
    return info
def get_device():
    """Select the best available torch device, preferring Kunlunxin GPU."""
    # Kunlunxin first: its runtime exposes the card through the CUDA API.
    if check_kunlunxin_available():
        print("检测到昆仑芯GPU可用符号重写模式")
        try:
            kunlun_device = torch.device("cuda:0")
            print(f"使用昆仑芯设备CUDA兼容模式: {kunlun_device}")
            return kunlun_device
        except Exception as e:
            print(f"设置昆仑芯设备时出错: {e}")
            return torch.device("cpu")
    # Next best: a plain NVIDIA GPU.
    if torch.cuda.is_available():
        print("检测到NVIDIA GPU可用")
        return torch.device("cuda:0")
    # Fallback: CPU.
    print("未检测到加速设备使用CPU")
    return torch.device("cpu")
def setup_kunlunxin_environment():
    """Prepare environment variables for the Kunlunxin runtime, if present."""
    if not check_kunlunxin_available():
        return
    print("正在设置昆仑芯GPU环境...")
    try:
        # Restrict the runtime to the first Kunlunxin card.
        os.environ['XPU_VISIBLE_DEVICES'] = '0'
        print("昆仑芯GPU环境设置完成符号重写模式")
    except Exception as e:
        print(f"设置昆仑芯环境时出错: {e}")
# Service bootstrap: runs at import time — creates the Flask app, resolves
# the model path, prepares the Kunlunxin environment, and loads the model.
app = Flask(__name__)
MODEL_PATH = os.environ.get("MODEL_PATH", "/model") # model path (env var or default)
# Set up the Kunlunxin environment, pick a device, and load the classifier.
setup_kunlunxin_environment()
device = get_device()
classifier = ImageClassifier(MODEL_PATH, device)
@app.route('/v1/private/s782b4996', methods=['POST'])
def predict_single():
    """Accept a single uploaded image and return its top-1 prediction.

    Expects a multipart form field named "image". Responds 200 with the
    prediction on success, 400 when the field is missing, 500 on any
    processing failure.
    """
    def _error_response(message: str, status_code: int):
        # Shared error envelope; previously this dict was duplicated
        # verbatim in both failure branches.
        return jsonify({
            "prediction": {
                "class_id": -1,
                "class_name": "error",
                "confidence": 0.0,
                "device_used": str(device),
                "processing_time": 0.0,
                "error": message,
                "hardware_info": get_hardware_info()
            },
            "status": "error"
        }), status_code

    if 'image' not in request.files:
        return _error_response("请求中未包含图片", 400)
    image_file = request.files['image']
    try:
        # Decode the upload and normalize to RGB before inference.
        image = Image.open(BytesIO(image_file.read())).convert("RGB")
        prediction_result = classifier.predict_single_image(image)
        return jsonify({
            "prediction": prediction_result,
            "status": "success"
        })
    except Exception as e:
        return _error_response(str(e), 500)
@app.route('/health', methods=['GET'])
def health_check():
    """Liveness endpoint reporting device, hardware, and model status."""
    payload = {
        "status": "healthy",
        "kunlunxin_available": check_kunlunxin_available(),
        "device_used": str(device),
        "hardware_info": get_hardware_info(),
        "model_loaded": True,
        "service": "昆仑芯GPU图像分类服务"
    }
    return jsonify(payload), 200
@app.route('/device-info', methods=['GET'])
def device_info():
    """Expose the hardware description as JSON."""
    hardware = get_hardware_info()
    return jsonify(hardware)
if __name__ == "__main__":
    # Startup banner: model location and selected accelerator.
    print("=== 昆仑芯GPU图像分类服务启动 ===")
    print(f"模型路径: {MODEL_PATH}")
    print(f"使用设备: {device}")
    # Probe once instead of three times: each check_kunlunxin_available()
    # call may shell out to `which`/`lspci`.
    kunlunxin_available = check_kunlunxin_available()
    print(f"昆仑芯可用: {kunlunxin_available}")
    if kunlunxin_available:
        print("✅ 服务将在昆仑芯GPU上运行符号重写模式")
    elif torch.cuda.is_available():
        print("⚠️ 服务在NVIDIA GPU上运行")
    else:
        print("⚠️ 服务在CPU上运行")
    print("服务启动完成,监听端口 80")
    # debug=False: never expose the Werkzeug debugger on a public service.
    app.run(host='0.0.0.0', port=80, debug=False)