diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..aba2f10
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,4 @@
+*.whl filter=lfs diff=lfs merge=lfs -text
+*.a filter=lfs diff=lfs merge=lfs -text
+*.mp4 filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
diff --git a/1.jpg b/1.jpg
new file mode 100644
index 0000000..0801017
Binary files /dev/null and b/1.jpg differ
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..d0edc08
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,25 @@
+FROM corex:3.2.1-ubuntu20.04-py3.10-slim
+
+WORKDIR /workspace
+
+# No trailing ':' here: an empty LD_LIBRARY_PATH entry makes the dynamic loader search the current directory.
+ENV LD_LIBRARY_PATH=/usr/local/corex/lib:/usr/local/openmpi/lib
+COPY requirements.txt /workspace/
+COPY whl /workspace/whl
+RUN pip install --no-cache-dir whl/*.whl -i https://mirrors.aliyun.com/pypi/simple/
+RUN pip install --no-cache-dir -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple/
+# Installed after requirements.txt, presumably so that this pinned numpy version is the one left in the image.
+RUN pip install --no-cache-dir numpy==1.23.5 -i https://mirrors.aliyun.com/pypi/simple/
+# Refresh the package index before installing, and drop it in the same layer to keep the image small.
+RUN apt-get update \
+    && apt-get install -y libgl1 \
+    && rm -rf /var/lib/apt/lists/*
+COPY 1.jpg /workspace/
+COPY test.py /workspace/
+COPY app.py /workspace/
+COPY pre_processor.py /workspace/
+COPY run.sh /workspace/
+# NOTE: this only removes the wheels from the final filesystem; they still occupy the earlier COPY layer.
+RUN rm -rf whl
+
+CMD ["./run.sh"]
diff --git a/README.md b/README.md
index eaa4509..85c5c6f 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,26 @@
 # enginex-bi_series-paddleocr
+
+## Build docker image
+```bash
+docker build -t paddleocr:bi . 
+```
+
+## 测试
+### 下载模型
+支持 PP-OCRv4及以下版本
+PP-OCRv4模型:
+- det: https://paddleocr.bj.bcebos.com/PP-OCRv4/chinese/ch_PP-OCRv4_det_infer.tar
+- rec: https://paddleocr.bj.bcebos.com/PP-OCRv4/chinese/ch_PP-OCRv4_rec_infer.tar
+- cls: https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar
+
+### 测试
+模型放在`/mnt/models/ocr/`下,运行下面的测试程序,可以识别出示例图片中的文字
+```bash
+python3 test.py
+```
+
+## OCR API Server
+提供了一个`http` API server,可以通过服务的方式加载模型以及进行图片识别,启动命令如下:
+```bash
+python3 app.py
+```
\ No newline at end of file
diff --git a/run.sh b/run.sh
new file mode 100755
index 0000000..2321bc5
--- /dev/null
+++ b/run.sh
@@ -0,0 +1,6 @@
+#! /usr/bin/env bash
+
+unset NVIDIA_VISIBLE_DEVICES
+unset CUDA_VISIBLE_DEVICES
+# exec replaces the shell so the Python server receives container signals (e.g. SIGTERM) directly.
+exec python3 app.py
\ No newline at end of file
diff --git a/test.py b/test.py
new file mode 100644
index 0000000..9bcaed3
--- /dev/null
+++ b/test.py
@@ -0,0 +1,92 @@
+from paddleocr import PaddleOCR
+import time
+import cv2
+import numpy as np
+import os
+import logging
+logging.basicConfig(level=logging.INFO)
+
+# ocr = PaddleOCR(
+#     use_doc_orientation_classify=False,
+#     use_doc_unwarping=False,
+#     use_textline_orientation=False) # text detection + text recognition
+# ocr = PaddleOCR(use_doc_orientation_classify=True, use_doc_unwarping=True) # text image preprocessing + text detection + textline orientation classification + text recognition
+# ocr = PaddleOCR(use_doc_orientation_classify=False, use_doc_unwarping=False) # text detection + textline orientation classification + text recognition
+# NOTE(review): the directory names below refer to the PP-OCRv4 *server* det/rec models,
+# while the README links the non-server PP-OCRv4 archives -- confirm which set is intended.
+ocr = PaddleOCR(
+    det_model_dir="/mnt/models/ocr/ch_PP-OCRv4_det_server_infer",
+    rec_model_dir="/mnt/models/ocr/ch_PP-OCRv4_rec_server_infer",
+    cls_model_dir="/mnt/models/ocr/ch_ppocr_mobile_v2.0_cls_infer", # https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar
+    use_doc_orientation_classify=False,
+    use_doc_unwarping=False,
+    use_gpu=True,
+    cpu_threads=32,
+    enable_mkldnn=True,
+    use_textline_orientation=False)
+
+time.sleep(5)
+total_time = 0
+WITH_PREPROCESSING = True
+NOISE_THRESHOLD = int(os.getenv("NOISE_THRESHOLD", "10"))
+result = []
+for i in range(1):
+    # img_path = f"test{i%5}.pdf"
+    img_path = "1.jpg"
+    print(f"run for {img_path}")
+    image = cv2.imread(img_path)
+    if image is None:
+        # cv2.imread returns None (it does not raise) when the file is missing or cannot be decoded.
+        raise FileNotFoundError(f"cannot read image: {img_path}")
+    start = time.time()
+
+    # height, width = image.shape[:2]
+    # image = cv2.resize(image, (width // 2, height // 2))
+
+    if WITH_PREPROCESSING:
+        # Convert to grayscale for the noise estimate
+        if len(image.shape) == 3:  # color image (three channels)
+            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+        else:
+            gray = image
+
+        logging.info(f"cvtColor takes {time.time() - start} s")
+
+        # Noise-level estimate (the standard deviation is used as a simple proxy)
+        noise_level = np.std(gray)  # measured on the grayscale image
+        logging.info(f"noise_level: {noise_level}")
+        if noise_level > NOISE_THRESHOLD:
+            logging.info("denoising")
+            if len(image.shape) == 3:  # color image
+                logging.info("denoisin for color")
+                processed_img = cv2.fastNlMeansDenoisingColored(image)
+            else:  # grayscale image
+                logging.info("denoising for gray")
+                processed_img = cv2.fastNlMeansDenoising(image)
+        else:
+            processed_img = image
+
+        logging.info(f"Preprocessing takes {time.time() - start} s")
+    else:
+        processed_img = image
+        logging.info("Skip Preprocessing")
+
+    # Run OCR on the preprocessed image; the raw image was passed before,
+    # which silently discarded the denoising result computed above.
+    result = ocr.ocr(processed_img)
+    end = time.time()
+    print(f"{img_path} elapsed: {end - start}")
+    total_time += (end - start)
+
+print(f"total time: {total_time}")
+# `result` holds the output of the last iteration only.
+for res in result:
+    print(f"res: {res}")
+    # for line in res:
+    #     print(line)
+
+# result = ocr.predict("docs/images/00006737.jpg")
+# for res in result:
+#     res.print()
+#     res.save_to_img("output")
+#     res.save_to_json("output")
diff --git a/whl/flash_attn-2.5.8+corex.3.2.1-cp310-cp310-linux_x86_64.whl b/whl/flash_attn-2.5.8+corex.3.2.1-cp310-cp310-linux_x86_64.whl
new file mode 100644
index 0000000..95120f6
--- /dev/null
+++ 
b/whl/flash_attn-2.5.8+corex.3.2.1-cp310-cp310-linux_x86_64.whl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a51b006a2c406281b1f8aa3a0493ddbf37d51590af8fb5edf0a94dc674dc7b70 +size 4142325 diff --git a/whl/paddlepaddle-2.4.1+corex.3.2.1-cp310-cp310-linux_x86_64.whl b/whl/paddlepaddle-2.4.1+corex.3.2.1-cp310-cp310-linux_x86_64.whl new file mode 100644 index 0000000..06bcf9b --- /dev/null +++ b/whl/paddlepaddle-2.4.1+corex.3.2.1-cp310-cp310-linux_x86_64.whl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cde78d2bfa6af0ab5db59608255f240e8b32e8f8ebc3693f064d4e68e9dabaa +size 296430748 diff --git a/whl/torch-2.1.0+corex.3.2.1-cp310-cp310-linux_x86_64.whl b/whl/torch-2.1.0+corex.3.2.1-cp310-cp310-linux_x86_64.whl new file mode 100644 index 0000000..3502e3c --- /dev/null +++ b/whl/torch-2.1.0+corex.3.2.1-cp310-cp310-linux_x86_64.whl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25b78c30b3e3558aefa6b2acd3113f438ff5285b009c7c424ee2f3dd4c8966ce +size 285288281 diff --git a/whl/torchaudio-2.1.0+corex.3.2.1-cp310-cp310-linux_x86_64.whl b/whl/torchaudio-2.1.0+corex.3.2.1-cp310-cp310-linux_x86_64.whl new file mode 100644 index 0000000..825d200 --- /dev/null +++ b/whl/torchaudio-2.1.0+corex.3.2.1-cp310-cp310-linux_x86_64.whl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c39b86d36713cfabf9e2f4a5013757f3d568fb42d5cf22160f418bb30bfe3f9e +size 3033678 diff --git a/whl/torchvision-0.16.0+corex.3.2.1-cp310-cp310-linux_x86_64.whl b/whl/torchvision-0.16.0+corex.3.2.1-cp310-cp310-linux_x86_64.whl new file mode 100644 index 0000000..13d85cd --- /dev/null +++ b/whl/torchvision-0.16.0+corex.3.2.1-cp310-cp310-linux_x86_64.whl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8dc3b7fa989ebb2d7150d908961e79ff566cb74c5c366109d99ea42bc8849d0f +size 6925160