Files
enginex-bi_series-paddleocr/test.py

85 lines
3.2 KiB
Python
Raw Normal View History

2025-08-18 16:21:14 +08:00
from paddleocr import PaddleOCR
import time
import cv2
import numpy as np
import os
import logging
# Emit INFO-level records so the timing messages logged later in this script
# are visible.
logging.basicConfig(level=logging.INFO)

# Alternative pipeline configurations, kept for reference:
#   PaddleOCR(use_doc_orientation_classify=False,
#             use_doc_unwarping=False,
#             use_textline_orientation=False)
#       -> text detection + text recognition
#   PaddleOCR(use_doc_orientation_classify=True, use_doc_unwarping=True)
#       -> text image preprocessing + text detection
#          + textline orientation classification + text recognition
#   PaddleOCR(use_doc_orientation_classify=False, use_doc_unwarping=False)
#       -> text detection + textline orientation classification
#          + text recognition

# Keyword arguments for the OCR engine, collected in one mapping and unpacked
# into the constructor below (same names, values and order as a direct call).
# NOTE(review): this mixes older-style names (det_model_dir, use_gpu, ...) with
# newer-style ones (use_doc_orientation_classify, ...); verify that the
# installed PaddleOCR release accepts all of them.
# NOTE(review): the download URLs below point at PP-OCRv3 / v2.0 archives while
# the directories are named PP-OCRv4 server models; confirm which is intended.
_OCR_OPTIONS = {
    # https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar
    "det_model_dir": "/mnt/models/ocr/ch_PP-OCRv4_det_server_infer",
    # https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar
    "rec_model_dir": "/mnt/models/ocr/ch_PP-OCRv4_rec_server_infer",
    # https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar
    "cls_model_dir": "/mnt/models/ocr/ch_ppocr_mobile_v2.0_cls_infer",
    "use_doc_orientation_classify": False,
    "use_doc_unwarping": False,
    "use_gpu": True,
    "cpu_threads": 32,
    "enable_mkldnn": True,
    # NOTE(review): the original trailing comment here read "Switch to
    # PP-OCRv5_mobile models", which does not match the v4 server paths above.
    "use_textline_orientation": False,
}
ocr = PaddleOCR(**_OCR_OPTIONS)

# Pause for five seconds before the timed run starts (presumably to let the
# engine finish initialising; TODO confirm the reason).
time.sleep(5)
# Running sum of the per-image elapsed seconds measured by the loop below.
total_time = 0

# When True, the optional denoising step runs before OCR.
WITH_PREPROCESSING = True

# Grayscale standard deviation above which an image gets denoised; can be
# overridden through the NOISE_THRESHOLD environment variable (default 10).
NOISE_THRESHOLD = int(os.environ.get("NOISE_THRESHOLD", "10"))
# Benchmark loop: load an image, optionally denoise it, run OCR on the
# (possibly denoised) image and accumulate the elapsed wall-clock time.
# The timer starts after the image is loaded, so it covers preprocessing plus
# recognition but not disk I/O.
# NOTE(review): the block nesting was reconstructed from statement order;
# confirm it against the original layout.
for i in range(1):
    # img_path = f"test{i%5}.pdf"
    img_path = "1.jpg"
    print(f"run for {img_path}")
    image = cv2.imread(img_path)
    if image is None:
        # cv2.imread returns None instead of raising when the file is missing
        # or cannot be decoded; fail here with a clear message rather than
        # with an AttributeError on `.shape` further down.
        raise FileNotFoundError(f"cannot read image: {img_path}")
    start = time.time()
    # height, width = image.shape[:2]
    # image = cv2.resize(image, (width // 2, height // 2))
    if WITH_PREPROCESSING:
        # Work on a grayscale view of the image for the noise estimate.
        if len(image.shape) == 3:  # colour image (has a channel axis)
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        else:
            gray = image
        logging.info(f"cvtColor takes {time.time() - start} s")
        # Noise-level estimate: the standard deviation of the grayscale
        # pixels is used as a simple proxy.
        noise_level = np.std(gray)
        logging.info(f"noise_level: {noise_level}")
        if noise_level > NOISE_THRESHOLD:
            logging.info("denoising")
            if len(image.shape) == 3:  # colour image
                logging.info("denoisin for color")
                processed_img = cv2.fastNlMeansDenoisingColored(image)
            else:  # grayscale image
                logging.info("denoising for gray")
                processed_img = cv2.fastNlMeansDenoising(image)
        else:
            processed_img = image
        logging.info(f"Preprocessing takes {time.time() - start} s")
    else:
        processed_img = image
        logging.info("Skip Preprocessing")
    # Run OCR on the preprocessed image. Passing the raw `image` here would
    # silently discard the denoising work that was just timed.
    result = ocr.ocr(processed_img)
    end = time.time()
    print(f"{img_path} elapsed: {end - start}")
    total_time += (end - start)

print(f"total time: {total_time}")

# Print the OCR output of the last processed image, one entry at a time.
for res in result:
    print(f"res: {res}")
    # for line in res:
    #     print(line)

# Alternative result handling via predict(), kept for reference:
# result = ocr.predict("docs/images/00006737.jpg")
# for res in result:
#     res.print()
#     res.save_to_img("output")
#     res.save_to_json("output")