"""Benchmark script: time PaddleOCR on an image with optional denoising.

The script loads one OCR pipeline, reads each test image with OpenCV,
optionally denoises it when its grayscale standard deviation exceeds
``NOISE_THRESHOLD`` (read from the environment, default 10), runs OCR and
prints the elapsed time and the raw OCR result.
"""
import logging
import os
import time

import cv2
import numpy as np
from paddleocr import PaddleOCR

logging.basicConfig(level=logging.INFO)

# Alternative pipeline configurations kept for reference:
# ocr = PaddleOCR(
#     use_doc_orientation_classify=False,
#     use_doc_unwarping=False,
#     use_textline_orientation=False)  # text detection + text recognition
# ocr = PaddleOCR(use_doc_orientation_classify=True, use_doc_unwarping=True)  # text image preprocessing + text detection + textline orientation classification + text recognition
# ocr = PaddleOCR(use_doc_orientation_classify=False, use_doc_unwarping=False)  # text detection + textline orientation classification + text recognition

# NOTE(review): this call mixes keyword styles that look like they come from
# different PaddleOCR major versions (e.g. `det_model_dir`/`use_gpu` next to
# `use_doc_orientation_classify`) -- confirm against the installed version.
ocr = PaddleOCR(
    # https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar
    det_model_dir="/mnt/models/ocr/ch_PP-OCRv4_det_server_infer",
    # https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar
    rec_model_dir="/mnt/models/ocr/ch_PP-OCRv4_rec_server_infer",
    # https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar
    cls_model_dir="/mnt/models/ocr/ch_ppocr_mobile_v2.0_cls_infer",
    use_doc_orientation_classify=False,
    use_doc_unwarping=False,
    use_gpu=True,
    cpu_threads=32,
    enable_mkldnn=True,
    use_textline_orientation=False,
)  # Switch to PP-OCRv5_mobile models

# Presumably gives the freshly created pipeline time to settle before the
# timed section starts -- TODO confirm this pause is still needed.
time.sleep(5)

total_time = 0
WITH_PREPROCESSING = True
NOISE_THRESHOLD = int(os.getenv("NOISE_THRESHOLD", "10"))

for i in range(1):
    # img_path = f"test{i%5}.pdf"
    img_path = "1.jpg"
    print(f"run for {img_path}")

    image = cv2.imread(img_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the path instead of a later AttributeError.
        raise FileNotFoundError(f"cannot read image: {img_path}")

    # perf_counter is monotonic, so the measured interval cannot be skewed
    # by wall-clock adjustments.
    start = time.perf_counter()
    # height, width = image.shape[:2]
    # image = cv2.resize(image, (width // 2, height // 2))

    if WITH_PREPROCESSING:
        # Work on a single-channel view so the noise estimate below is
        # computed over grayscale intensities.
        if len(image.shape) == 3:  # color image (has a channel axis)
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        else:
            gray = image
        logging.info(f"cvtColor takes {time.perf_counter() - start} s")

        # Noise-level estimate: the standard deviation of the grayscale
        # image is used as a simple proxy.
        noise_level = np.std(gray)
        logging.info(f"noise_level: {noise_level}")

        if noise_level > NOISE_THRESHOLD:
            logging.info("denoising")
            if len(image.shape) == 3:  # color image
                logging.info("denoisin for color")
                processed_img = cv2.fastNlMeansDenoisingColored(image)
            else:  # grayscale image
                logging.info("denoising for gray")
                processed_img = cv2.fastNlMeansDenoising(image)
        else:
            processed_img = image
        logging.info(f"Preprocessing takes {time.perf_counter() - start} s")
    else:
        processed_img = image
        logging.info("Skip Preprocessing")

    # Run OCR on the preprocessed image; the original passed the raw `image`
    # here, which silently threw away the denoising result.
    result = ocr.ocr(processed_img)
    end = time.perf_counter()

    print(f"{img_path} elapsed: {end - start}")
    total_time += end - start

    # Guard against a None result so an empty outcome prints nothing
    # instead of raising.
    for res in result or []:
        print(f"res: {res}")
        # for line in res:
        #     print(line)

print(f"total time: {total_time}")

# result = ocr.predict("docs/images/00006737.jpg")
# for res in result:
#     res.print()
#     res.save_to_img("output")
#     res.save_to_json("output")