Python 使用 OpenCV 从扫描的结构化纸质表格 jpg 图像中检测所有矩形和轮廓

发布于04月07日

As a newbie to OCR, I am attempting to detect all the rectangles/boxes in a scanned document illustrated here

但是，下面提供的代码片段的输出无法从图像中识别出相当数量的矩形.

import cv2
import imutils
import warnings
import numpy as np

warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt

# Load the scanned form and derive a grayscale copy for thresholding.
img = cv2.imread("example.jpg")
if img is None:
    # cv2.imread reports a missing/unreadable file by returning None instead
    # of raising; fail here with a clear message rather than inside cvtColor.
    raise FileNotFoundError("example.jpg could not be read by cv2.imread")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Inverted adaptive threshold of the grayscale page.
threshold = cv2.adaptiveThreshold(
    gray.copy(),
    255,  # maximum value assigned to pixel values exceeding the threshold
    cv2.ADAPTIVE_THRESH_GAUSSIAN_C,  # gaussian weighted sum of neighborhood
    cv2.THRESH_BINARY_INV,  # thresholding type
    301,  # block size: side length of the (odd) pixel neighborhood, 301x301
    21)  # constant subtracted from the weighted neighborhood mean

font = cv2.FONT_HERSHEY_COMPLEX
keypoints = cv2.findContours(threshold.copy(),
                             cv2.RETR_CCOMP,
                             cv2.CHAIN_APPROX_SIMPLE)
contours = imutils.grab_contours(keypoints)
# Start from the unannotated image so the final imshow still has something
# to display when no contour passes the filter below.
working_image = img
idx = 1
cropped_field_images = []

# Visit the contours in the reverse of the order findContours returned them.
rev_contours = tuple(reversed(contours))

for contour in rev_contours:
    x, y, w, h = cv2.boundingRect(contour)
    area = cv2.contourArea(contour)
    approx = cv2.approxPolyDP(contour, 10, True)
    # Keep only contours that simplify to exactly four vertices and whose
    # area exceeds 1500; everything else is skipped.
    if len(approx) != 4 or area <= 1500:
        continue

    # Outline and number the accepted box directly on the colour image.
    working_image = cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
    cv2.putText(img, str(idx), (x, y), font, 1, (0, 0, 255))

    location = approx
    mask = np.zeros(gray.shape, np.uint8)  # blank single-channel mask
    # Fill the approximated polygon on the mask, then apply it to the image.
    rect_img = cv2.drawContours(mask, [location], 0, 255, -1)
    rect_img = cv2.bitwise_and(img, img, mask=mask)

    # np.where yields (row indices, column indices); crop the grayscale image
    # to the tight bounding box of the filled polygon.
    rows, cols = np.where(mask == 255)
    row_min, col_min = np.min(rows), np.min(cols)
    row_max, col_max = np.max(rows), np.max(cols)
    cropped_rect = gray[row_min:row_max + 1, col_min:col_max + 1]

    cropped_field_images.append(cropped_rect)

    idx += 1

plt.figure(figsize=(11.69 * 2, 8.27 * 2))
plt.axis('off')
plt.imshow(cv2.cvtColor(working_image, cv2.COLOR_BGR2RGB));

上述代码的结果如下图所示。任何左上角没有数字且没有绿色边框的矩形都未被上面的代码识别出来，我已用红星将它们标记。我在上面的代码片段中尝试了不同的 OpenCV 自适应阈值类型、块大小和常量，但这些带红星标记的矩形始终没有出现在输出结果中。

我错过了什么？我可以考虑哪些方法来确保这些框/区域不会在结果中被遗漏？如果能帮助我优化自适应阈值，使输出结果包含所有带红星标记的矩形区域，我将不胜感激。

# Pre-processing: sharpen the image, whiten low-saturation background pixels,
# then derive the grayscale image used for thresholding.
sharpen_kernel = np.array([[-1, -1, -1], [-1, 9, -1], [-1, -1, -1]])
sharpened = cv2.filter2D(img, -1, sharpen_kernel)
hsv = cv2.cvtColor(sharpened.copy(), cv2.COLOR_BGR2HSV)
# Pixels with saturation <= 5 and value >= 100 are selected for whitening.
mask_grey = cv2.inRange(hsv, (0, 0, 100), (255, 5, 255))
# Build mask of non black pixels.
nzmask = cv2.inRange(hsv, (0, 0, 5), (255, 255, 255))
# Erode the mask - all pixels around a black pixels should not be masked.
nzmask = cv2.erode(nzmask, np.ones((3, 3)))
mask_grey = mask_grey & nzmask
cleaned_bg_img = img.copy()
cleaned_bg_img[np.where(mask_grey)] = 255
# NOTE(review): after this conversion the array is in RGB order, yet the next
# line still uses COLOR_BGR2GRAY, so the R and B weights are swapped in the
# grayscale result. Left as published; confirm whether this is intended.
cleaned_bg_img = cv2.cvtColor(cleaned_bg_img.copy(), cv2.COLOR_BGR2RGB)
gray = cv2.cvtColor(cleaned_bg_img, cv2.COLOR_BGR2GRAY)

# Excerpt of a per-contour step: `contour` and `cleaned_bg_img` are expected
# to be defined by the surrounding code.
# The approximation tolerance is 1% of the contour's perimeter.
epsilon = 0.01 * cv2.arcLength(contour, True)
x, y, w, h = cv2.boundingRect(contour)
area = cv2.contourArea(contour)
approx = cv2.approxPolyDP(contour, epsilon, True)
# if the shape size is rectangular (or polygon: document not scanned perfectly)
# if area is > 3000 to weed out small rectangles on characters/checkboxes.
if len(approx) > 3 and area > 3000:
    working_image = cv2.rectangle(
        cleaned_bg_img, (x, y), (x + w, y + h), (0, 255, 0), 2)

import cv2
import imutils
import warnings
import numpy as np

warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt

img = cv2.imread("example.jpg")
if img is None:
    # cv2.imread reports a missing/unreadable file by returning None instead
    # of raising; fail here with a clear message rather than inside filter2D.
    raise FileNotFoundError("example.jpg could not be read by cv2.imread")

# Pre-processing: sharpen, then whiten low-saturation background pixels.
sharpen_kernel = np.array([[-1, -1, -1], [-1, 9, -1], [-1, -1, -1]])
sharpened = cv2.filter2D(img, -1, sharpen_kernel)
hsv = cv2.cvtColor(sharpened.copy(), cv2.COLOR_BGR2HSV)
mask_grey = cv2.inRange(hsv, (0, 0, 100), (255, 5, 255))
# Build mask of non black pixels.
nzmask = cv2.inRange(hsv, (0, 0, 5), (255, 255, 255))
# Erode the mask - all pixels around a black pixels should not be masked.
nzmask = cv2.erode(nzmask, np.ones((3, 3)))
mask_grey = mask_grey & nzmask
cleaned_bg_img = img.copy()
cleaned_bg_img[np.where(mask_grey)] = 255
# NOTE(review): cleaned_bg_img is in RGB order after this line, but it is
# later passed to COLOR_BGR2GRAY and COLOR_BGR2RGB as if it were BGR. Left as
# published; confirm whether the channel swap is intended.
cleaned_bg_img = cv2.cvtColor(cleaned_bg_img.copy(), cv2.COLOR_BGR2RGB)
gray = cv2.cvtColor(cleaned_bg_img, cv2.COLOR_BGR2GRAY)

threshold = cv2.adaptiveThreshold(
    gray.copy(),
    255,  # maximum value assigned to pixel values exceeding the threshold
    cv2.ADAPTIVE_THRESH_GAUSSIAN_C,  # gaussian weighted sum of neighborhood
    cv2.THRESH_BINARY_INV,  # thresholding type
    301,  # block size: side length of the (odd) pixel neighborhood, 301x301
    11)  # constant subtracted from the weighted neighborhood mean

font = cv2.FONT_HERSHEY_COMPLEX
# find the largest bounded rectangle from the contours
keypoints = cv2.findContours(threshold.copy(),
                             cv2.RETR_CCOMP,
                             cv2.CHAIN_APPROX_SIMPLE)
contours = imutils.grab_contours(keypoints)
# Start from the unannotated image so the final imshow still has something
# to display when no contour passes the filter below.
working_image = cleaned_bg_img
idx = 1
cropped_field_images = []

# the following code section ensures we read the rectangles from
# the top of the page, not bottom
rev_contours = tuple(reversed(contours))

for contour in rev_contours:
    # The approximation tolerance is 1% of the contour's perimeter.
    epsilon = 0.01 * cv2.arcLength(contour, True)
    x, y, w, h = cv2.boundingRect(contour)
    area = cv2.contourArea(contour)
    approx = cv2.approxPolyDP(contour, epsilon, True)
    # if the shape size is rect/polygon: document not scanned perfectly)
    # if area is >3000 to weed out small rectangles on characters/checkboxes etc.
    if len(approx) <= 3 or area <= 3000:
        continue

    working_image = cv2.rectangle(
        cleaned_bg_img, (x, y), (x + w, y + h), (0, 255, 0), 2)
    cv2.putText(cleaned_bg_img, str(idx), (x, y), font, 1, (0, 0, 255))

    location = approx
    mask = np.zeros(gray.shape, np.uint8)  # Create a blank mask
    # Draw our contours for the specific location
    rect_img = cv2.drawContours(mask, [location], 0, 255, -1)
    # Overlay the mask with the image
    rect_img = cv2.bitwise_and(cleaned_bg_img, cleaned_bg_img, mask=mask)

    # np.where yields (row indices, column indices); crop the grayscale image
    # to the tight bounding box of the filled polygon.
    rows, cols = np.where(mask == 255)
    row_min, col_min = np.min(rows), np.min(cols)
    row_max, col_max = np.max(rows), np.max(cols)
    cropped_rect = gray[row_min:row_max + 1, col_min:col_max + 1]
    cropped_field_images.append(cropped_rect)
    idx += 1

plt.figure(figsize=(11.69 * 2, 8.27 * 2))
plt.axis('off')
plt.imshow(cv2.cvtColor(working_image, cv2.COLOR_BGR2RGB));

Python 使用 OpenCV 从扫描的结构化纸质表格 jpg 图像中检测所有矩形和轮廓

推荐答案

Python相关问答推荐

将jit与numpy linSpace函数一起使用时出错

对于一个给定的数字，找出一个整数的最小和最大可能的和

将输入管道传输到正在运行的Python脚本中

如何找到满足各组掩码条件的第一行？

从spaCy的句子中提取日期

在含噪声的3D点网格中识别4连通点模式

Asyncio：如何从子进程中读取stdout？

如何指定列数据类型

如何在Python中使用Pandas将R的Tukey's HSD表转换为相关矩阵

处理具有多个独立头的CSV文件

导入错误：无法导入名称'操作'

人口全部乱序 - Python—Matplotlib—映射

用SymPy在Python中求解指数函数

Python将一个列值分割成多个列，并保持其余列相同

Polars：替换组内大于另一个Polars数据帧最大值的值

Django更新视图未更新

如何在Python中向单元测试的side_effect传递额外参数？

Numpy`astype(Int)`给出`np.int64`而不是`int`-怎么办？

使用pyfakefs伪造类变量中的pathlib.Path

Pandas查找给定时间戳之前的最后一个值