As a newbie to OCR, I am attempting to detect all the rectangles/boxes in a scanned document, illustrated here: JPG of structured form

但是,下面提供的代码片段的输出无法从图像中识别出相当数量的矩形.

import cv2
import imutils
import warnings
import numpy as np

warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt

# Reproducer from the question: threshold the scanned form, find contours,
# keep those that approximate to a quadrilateral, number/outline them on the
# image and collect a grayscale crop of each one.
# NOTE: cv2.imread returns None when the file cannot be read; that case is
# not handled here.
img = cv2.imread("example.jpg") 
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

threshold = cv2.adaptiveThreshold(
    gray.copy(), 
    255, # value given to pixels that pass the threshold test (with THRESH_BINARY_INV: pixels at or below the local threshold)
    cv2.ADAPTIVE_THRESH_GAUSSIAN_C,  # gaussian weighted sum of neighborhood
    cv2.THRESH_BINARY_INV,  # thresholding type: inverted, so dark ink/lines become white (255)
    301, # block size: side of the pixel neighbourhood, i.e. a 301x301 window (not 5x5)
    21) # constant subtracted from the weighted neighbourhood mean

font = cv2.FONT_HERSHEY_COMPLEX
# imutils.grab_contours extracts the contour list from the findContours
# return value.
keypoints = cv2.findContours(threshold.copy(), 
                             cv2.RETR_CCOMP, 
                             cv2.CHAIN_APPROX_SIMPLE)
contours = imutils.grab_contours(keypoints)
working_image = None  # stays None if no contour passes the filter below
idx = 1  # running label drawn next to each accepted rectangle
cropped_field_images = []  # grayscale crops, one per accepted rectangle

# Iterate over the contours in reverse of the order findContours returned them.
contour_list = list(contours)
contour_list.reverse()
rev_contours = tuple(contour_list)

for contour in rev_contours:   
    x,y,w,h = cv2.boundingRect(contour) 
    area = cv2.contourArea(contour)
    # Fixed approximation tolerance of 10 pixels, regardless of contour size.
    approx = cv2.approxPolyDP(contour, 10, True)
    location = None
    # Accept only contours that simplify to exactly 4 vertices and whose
    # area exceeds 1500 px^2.
    if len(approx) == 4 and area > 1500 :
        # cv2.rectangle / cv2.putText draw directly on img.
        working_image = cv2.rectangle(img,(x,y),(x+w,y+h),(0,255,0),2)   
        cv2.putText(img, str(idx), (x, y), font, 1, (0,0,255))
        
        location = approx
        mask = np.zeros(gray.shape, np.uint8) # blank single-channel mask
        # Fill the approximated polygon in the mask (thickness -1 = filled).
        rect_img = cv2.drawContours(mask, [location], 0, 255, -1) 
        # rect_img is not used after this line.
        rect_img = cv2.bitwise_and(img, img, mask = mask) 
        
        # np.where returns (row indices, column indices): from here on "x"
        # holds rows and "y" holds columns, replacing the bounding-rect x, y.
        (x, y) = np.where(mask==255)
        (x1, y1) = (np.min(x), np.min(y))
        (x2, y2) = (np.max(x), np.max(y))
        # Row range first, then column range: crop spans the filled polygon.
        cropped_rect = gray[x1:x2+1, y1:y2+1]
        
        cropped_field_images.append(cropped_rect)
        
        idx += 1
    
# Show the annotated image. If no contour was accepted, working_image is
# still None and cv2.cvtColor below will fail.
plt.figure(figsize = (11.69*2,8.27*2))
plt.axis('off')
plt.imshow(cv2.cvtColor(working_image, cv2.COLOR_BGR2RGB));

上述代码的结果如下图所示.任何左上角没有数字和绿色边界的矩形都无法被上面的代码识别,并已被红星标记.我在上面的代码片段中尝试了不同的 OpenCV 自适应阈值类型、块大小和常量,但这些带红星的矩形总是被排除在输出结果之外.

Detection result

我错过了什么?我可以考虑什么来确保这些框/区域不会在结果中遗漏?在优化自适应阈值以确保输出结果中包含所有红星矩形部分方面的任何帮助都将受到极大的感谢.

推荐答案

我设法想出了如何捕获此表单中的所有矩形/框.正如塞姆贝在上面的 comments 中提到的那样,100,所以对于我的特殊挑战来说,图像配准可能不是最有效的方法.

我对这一挑战的解决方案是通过以下步骤得出的:

  1. 对图像进行预处理,去除灰色阴影区域. 这是在下面的代码段中实现的,其中图像首先被锐化,然后灰色背景被移除.

# Pre-processing: sharpen the scan, then paint the grey shaded background
# white. Expects `img` to be a BGR image as loaded by cv2.imread.
sharpen_kernel = np.array([[-1, -1, -1], [-1, 9, -1], [-1, -1, -1]])
sharpened = cv2.filter2D(img, -1, sharpen_kernel)

# Grey candidates in HSV: saturation <= 5 and value >= 100, any hue.
hsv = cv2.cvtColor(sharpened, cv2.COLOR_BGR2HSV)
mask_grey = cv2.inRange(hsv, (0, 0, 100), (255, 5, 255))

# Build mask of non black pixels.
nzmask = cv2.inRange(hsv, (0, 0, 5), (255, 255, 255))

# Erode the mask - all pixels around a black pixel should not be masked,
# so lines and text keep their immediate surroundings.
nzmask = cv2.erode(nzmask, np.ones((3, 3)))
mask_grey = mask_grey & nzmask

# Paint the grey pixels white on a copy of the original (unsharpened) image.
cleaned_bg_img = img.copy()
cleaned_bg_img[mask_grey > 0] = 255

# cleaned_bg_img is kept in OpenCV's BGR channel order so that
# COLOR_BGR2GRAY applies the luminance weights to the right channels.
# Convert to RGB only at display time (e.g. for matplotlib).
gray = cv2.cvtColor(cleaned_bg_img, cv2.COLOR_BGR2GRAY)

结果如下图所示(对比问题中的图1以对比差异)

No grey background

  2. 在遍历检测到的轮廓时,假设并非所有轮廓近似后的形状都恰好是矩形,因此,不再只保留近似后顶点数为4的轮廓,而是保留顶点数为4或更多的所有轮廓,然后使用cv2.boundingRect得到的外接矩形作为结果

# Per-contour test (body of the loop over contours): approximate the contour
# with a tolerance of 1% of its perimeter and measure it.
perimeter = cv2.arcLength(contour, True)
epsilon = 0.01 * perimeter
approx = cv2.approxPolyDP(contour, epsilon, True)
area = cv2.contourArea(contour)
x, y, w, h = cv2.boundingRect(contour)

# Keep any polygon with at least 4 vertices (a box may not approximate to a
# perfect rectangle when the document is not scanned perfectly), and require
# area > 3000 to weed out small rectangles on characters/checkboxes.
is_field = len(approx) >= 4 and area > 3000
if is_field:
    # Outline the contour's bounding rectangle in green on the cleaned image.
    top_left = (x, y)
    bottom_right = (x + w, y + h)
    working_image = cv2.rectangle(cleaned_bg_img, top_left, bottom_right, (0, 255, 0), 2)

这些更改的最终结果如下图所示.虽然问题图2中的结果确定了88个字段/框,但这些更改使我能够确定全部102个字段/框.

Final result

下面提供了最终代码片段,供任何希望使用此方法从结构化纸质记录中识别字段/框的人使用.我希望这个解决方案对任何有类似挑战的人都有帮助.此外,我们非常欢迎任何可用于表格结构化格式的变体!

import cv2
import imutils
import warnings
import numpy as np

warnings.filterwarnings('ignore')

import matplotlib.pyplot as plt

# Detect the fields/boxes of a scanned structured form:
#   1. sharpen the scan and paint the grey shaded background white,
#   2. adaptive-threshold the result and extract contours,
#   3. keep every sufficiently large contour that approximates to a polygon
#      with 4 or more vertices, outline and number it, and store a grayscale
#      crop of it in `cropped_field_images`.
img = cv2.imread("example.jpg")
if img is None:
    # cv2.imread reports an unreadable/missing file by returning None.
    raise FileNotFoundError("example.jpg could not be read as an image")

# --- 1. Pre-processing -----------------------------------------------------
sharpen_kernel = np.array([[-1, -1, -1], [-1, 9, -1], [-1, -1, -1]])
sharpened = cv2.filter2D(img, -1, sharpen_kernel)

# Grey candidates in HSV: saturation <= 5 and value >= 100, any hue.
hsv = cv2.cvtColor(sharpened, cv2.COLOR_BGR2HSV)
mask_grey = cv2.inRange(hsv, (0, 0, 100), (255, 5, 255))

# Build mask of non black pixels.
nzmask = cv2.inRange(hsv, (0, 0, 5), (255, 255, 255))

# Erode the mask - all pixels around a black pixel should not be masked,
# so lines and text keep their immediate surroundings.
nzmask = cv2.erode(nzmask, np.ones((3, 3)))
mask_grey = mask_grey & nzmask

# Paint the grey pixels white on a copy of the original (unsharpened) image.
cleaned_bg_img = img.copy()
cleaned_bg_img[mask_grey > 0] = 255

# The image stays in OpenCV's BGR order for all processing and drawing, so
# COLOR_BGR2GRAY weights the right channels; it is converted to RGB exactly
# once, when handed to matplotlib at the end.
gray = cv2.cvtColor(cleaned_bg_img, cv2.COLOR_BGR2GRAY)

# --- 2. Threshold and contours ---------------------------------------------
threshold = cv2.adaptiveThreshold(
    gray,
    255,  # value given to pixels at or below the local threshold (INV mode)
    cv2.ADAPTIVE_THRESH_GAUSSIAN_C,  # gaussian weighted sum of neighborhood
    cv2.THRESH_BINARY_INV,  # inverted: dark ink/lines become white
    301,  # block size: 301x301 pixel neighbourhood
    11)  # constant subtracted from the weighted neighbourhood mean

font = cv2.FONT_HERSHEY_COMPLEX
# threshold.copy() is kept because imutils.grab_contours targets several
# OpenCV versions, and old ones modify the image passed to findContours.
keypoints = cv2.findContours(threshold.copy(),
                             cv2.RETR_CCOMP,
                             cv2.CHAIN_APPROX_SIMPLE)
contours = imutils.grab_contours(keypoints)

# cv2.rectangle / cv2.putText draw in place, so the annotated image is
# cleaned_bg_img itself; binding it up front keeps the final display working
# even when no contour passes the filter.
working_image = cleaned_bg_img
idx = 1  # running label drawn next to each accepted box
cropped_field_images = []  # grayscale crops, one per accepted box

# Walk the contours in reverse so that numbering starts from the top of the
# page rather than the bottom. NOTE(review): this relies on the order
# findContours happened to return for this scan — confirm for other inputs.
rev_contours = tuple(reversed(contours))

# --- 3. Filter, annotate and crop ------------------------------------------
for contour in rev_contours:
    area = cv2.contourArea(contour)
    # Approximation tolerance scales with the contour: 1% of its perimeter.
    epsilon = 0.01 * cv2.arcLength(contour, True)
    approx = cv2.approxPolyDP(contour, epsilon, True)

    # Accept rectangles and other polygons (the document is not scanned
    # perfectly); area must exceed 3000 to weed out small rectangles on
    # characters/checkboxes etc.
    if len(approx) < 4 or area <= 3000:
        continue

    x, y, w, h = cv2.boundingRect(contour)
    cv2.rectangle(cleaned_bg_img, (x, y), (x + w, y + h), (0, 255, 0), 2)
    cv2.putText(cleaned_bg_img, str(idx), (x, y), font, 1, (0, 0, 255))

    # Fill the approximated polygon in a blank mask (thickness -1 = filled)
    # and crop the grayscale image to the extent of the filled pixels.
    mask = np.zeros(gray.shape, np.uint8)
    cv2.drawContours(mask, [approx], 0, 255, -1)
    rows, cols = np.where(mask == 255)  # np.where yields (rows, columns)
    cropped_rect = gray[rows.min():rows.max() + 1, cols.min():cols.max() + 1]
    cropped_field_images.append(cropped_rect)

    idx += 1

# --- Display ---------------------------------------------------------------
plt.figure(figsize=(11.69 * 2, 8.27 * 2))
plt.axis('off')
# Single BGR -> RGB conversion for matplotlib; the trailing semicolon
# suppresses the return-value echo in a notebook cell.
plt.imshow(cv2.cvtColor(working_image, cv2.COLOR_BGR2RGB));

Python相关问答推荐

将jit与numpy linSpace函数一起使用时出错

对于一个给定的数字,找出一个整数的最小和最大可能的和

将输入管道传输到正在运行的Python脚本中

如何找到满足各组口罩条件的第一行?

从spaCy的句子中提取日期

在含噪声的3D点网格中识别4连通点模式

Asyncio:如何从子进程中读取stdout?

如何指定列数据类型

如何在Python中使用Pandas将R s Tukey s HSD表转换为相关矩阵''

处理具有多个独立头的CSV文件

导入错误:无法导入名称'操作'

人口全部乱序 - Python—Matplotlib—映射

用SymPy在Python中求解指数函数

Python将一个列值分割成多个列,并保持其余列相同

极点替换值大于组内另一个极点数据帧的最大值

Django更新视图未更新

如何在PYTHON中向单元测试S Side_Effect发送额外参数?

Numpy`astype(Int)`给出`np.int64`而不是`int`-怎么办?

Fake pathlib.使用pyfakefs的类变量中的路径'

Pandas查找给定时间戳之前的最后一个值