As a newbie to OCR, I am attempting to detect all the rectangles/boxes in a scanned document illustrated here
但是，下面代码片段的输出未能识别出图像中相当多的矩形。
import cv2
import imutils
import warnings
import numpy as np
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt
# Load the scanned document. cv2.imread signals failure by returning None
# instead of raising, so stop here with a clear message rather than letting
# cvtColor fail later with an opaque error.
img = cv2.imread("example.jpg")
if img is None:
    raise FileNotFoundError("could not read image 'example.jpg'")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Binarise with an inverted adaptive threshold: pixels at or below their
# local threshold become 255, everything else becomes 0.
threshold = cv2.adaptiveThreshold(
    gray.copy(),
    255,  # value given to pixels that satisfy the threshold condition
    cv2.ADAPTIVE_THRESH_GAUSSIAN_C,  # gaussian-weighted sum of the neighbourhood
    cv2.THRESH_BINARY_INV,  # inverted binary thresholding
    301,  # block size: 301x301 pixel neighbourhood
    21)  # constant subtracted from the weighted neighbourhood sum

font = cv2.FONT_HERSHEY_COMPLEX
keypoints = cv2.findContours(threshold.copy(),
                             cv2.RETR_CCOMP,
                             cv2.CHAIN_APPROX_SIMPLE)
contours = imutils.grab_contours(keypoints)

working_image = None  # stays None until at least one rectangle is drawn
idx = 1  # 1-based label written next to each accepted rectangle
cropped_field_images = []  # grayscale crops, one per accepted rectangle

# Contours are visited in the reverse of the order findContours returned them.
rev_contours = tuple(reversed(contours))
for contour in rev_contours:
    area = cv2.contourArea(contour)
    # NOTE(review): epsilon is a fixed 10 px regardless of contour size; a
    # perimeter-relative epsilon (a small fraction of cv2.arcLength) is a
    # common alternative -- confirm against the target scans before changing.
    approx = cv2.approxPolyDP(contour, 10, True)

    # Keep only four-vertex polygons whose contour area exceeds 1500.
    if len(approx) != 4 or area <= 1500:
        continue

    # Annotate the source image in place with the bounding box and its label.
    x, y, w, h = cv2.boundingRect(contour)
    working_image = cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
    cv2.putText(img, str(idx), (x, y), font, 1, (0, 0, 255))

    # Rasterise the approximated polygon into a blank mask and crop the
    # grayscale image to the extent of the filled pixels.
    mask = np.zeros(gray.shape, np.uint8)
    cv2.drawContours(mask, [approx], 0, 255, -1)
    # np.where yields (row indices, column indices), i.e. (y, x) order.
    rows, cols = np.where(mask == 255)
    cropped_rect = gray[rows.min():rows.max() + 1, cols.min():cols.max() + 1]
    cropped_field_images.append(cropped_rect)
    idx += 1

plt.figure(figsize=(11.69 * 2, 8.27 * 2))
plt.axis('off')
# If nothing was detected, show the unannotated image instead of passing
# None to cvtColor.
annotated = img if working_image is None else working_image
plt.imshow(cv2.cvtColor(annotated, cv2.COLOR_BGR2RGB));
上述代码的结果如下图所示。凡是左上角没有数字、也没有绿色边框的矩形，都是上面的代码未能识别的，我已用红星将其标出。我在上面的代码片段中尝试了 OpenCV 自适应阈值的不同类型、块大小和常量，但这些标有红星的矩形始终没有出现在输出结果中。
我遗漏了什么？我可以考虑哪些方法来确保这些框/区域不会在结果中被遗漏？如果能在优化自适应阈值、使输出结果包含所有标有红星的矩形方面提供任何帮助，我将不胜感激。