Created
August 4, 2024 22:07
-
-
Save gabonator/dd9192c3cfa884fbed59a2420f98c85e to your computer and use it in GitHub Desktop.
Align a photo of table for OCR, trim to contents
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
import cv2 | |
import numpy as np | |
def custom_processing_function(b, g, r):
    """Map one BGR pixel to a contrast-boosted gray level.

    Near-gray pixels (max channel deviation from the mean < 30) get a
    gain of 3; saturated (colored) pixels get a gain of 4, pushing them
    harder toward white/black so colored content does not survive the
    later thresholding step.

    Returns the same gray value for all three channels (b, g, r order
    is irrelevant since the output is achromatic).
    """
    # Promote to Python ints so the sum cannot wrap in uint8 arithmetic.
    r = int(r)
    g = int(g)
    b = int(b)
    y = (r + g + b) / 3
    # Largest deviation of any single channel from the luminance mean.
    mdif = max(abs(y - r), abs(y - g), abs(y - b))
    # Both branches were identical except for the gain — fold them into
    # one clamped linear stretch.
    factor = 3 if mdif < 30 else 4
    y = max(0, min(y * factor - 200, 255))
    return y, y, y
# Load the image given on the command line.
image = cv2.imread(sys.argv[1])
height, width, channels = image.shape

def _enhance_contrast(img):
    """Vectorized equivalent of applying custom_processing_function to
    every pixel: gray-boost near-gray pixels (gain 3), suppress colored
    ones harder (gain 4), clamp to [0, 255].

    The previous per-pixel Python loop was O(H*W) interpreted calls;
    this NumPy form computes the same values in a few array passes.
    """
    f = img.astype(np.float64)
    # Per-pixel luminance mean across the three channels.
    y = f.mean(axis=2)
    # Largest deviation of any channel from that mean.
    mdif = np.abs(f - y[..., None]).max(axis=2)
    factor = np.where(mdif < 30, 3.0, 4.0)
    out = np.clip(y * factor - 200.0, 0.0, 255.0)
    # Replicate the gray value into all three channels; the cast back to
    # uint8 truncates exactly like the original element assignment did.
    return np.repeat(out[..., None], 3, axis=2).astype(img.dtype)

image = _enhance_contrast(image)
# Grayscale conversion: contour detection works on a single channel.
grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# Gaussian blur suppresses sensor noise before binarization.
smoothed = cv2.GaussianBlur(grayscale, (5, 5), 0)
# Inverted binary threshold: dark table content becomes white foreground.
_, binary = cv2.threshold(smoothed, 128, 255, cv2.THRESH_BINARY_INV)
# Outer contours only, with compressed (endpoint-only) segments.
contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# Examine the largest-area candidates first.
contours = sorted(contours, key=cv2.contourArea, reverse=True)
# Walk candidates largest-first; the first quadrilateral is taken as the table.
for contour in contours:
    # Approximate the contour with a coarse polygon (2% of perimeter).
    perimeter = cv2.arcLength(contour, True)
    approx = cv2.approxPolyDP(contour, 0.02 * perimeter, True)
    # A table outline should reduce to exactly four corners.
    if len(approx) != 4:
        continue
    corners = np.array(approx).reshape(4, 2)
    # Order the corners: split into top/bottom pair by y, then sort each
    # pair by x. Yields top-left, top-right, bottom-left, bottom-right.
    by_y = sorted(corners, key=lambda p: p[1])
    top_left, top_right = sorted(by_y[:2], key=lambda p: p[0])
    bottom_left, bottom_right = sorted(by_y[2:], key=lambda p: p[0])
    # Clockwise order expected by getPerspectiveTransform: TL, TR, BR, BL.
    ordered_pts = np.array([top_left, top_right, bottom_right, bottom_left], dtype='float32')
    # Estimate the aspect ratio from the top edge (width) and right edge (height).
    rw = np.linalg.norm(ordered_pts[0] - ordered_pts[1])
    rh = np.linalg.norm(ordered_pts[1] - ordered_pts[2])
    # Destination rectangle: fixed 2400px width, height from the aspect ratio.
    width = 2400
    height = int(width * rh / rw)
    dst_pts = np.array([[0, 0], [width - 1, 0], [width - 1, height - 1], [0, height - 1]], dtype='float32')
    # Warp the quadrilateral into an axis-aligned rectangle and save it.
    matrix = cv2.getPerspectiveTransform(ordered_pts, dst_pts)
    aligned_table = cv2.warpPerspective(image, matrix, (width, height))
    cv2.imwrite(sys.argv[2], aligned_table)
    break
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment