| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268 |
- import math
- import cv2
- import numpy as np
- import peakutils
- import copy
- import argparse
def preprocess_image(original, gaussian_blur_size, adaptive_threshold_block_size, adaptive_threshold_mean_adjustment, num_dilations):
    """Blur, adaptively threshold (inverted binary), and repeatedly dilate a grayscale image.

    Produces a binary image in which dark features of the input (grid lines,
    black squares) become white foreground, thickened by num_dilations passes
    with a 3x3 cross-shaped kernel.
    """
    blurred = cv2.GaussianBlur(original, (gaussian_blur_size, gaussian_blur_size), 0)
    binary = cv2.adaptiveThreshold(
        blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV,
        adaptive_threshold_block_size, adaptive_threshold_mean_adjustment)
    cross_kernel = np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]], np.uint8)
    for _ in range(num_dilations):
        binary = cv2.dilate(binary, cross_kernel)
    return binary
def find_biggest_contour(img):
    """Return the external contour of img with the largest area.

    Returns None when there are no contours or none has positive area.
    """
    found, _hierarchy = cv2.findContours(img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    best = None
    best_area = 0
    for candidate in found:
        candidate_area = cv2.contourArea(candidate)
        if candidate_area > best_area:
            best_area = candidate_area
            best = candidate
    return best
def erode_contour(img_shape, contour, kernel_size, iterations):
    """Smooth a contour by rasterising it and applying a morphological opening.

    The contour is drawn filled onto a blank image, eroded then dilated with a
    square kernel (removing thin protrusions), and the largest contour of the
    result is returned.
    """
    mask = np.zeros(img_shape, dtype=np.uint8)
    cv2.drawContours(mask, [contour], 0, 255, -1)
    square_kernel = np.ones((kernel_size, kernel_size), dtype=np.uint8)
    opened = cv2.erode(mask, square_kernel, iterations=iterations)
    opened = cv2.dilate(opened, square_kernel, iterations=iterations)
    return find_biggest_contour(opened)
def get_contour_corners(img, contour):
    """Find the four extreme corners of a roughly rectangular contour.

    Uses the classic extremes of x+y and x-y in image coordinates (y grows
    downward): top-left minimises x+y, bottom-right maximises it, bottom-left
    minimises x-y, top-right maximises it.

    Returns (top_left, top_right, bottom_right, bottom_left), each a 2-element
    point taken from the contour (or the initial sentinel if the contour is empty).
    """
    height, width = img.shape
    # Sentinels chosen so the first real contour point beats each comparison.
    top_left = [width, height]
    top_right = [-1, height]
    bottom_left = [width, -1]
    bottom_right = [-1, -1]
    for vertex in contour:
        point = vertex[0]
        # Renamed from 'sum'/'diff': 'sum' shadowed the builtin.
        coord_sum = point[0] + point[1]
        coord_diff = point[0] - point[1]
        if coord_sum < top_left[0] + top_left[1]:
            top_left = point
        if coord_sum > bottom_right[0] + bottom_right[1]:
            bottom_right = point
        if coord_diff < bottom_left[0] - bottom_left[1]:
            bottom_left = point
        if coord_diff > top_right[0] - top_right[1]:
            top_right = point
    return top_left, top_right, bottom_right, bottom_left
def segment_length(p1, p2):
    """Return the Euclidean distance between points p1 and p2 (each an (x, y) pair)."""
    # math.hypot avoids intermediate overflow/underflow of dx**2 + dy**2.
    return math.hypot(p1[0] - p2[0], p1[1] - p2[1])
def get_longest_side(poly):
    """Return the length of the longest side of a closed polygon.

    poly is a sequence of points; the polygon is treated as closed, so the
    side from the last vertex back to the first is included.
    """
    # Renamed locals: the original used 'max' and 'len', shadowing builtins.
    longest = 0
    previous = poly[-1]
    for current in poly:
        side = segment_length(previous, current)
        if side > longest:
            longest = side
        previous = current
    return longest
def extract_square(img, top_left, top_right, bottom_right, bottom_left):
    """Perspective-warp the quadrilateral given by four corners into a square.

    The output square's side length equals the quadrilateral's longest side,
    so no edge of the source region is downscaled.
    """
    corners = [top_left, top_right, bottom_right, bottom_left]
    side = get_longest_side(corners)
    target = [[0, 0], [side - 1, 0], [side - 1, side - 1], [0, side - 1]]
    transform = cv2.getPerspectiveTransform(
        np.array(corners, dtype=np.float32),
        np.array(target, dtype=np.float32))
    return cv2.warpPerspective(img, transform, (int(side), int(side)))
def get_fundamental_frequency(ffts):
    """Estimate a fundamental frequency (in FFT bins) shared by one or more FFTs.

    An initial estimate is taken from the FFT with the most detected peaks,
    then refined by trying small integer offsets and keeping the candidate
    whose ideal harmonic series (f, 2f, 3f, ...) best matches every detected
    peak in least-squares terms.

    Returns the refined integer frequency, or None when any FFT has no peaks
    or the initial estimate degenerates (< 2).
    """
    all_peak_indexes = []
    max_peak_count = None
    f_est = None
    for fft in ffts:
        # Use the upper half of the fft array, since this seems to always exclude the DC component.
        # NOTE(review): the upper half is mirrored (np.flip) so peak indexes count
        # upward from the Nyquist end — presumably equivalent to the lower half
        # without the DC bin; confirm against the FFT symmetry of real input.
        peak_indexes = peakutils.indexes(np.flip(abs(fft[len(fft) // 2:])), thres=0.3)
        peak_count = len(peak_indexes)
        if peak_count < 1:
            return None
        if (max_peak_count is None) or (peak_count > max_peak_count):
            max_peak_count = peak_count
            # Last peak position divided by the number of peaks approximates
            # the spacing of a harmonic series, i.e. the fundamental.
            f_est = round(peak_indexes[peak_count - 1] / peak_count)
        all_peak_indexes.append(peak_indexes)
    if f_est < 2:
        return None
    # Refine: test f_est +/- 2 and keep the candidate minimising the squared
    # error between observed peaks and multiples of the candidate frequency.
    min_err = None
    f = None
    for delta in range(-2, 3):
        err = 0
        f_current = f_est + delta
        for peak_indexes in all_peak_indexes:
            for i, peak_index in enumerate(peak_indexes):
                err += (peak_index - f_current * (i + 1)) ** 2
        if (min_err is None) or (err < min_err):
            min_err = err
            f = f_current
    return int(f)
def get_threshold_from_quantile(img, quantile):
    """Return the pixel value at the given quantile of img's intensity distribution.

    quantile is in [0, 1]. The lookup index is clamped to the last valid
    position so that quantile == 1.0 returns the maximum pixel value instead
    of raising IndexError (the unclamped index would be one past the end).
    """
    height, width = img.shape
    num_pixels = height * width
    pixels = np.sort(np.reshape(img, num_pixels))
    index = min(int(num_pixels * quantile), num_pixels - 1)
    return pixels[index]
def extract_grid_colours(img, num_rows, num_cols, sampling_block_size_ratio):
    """Sample the mean intensity at the centre of every cell in a num_rows x num_cols grid.

    sampling_block_size_ratio controls what fraction of each cell's extent is
    used for the averaging window (centred on the cell centre).

    Returns a list of num_rows rows, each a list of num_cols mean intensities.
    """
    height, width = img.shape
    # Half-extents of the sampling window around each cell centre.
    row_delta = int(height * sampling_block_size_ratio / num_rows / 2)
    col_delta = int(width * sampling_block_size_ratio / num_cols / 2)
    sampling_block_area = (2 * row_delta + 1) * (2 * col_delta + 1)
    grid = []
    for row in range(num_rows):
        line = []
        y = int(((row + 0.5) / num_rows) * height)
        for col in range(num_cols):
            x = int(((col + 0.5) / num_cols) * width)
            # Sum the window in one vectorised call with an int64 accumulator;
            # the original per-pixel loop shadowed the builtin 'sum' and a raw
            # uint8 accumulation would be overflow-prone.
            window = img[y - row_delta:y + row_delta + 1, x - col_delta:x + col_delta + 1]
            total = int(np.sum(window, dtype=np.int64))
            line.append(total / sampling_block_area)
        grid.append(line)
    return grid
def grid_colours_to_blocks(grid_colours, num_rows, num_cols, sampling_threshold):
    """Classify grid cells as white squares (0) or black blocks (1) using 180-degree symmetry.

    Crossword grids are symmetric under a half-turn, so each cell is classified
    together with its rotational twin. When the pair disagrees about which side
    of sampling_threshold they fall on, the cell further from the threshold
    wins and a warning is flagged.

    Returns (warning, grid): warning is True when any symmetric pair disagreed,
    and grid is a new num_rows x num_cols list of 0/1 values.
    """
    grid = copy.deepcopy(grid_colours)
    warning = False
    # Iterate over the top half INCLUDING the middle row when num_rows is odd.
    # The previous round(num_rows / 2) used banker's rounding, so for
    # num_rows = 5, 9, ... the middle row was silently left unclassified.
    for row in range((num_rows + 1) // 2):
        for col in range(num_cols):
            row2 = num_rows - row - 1
            col2 = num_cols - col - 1
            delta1 = grid_colours[row][col] - sampling_threshold
            delta2 = grid_colours[row2][col2] - sampling_threshold
            if (delta1 > 0) and (delta2 > 0):
                block = 0  # both brighter than threshold: open (white) square
            elif (delta1 < 0) and (delta2 < 0):
                block = 1  # both darker: black block
            else:
                # Symmetric cells disagree; trust the one further from the threshold.
                warning = True
                if abs(delta1) > abs(delta2):
                    block = 1 if delta1 < 0 else 0
                else:
                    block = 1 if delta2 < 0 else 0
            grid[row][col] = grid[row2][col2] = block
    return warning, grid
- def draw_point(image, point, colour):
- height, width, _ = image.shape
- for dx in range(-10, 11):
- for dy in range(-10, 11):
- x = point[0] + dx
- y = point[1] + dy
- if (x >= 0) and (y >= 0) and (x < width) and (y < height):
- image[y, x] = colour
def show_image(image):
    """Display image in a resizable window and block until the 'q' key is pressed."""
    window_name = 'xword'
    cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
    cv2.imshow(window_name, image)
    # Ignore every key press except 'q', which closes the window.
    while True:
        if cv2.waitKey() & 0xFF == ord('q'):
            break
    cv2.destroyAllWindows()
def extract_crossword(
    file_name,
    gaussian_blur_size=11,
    adaptive_threshold_block_size=11,
    adaptive_threshold_mean_adjustment=2,
    not_square=False,
    num_dilations=1,
    contour_erosion_kernel_size=5,
    contour_erosion_iterations=5,
    line_detector_element_size=51,
    sampling_block_size_ratio=0.25,
    sampling_threshold_quantile=0.3,
    sampling_threshold=None,
    grid_line_thickness=4,
    grid_square_size=64,
    grid_border_size=20,
):
    """Extract a crossword grid from a photo and re-render it as a clean PNG.

    Pipeline: binarise the photo, find and straighten the grid (the biggest
    contour), infer the number of rows/columns from the fundamental frequency
    of the detected grid lines, sample each cell's brightness, classify cells
    as white/black, then draw an idealised grid image.

    Returns (png_bytes, warning) where warning is True when symmetric cells
    disagreed during classification.

    Raises RuntimeError when the image cannot be loaded, no grid contour is
    found, or the grid size cannot be determined.
    """
    original = cv2.imread(file_name, cv2.IMREAD_GRAYSCALE)
    if original is None:
        raise RuntimeError("Failed to load image")
    # Binarise and locate the grid outline (assumed to be the biggest contour).
    img = preprocess_image(original, gaussian_blur_size, adaptive_threshold_block_size, adaptive_threshold_mean_adjustment, num_dilations)
    biggest = find_biggest_contour(img)
    if biggest is None:
        # Fail loudly instead of letting erode_contour crash on None.
        raise RuntimeError("No grid contour found")
    biggest = erode_contour(img.shape, biggest, contour_erosion_kernel_size, contour_erosion_iterations)
    top_left, top_right, bottom_right, bottom_left = get_contour_corners(img, biggest)
    img = extract_square(img, top_left, top_right, bottom_right, bottom_left)
    # Isolate long horizontal and vertical strokes via 1-D morphological opening.
    horiz_elem = cv2.getStructuringElement(cv2.MORPH_RECT, (line_detector_element_size, 1))
    horiz_lines = cv2.dilate(cv2.erode(img, horiz_elem), horiz_elem)
    vert_elem = cv2.getStructuringElement(cv2.MORPH_RECT, (1, line_detector_element_size))
    vert_lines = cv2.dilate(cv2.erode(img, vert_elem), vert_elem)
    # Grid-line spacing shows up as the fundamental frequency of the projections.
    row_fft = np.fft.fft(np.sum(horiz_lines, axis=1))
    col_fft = np.fft.fft(np.sum(vert_lines, axis=0))
    if not_square:
        num_rows = get_fundamental_frequency([row_fft])
        num_cols = get_fundamental_frequency([col_fft])
    else:
        num_rows = num_cols = get_fundamental_frequency([row_fft, col_fft])
    if num_rows is None or num_cols is None:
        # get_fundamental_frequency returns None when it finds no usable peaks.
        raise RuntimeError("Failed to determine grid size")
    # Sample cell brightness from the straightened (unthresholded) original.
    block_img = extract_square(original, top_left, top_right, bottom_right, bottom_left)
    if sampling_threshold is None:
        sampling_threshold = get_threshold_from_quantile(block_img, sampling_threshold_quantile)
    grid_colours = extract_grid_colours(block_img, num_rows, num_cols, sampling_block_size_ratio)
    warning, grid = grid_colours_to_blocks(grid_colours, num_rows, num_cols, sampling_threshold)
    # Render the idealised grid: black frame filled with white open squares.
    step = grid_square_size + grid_line_thickness
    grid_height = num_rows * step + grid_line_thickness
    grid_width = num_cols * step + grid_line_thickness
    output = np.full([2 * grid_border_size + grid_height, 2 * grid_border_size + grid_width], 255, dtype=np.uint8)
    cv2.rectangle(output, (grid_border_size, grid_border_size), (grid_border_size + grid_width - 1, grid_border_size + grid_height - 1), 0, -1)
    for row in range(num_rows):
        y = row * step + grid_line_thickness + grid_border_size
        for col in range(num_cols):
            if grid[row][col] == 0:
                x = col * step + grid_line_thickness + grid_border_size
                cv2.rectangle(output, (x, y), (x + grid_square_size - 1, y + grid_square_size - 1), 255, -1)
    _, png = cv2.imencode('.png', output)
    return png.tobytes(), warning
|