import argparse
import copy
import math

import cv2
import numpy as np
import peakutils


def preprocess_image(original, gaussian_blur_size, adaptive_threshold_block_size,
                     adaptive_threshold_mean_adjustment, num_dilations):
    """Blur, adaptive-threshold (inverted binary) and dilate a greyscale image.

    Returns a binary image in which dark features of the original (the grid
    lines) appear as white (255) pixels, thickened by ``num_dilations``
    dilation passes.
    """
    img = cv2.GaussianBlur(original, (gaussian_blur_size, gaussian_blur_size), 0)
    img = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                cv2.THRESH_BINARY_INV,
                                adaptive_threshold_block_size,
                                adaptive_threshold_mean_adjustment)
    # Cross-shaped kernel: grows lines without bridging diagonal neighbours.
    kernel = np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]], np.uint8)
    for _ in range(num_dilations):
        img = cv2.dilate(img, kernel)
    return img


def find_biggest_contour(img):
    """Return the external contour with the largest area, or None if none exist."""
    contours, _ = cv2.findContours(img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    biggest = None
    max_area = 0
    for contour in contours:
        area = cv2.contourArea(contour)
        if area > max_area:
            biggest = contour
            max_area = area
    return biggest


def erode_contour(img_shape, contour, kernel_size, iterations):
    """Morphologically open a filled contour to shave off thin appendages.

    Draws the contour filled, erodes then dilates it the same number of
    times, and returns the biggest contour that survives (may be None).
    """
    contour_img = np.zeros(img_shape, dtype=np.uint8)
    cv2.drawContours(contour_img, [contour], 0, 255, -1)
    kernel = np.ones((kernel_size, kernel_size), dtype=np.uint8)
    contour_img = cv2.erode(contour_img, kernel, iterations=iterations)
    contour_img = cv2.dilate(contour_img, kernel, iterations=iterations)
    return find_biggest_contour(contour_img)


def get_contour_corners(img, contour):
    """Find the four extreme corners of a contour.

    x+y extremes give the top-left / bottom-right corners; x-y extremes give
    the top-right / bottom-left. Returns (top_left, top_right, bottom_right,
    bottom_left) as (x, y) points.
    """
    height, width = img.shape
    top_left = [width, height]
    top_right = [-1, height]
    bottom_left = [width, -1]
    bottom_right = [-1, -1]
    for vertex in contour:
        point = vertex[0]
        coord_sum = point[0] + point[1]
        coord_diff = point[0] - point[1]
        if coord_sum < top_left[0] + top_left[1]:
            top_left = point
        if coord_sum > bottom_right[0] + bottom_right[1]:
            bottom_right = point
        if coord_diff < bottom_left[0] - bottom_left[1]:
            bottom_left = point
        if coord_diff > top_right[0] - top_right[1]:
            top_right = point
    return top_left, top_right, bottom_right, bottom_left


def segment_length(p1, p2):
    """Euclidean distance between two 2-D points."""
    return math.hypot(p1[0] - p2[0], p1[1] - p2[1])


def get_longest_side(poly):
    """Return the length of the longest edge of a closed polygon.

    ``poly`` is a sequence of points; the edge from the last point back to
    the first is included.
    """
    longest = 0
    previous = poly[-1]
    for current in poly:
        length = segment_length(previous, current)
        if length > longest:
            longest = length
        previous = current
    return longest


def extract_square(img, top_left, top_right, bottom_right, bottom_left):
    """Perspective-warp the quadrilateral given by four corners into a square.

    The square's side length is the quad's longest edge, so no edge is
    downsampled.
    """
    src = [top_left, top_right, bottom_right, bottom_left]
    longest = get_longest_side(src)
    dst = [[0, 0], [longest - 1, 0], [longest - 1, longest - 1], [0, longest - 1]]
    m = cv2.getPerspectiveTransform(np.array(src, dtype=np.float32),
                                    np.array(dst, dtype=np.float32))
    return cv2.warpPerspective(img, m, (int(longest), int(longest)))


def get_fundamental_frequency(ffts):
    """Estimate the common fundamental frequency of one or more FFT spectra.

    Picks peaks in each spectrum, takes an initial estimate from the spectrum
    with the most peaks (last peak index / peak count), then refines it over
    a +/-2 window by minimising the squared error of every peak against
    integer multiples of the candidate. Returns None when any spectrum has no
    peaks, when no spectra are supplied, or when the estimate is below 2.
    """
    all_peak_indexes = []
    max_peak_count = None
    f_est = None
    for fft in ffts:
        # Use the upper half of the fft array, since this seems to always
        # exclude the DC component.
        peak_indexes = peakutils.indexes(np.flip(abs(fft[len(fft) // 2:])), thres=0.3)
        peak_count = len(peak_indexes)
        if peak_count < 1:
            return None
        if (max_peak_count is None) or (peak_count > max_peak_count):
            max_peak_count = peak_count
            f_est = round(peak_indexes[peak_count - 1] / peak_count)
        all_peak_indexes.append(peak_indexes)
    # Guard the empty-input case too: f_est stays None when ffts is empty,
    # and `None < 2` would raise TypeError.
    if f_est is None or f_est < 2:
        return None
    min_err = None
    f = None
    for delta in range(-2, 3):
        err = 0
        f_current = f_est + delta
        for peak_indexes in all_peak_indexes:
            for i, peak_index in enumerate(peak_indexes):
                err += (peak_index - f_current * (i + 1)) ** 2
        if (min_err is None) or (err < min_err):
            min_err = err
            f = f_current
    return int(f)


def get_threshold_from_quantile(img, quantile):
    """Return the pixel intensity at the given quantile of the image.

    Nearest-rank selection over the sorted pixel values (no interpolation).
    The index is clamped so ``quantile >= 1.0`` returns the maximum instead
    of indexing past the end of the array.
    """
    height, width = img.shape
    num_pixels = height * width
    pixels = np.sort(np.reshape(img, num_pixels))
    return pixels[min(int(num_pixels * quantile), num_pixels - 1)]


def extract_grid_colours(img, num_rows, num_cols, sampling_block_size_ratio):
    """Sample the mean intensity at the centre of each grid cell.

    A block of ``sampling_block_size_ratio`` times the cell size, centred on
    each cell, is averaged. Returns a num_rows x num_cols list of means.
    """
    height, width = img.shape
    row_delta = int(height * sampling_block_size_ratio / num_rows / 2)
    col_delta = int(width * sampling_block_size_ratio / num_cols / 2)
    sampling_block_area = (2 * row_delta + 1) * (2 * col_delta + 1)
    grid = []
    for row in range(num_rows):
        line = []
        y = int(((row + 0.5) / num_rows) * height)
        for col in range(num_cols):
            x = int(((col + 0.5) / num_cols) * width)
            # Slice-sum replaces the per-pixel Python loop; numpy promotes
            # uint8 sums to int64 so there is no overflow.
            block = img[y - row_delta:y + row_delta + 1,
                        x - col_delta:x + col_delta + 1]
            line.append(block.sum() / sampling_block_area)
        grid.append(line)
    return grid


def grid_colours_to_blocks(grid_colours, num_rows, num_cols, sampling_threshold):
    """Classify each cell as open (0) or blocked (1) using 180-degree symmetry.

    Each cell is paired with its 180-degree-rotated counterpart. When both
    samples fall on the same side of the threshold the answer is clear;
    otherwise the sample further from the threshold wins and ``warning`` is
    set. Returns (warning, grid).
    """
    grid = copy.deepcopy(grid_colours)
    warning = False
    # (num_rows + 1) // 2 always covers the middle row of an odd-sized grid.
    # The previous round(num_rows / 2) used banker's rounding, so for
    # num_rows = 5, 13, 21, ... the middle row was silently skipped and kept
    # its raw colour values instead of 0/1.
    for row in range((num_rows + 1) // 2):
        for col in range(num_cols):
            row2 = num_rows - row - 1
            col2 = num_cols - col - 1
            delta1 = grid_colours[row][col] - sampling_threshold
            delta2 = grid_colours[row2][col2] - sampling_threshold
            if (delta1 > 0) and (delta2 > 0):
                block = 0
            elif (delta1 < 0) and (delta2 < 0):
                block = 1
            else:
                # The pair disagrees: trust whichever sample is further from
                # the threshold and flag the result as uncertain.
                warning = True
                if abs(delta1) > abs(delta2):
                    block = 1 if delta1 < 0 else 0
                else:
                    block = 1 if delta2 < 0 else 0
            grid[row][col] = grid[row2][col2] = block
    return warning, grid


def draw_point(image, point, colour):
    """Paint a 21x21 square centred on ``point``, clipped to the image bounds."""
    height, width, _ = image.shape
    for dx in range(-10, 11):
        for dy in range(-10, 11):
            x = point[0] + dx
            y = point[1] + dy
            if 0 <= x < width and 0 <= y < height:
                image[y, x] = colour


def show_image(image):
    """Display an image in a resizable window until 'q' is pressed."""
    cv2.namedWindow('xword', cv2.WINDOW_NORMAL)
    cv2.imshow('xword', image)
    while cv2.waitKey() & 0xFF != ord('q'):
        pass
    cv2.destroyAllWindows()


def extract_crossword(
        file_name,
        gaussian_blur_size=11,
        adaptive_threshold_block_size=11,
        adaptive_threshold_mean_adjustment=2,
        not_square=False,
        num_dilations=1,
        contour_erosion_kernel_size=5,
        contour_erosion_iterations=5,
        line_detector_element_size=51,
        sampling_block_size_ratio=0.25,
        sampling_threshold_quantile=0.3,
        sampling_threshold=None,
        grid_line_thickness=4,
        grid_square_size=64,
        grid_border_size=20,
):
    """Locate a crossword grid in a photo and render a clean PNG of it.

    Pipeline: threshold the image, find and tidy the biggest contour
    (assumed to be the grid), perspective-correct it to a square, isolate
    horizontal/vertical lines morphologically, infer the row/column count
    from the fundamental frequency of the line projections, sample each
    cell's colour, and draw an idealised grid.

    Returns (png_bytes, warning); ``warning`` is True when the black/white
    classification of some symmetric cell pair was ambiguous.
    Raises RuntimeError if the image cannot be loaded.
    """
    original = cv2.imread(file_name, cv2.IMREAD_GRAYSCALE)
    if original is None:
        raise RuntimeError("Failed to load image")
    img = preprocess_image(original, gaussian_blur_size, adaptive_threshold_block_size,
                           adaptive_threshold_mean_adjustment, num_dilations)
    biggest = find_biggest_contour(img)
    biggest = erode_contour(img.shape, biggest, contour_erosion_kernel_size,
                            contour_erosion_iterations)
    top_left, top_right, bottom_right, bottom_left = get_contour_corners(img, biggest)
    img = extract_square(img, top_left, top_right, bottom_right, bottom_left)
    # Opening with long thin elements keeps only horizontal / vertical runs.
    horiz_elem = cv2.getStructuringElement(cv2.MORPH_RECT, (line_detector_element_size, 1))
    horiz_lines = cv2.erode(img, horiz_elem)
    horiz_lines = cv2.dilate(horiz_lines, horiz_elem)
    vert_elem = cv2.getStructuringElement(cv2.MORPH_RECT, (1, line_detector_element_size))
    vert_lines = cv2.erode(img, vert_elem)
    vert_lines = cv2.dilate(vert_lines, vert_elem)
    # The grid-line count appears as the fundamental frequency of the
    # projected line profiles.
    row_fft = np.fft.fft(np.sum(horiz_lines, axis=1))
    col_fft = np.fft.fft(np.sum(vert_lines, axis=0))
    if not_square:
        num_rows = get_fundamental_frequency([row_fft])
        num_cols = get_fundamental_frequency([col_fft])
    else:
        num_rows = num_cols = get_fundamental_frequency([row_fft, col_fft])
    block_img = extract_square(original, top_left, top_right, bottom_right, bottom_left)
    if sampling_threshold is None:
        sampling_threshold = get_threshold_from_quantile(block_img, sampling_threshold_quantile)
    grid_colours = extract_grid_colours(block_img, num_rows, num_cols,
                                        sampling_block_size_ratio)
    warning, grid = grid_colours_to_blocks(grid_colours, num_rows, num_cols,
                                           sampling_threshold)
    # Render: black grid on a white border, white squares for open cells.
    step = grid_square_size + grid_line_thickness
    grid_height = num_rows * step + grid_line_thickness
    grid_width = num_cols * step + grid_line_thickness
    output = np.full([2 * grid_border_size + grid_height,
                      2 * grid_border_size + grid_width], 255, dtype=np.uint8)
    cv2.rectangle(output, (grid_border_size, grid_border_size),
                  (grid_border_size + grid_width - 1,
                   grid_border_size + grid_height - 1), 0, -1)
    for row in range(num_rows):
        y = row * step + grid_line_thickness + grid_border_size
        for col in range(num_cols):
            if grid[row][col] == 0:
                x = col * step + grid_line_thickness + grid_border_size
                cv2.rectangle(output, (x, y),
                              (x + grid_square_size - 1, y + grid_square_size - 1),
                              255, -1)
    _, png = cv2.imencode('.png', output)
    return png.tobytes(), warning