andrew
/
xword


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262
							import math
import cv2
import numpy as np
import copy
import argparse


def non_greys_to_white(img, threshold=48):
    """Return a copy of ``img`` with strongly coloured pixels painted white.

    For every pixel the spread between its largest and smallest channel value
    is computed; pixels whose spread exceeds ``threshold`` are replaced with
    (255, 255, 255). The input image itself is left untouched.

    Args:
        img: Three-channel image (split into b, g, r planes).
        threshold: Largest channel spread a pixel may have and still be kept.

    Returns:
        The filtered copy of ``img``.
    """
    blue, green, red = cv2.split(img)
    brightest = cv2.max(cv2.max(blue, green), red)
    darkest = cv2.min(cv2.min(blue, green), red)
    channel_spread = cv2.subtract(brightest, darkest)

    # Grey pixels have near-equal channels, so a large spread means "colour".
    too_colourful = channel_spread > threshold
    result = img.copy()
    result[too_colourful] = (255, 255, 255)
    return result


def load_image_as_greyscale(file_name, filter_colours, colour_filter_threshold):
    """Read an image file and return it as a single-channel greyscale image.

    Args:
        file_name: Path handed to ``cv2.imread``.
        filter_colours: When truthy, coloured pixels are whitened with
            ``non_greys_to_white`` before the greyscale conversion.
        colour_filter_threshold: Threshold forwarded to ``non_greys_to_white``.

    Returns:
        The greyscale image produced by ``cv2.cvtColor``.

    Raises:
        RuntimeError: If ``cv2.imread`` returns ``None``.
    """
    colour_img = cv2.imread(file_name)
    if colour_img is None:
        raise RuntimeError("Failed to load image")

    if filter_colours:
        source = non_greys_to_white(colour_img, colour_filter_threshold)
    else:
        source = colour_img
    return cv2.cvtColor(source, cv2.COLOR_BGR2GRAY)


def preprocess_image(original, gaussian_blur_size, adaptive_threshold_block_size, adaptive_threshold_mean_adjustment, num_dilations):
    """Blur, binarise (inverted) and dilate a greyscale image.

    Args:
        original: Greyscale input image.
        gaussian_blur_size: Side length of the square Gaussian blur kernel.
        adaptive_threshold_block_size: Block size passed to
            ``cv2.adaptiveThreshold``.
        adaptive_threshold_mean_adjustment: Constant passed to
            ``cv2.adaptiveThreshold`` as its final argument.
        num_dilations: Number of times the thresholded image is dilated with a
            3x3 cross-shaped kernel.

    Returns:
        The processed binary image.
    """
    blur_kernel = (gaussian_blur_size, gaussian_blur_size)
    blurred = cv2.GaussianBlur(original, blur_kernel, 0)

    binary = cv2.adaptiveThreshold(
        blurred,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV,
        adaptive_threshold_block_size,
        adaptive_threshold_mean_adjustment,
    )

    cross = np.array(
        [
            [0, 1, 0],
            [1, 1, 1],
            [0, 1, 0],
        ],
        np.uint8,
    )
    for _ in range(num_dilations):
        binary = cv2.dilate(binary, cross)
    return binary


def morph_open_image(img, kernel_size, iterations=1):
    """Apply a morphological opening to ``img`` with a rectangular element.

    Args:
        img: Image to open.
        kernel_size: Size tuple handed to ``cv2.getStructuringElement``.
        iterations: Number of iterations forwarded to ``cv2.morphologyEx``.

    Returns:
        The opened image.
    """
    structuring_element = cv2.getStructuringElement(cv2.MORPH_RECT, kernel_size)
    opened = cv2.morphologyEx(img, cv2.MORPH_OPEN, structuring_element, iterations=iterations)
    return opened


def get_fundamental_frequency(fft):
    """Return the index of the strongest non-DC bin in the first half of ``fft``.

    Args:
        fft: Array of FFT coefficients; it is not modified.

    Returns:
        The bin index (a plain ``int``) with the largest magnitude among the
        first ``len(fft) // 2`` bins, with bin 0 excluded from the contest.
    """
    half = len(fft) // 2
    magnitudes = abs(fft[:half])
    # Zero the DC term so a constant offset never wins the argmax.
    magnitudes[0] = 0
    return int(magnitudes.argmax())


def get_line_fft(img, line_detector_element_size, axis):
    """Return the FFT of the line profile of ``img`` along one axis.

    The image is first opened with a one-pixel-thin rectangular element of
    size ``(line_detector_element_size, 1)`` when ``axis`` is 1 and
    ``(1, line_detector_element_size)`` otherwise, then summed along ``axis``
    and Fourier transformed.

    Args:
        img: Binary image to analyse.
        line_detector_element_size: Length of the thin structuring element.
        axis: Axis along which pixel values are summed.

    Returns:
        The complex FFT of the summed profile.
    """
    if axis == 1:
        element_size = (line_detector_element_size, 1)
    else:
        element_size = (1, line_detector_element_size)

    line_img = morph_open_image(img, element_size)
    profile = np.sum(line_img, axis=axis)
    return np.fft.fft(profile)


def get_line_frequency(img, line_detector_element_size, axis):
    """Return the dominant frequency of the line profile of ``img``.

    Combines ``get_line_fft`` and ``get_fundamental_frequency``; the arguments
    are forwarded unchanged to ``get_line_fft``.
    """
    spectrum = get_line_fft(img, line_detector_element_size, axis)
    return get_fundamental_frequency(spectrum)


def find_biggest_contour(img):
    """Return the external contour of ``img`` that encloses the largest area.

    Args:
        img: Binary image searched with ``cv2.findContours``.

    Returns:
        The contour with the greatest ``cv2.contourArea`` (the first one on a
        tie), or ``None`` when no contour has a strictly positive area.
    """
    # OpenCV 3.x returns (image, contours, hierarchy) whereas 2.x and 4.x
    # return (contours, hierarchy); the contour list is always second from
    # the end, so index from the back to work with every version.
    contours = cv2.findContours(img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    biggest = max(contours, key=cv2.contourArea, default=None)
    if biggest is None or cv2.contourArea(biggest) <= 0:
        return None
    return biggest


def erode_contour(img_shape, contour, erosion_kernel_size, iterations):
    """Smooth a contour by filling it, opening the mask and re-tracing it.

    The contour is drawn filled onto a blank mask of shape ``img_shape``, the
    mask is morphologically opened with a square element, and the biggest
    contour of the result is returned.

    Args:
        img_shape: Shape of the mask to draw on.
        contour: Contour to process.
        erosion_kernel_size: Side length of the square structuring element.
        iterations: Number of opening iterations.

    Returns:
        Whatever ``find_biggest_contour`` finds in the opened mask.
    """
    mask = np.zeros(img_shape, dtype=np.uint8)
    # A negative thickness makes drawContours fill the contour interior.
    cv2.drawContours(mask, [contour], 0, 255, -1)

    square_kernel = (erosion_kernel_size, erosion_kernel_size)
    opened_mask = morph_open_image(mask, square_kernel, iterations)
    return find_biggest_contour(opened_mask)


def get_contour_corners(img, contour):
    """Pick the four extreme vertices of ``contour`` as quadrilateral corners.

    The vertex with the smallest x + y is taken as top-left, the largest x + y
    as bottom-right, the smallest x - y as bottom-left and the largest x - y
    as top-right. On ties the earliest vertex in ``contour`` wins.

    Args:
        img: Two-dimensional image; only its shape is used, to seed sentinels.
        contour: Sequence of vertices, each wrapping one (x, y) point.

    Returns:
        ``(top_left, top_right, bottom_right, bottom_left)``. If the contour
        is empty the sentinel points are returned unchanged.
    """
    height, width = img.shape

    # Each sentinel scores worse than any vertex lying inside the image, so
    # the first real vertex always replaces it.
    top_left, smallest_sum = [width, height], width + height
    bottom_right, largest_sum = [-1, -1], -2
    bottom_left, smallest_diff = [width, -1], width + 1
    top_right, largest_diff = [-1, height], -1 - height

    for vertex in contour:
        point = vertex[0]
        coord_sum = point[0] + point[1]
        coord_diff = point[0] - point[1]

        if coord_sum < smallest_sum:
            top_left, smallest_sum = point, coord_sum
        if coord_sum > largest_sum:
            bottom_right, largest_sum = point, coord_sum
        if coord_diff < smallest_diff:
            bottom_left, smallest_diff = point, coord_diff
        if coord_diff > largest_diff:
            top_right, largest_diff = point, coord_diff

    return top_left, top_right, bottom_right, bottom_left


def segment_length(p1, p2):
    """Return the Euclidean distance between two (x, y) points."""
    x1, y1 = p1[0], p1[1]
    x2, y2 = p2[0], p2[1]
    return math.sqrt((x1 - x2) ** 2 + (y1 - y2) ** 2)


def get_longest_side(poly):
    """Return the length of the longest edge of the closed polygon ``poly``.

    Edges join each vertex to the next one, with the last vertex wrapping
    round to the first. The result is 0 when every edge has zero length.

    Args:
        poly: Non-empty indexable sequence of (x, y) vertices.

    Returns:
        The greatest edge length as measured by ``segment_length``.
    """
    # Pair every vertex with its predecessor; the first vertex's predecessor
    # is the last vertex, which closes the polygon.
    predecessors = [poly[-1], *poly[:-1]]
    edge_lengths = [segment_length(start, end) for start, end in zip(predecessors, poly)]
    # Seeding with 0 reproduces a running maximum that starts from zero.
    return max([0, *edge_lengths])


def extract_square(img, top_left, top_right, bottom_right, bottom_left):
    """Warp the quadrilateral given by four corners onto a square image.

    The output side length is the longest side of the quadrilateral
    (truncated to an integer for the output size).

    Args:
        img: Source image.
        top_left: Corner mapped to the output's top-left.
        top_right: Corner mapped to the output's top-right.
        bottom_right: Corner mapped to the output's bottom-right.
        bottom_left: Corner mapped to the output's bottom-left.

    Returns:
        The perspective-corrected square image.
    """
    corners = [top_left, top_right, bottom_right, bottom_left]
    side = get_longest_side(corners)
    far_edge = side - 1
    targets = [[0, 0], [far_edge, 0], [far_edge, far_edge], [0, far_edge]]

    src_points = np.array(corners, dtype=np.float32)
    dst_points = np.array(targets, dtype=np.float32)
    transform = cv2.getPerspectiveTransform(src_points, dst_points)

    output_size = (int(side), int(side))
    return cv2.warpPerspective(img, transform, output_size)


def get_threshold_from_quantile(img, quantile):
    """Return the pixel value found at ``quantile`` of the sorted pixels.

    Args:
        img: Two-dimensional image.
        quantile: Fraction of the way through the ascending-sorted pixel
            values at which to read the threshold. Values outside [0, 1] are
            clamped to the darkest / brightest pixel respectively.

    Returns:
        The pixel value at the requested position.

    Raises:
        IndexError: If ``img`` contains no pixels.
    """
    height, width = img.shape
    num_pixels = height * width
    pixels = np.sort(np.reshape(img, num_pixels))

    # quantile == 1.0 would index one past the end, and a negative quantile
    # would silently wrap round to the bright end, so clamp to valid indices.
    index = min(max(int(num_pixels * quantile), 0), num_pixels - 1)
    return pixels[index]


def extract_grid_colours(img, num_rows, num_cols, sampling_block_size_ratio):
    """Sample the mean brightness around the centre of every grid cell.

    The image is divided into ``num_rows`` x ``num_cols`` equal cells. For
    each cell a window centred on the cell centre is averaged; the window
    spans ``sampling_block_size_ratio`` of the cell in each direction
    (rounded down to a whole number of pixels either side of the centre).
    Any part of a window that would fall outside the image is ignored.

    Args:
        img: Two-dimensional greyscale image.
        num_rows: Number of grid rows.
        num_cols: Number of grid columns.
        sampling_block_size_ratio: Fraction of a cell covered by the window.

    Returns:
        A ``num_rows`` x ``num_cols`` nested list of mean values (floats).
    """
    height, width = img.shape
    row_delta = max(int(height * sampling_block_size_ratio / num_rows / 2), 0)
    col_delta = max(int(width * sampling_block_size_ratio / num_cols / 2), 0)

    grid = []
    for row in range(num_rows):
        y = int(((row + 0.5) / num_rows) * height)
        top = max(y - row_delta, 0)
        bottom = min(y + row_delta + 1, height)

        line = []
        for col in range(num_cols):
            x = int(((col + 0.5) / num_cols) * width)
            left = max(x - col_delta, 0)
            right = min(x + col_delta + 1, width)
            # ndarray.mean accumulates in float64, so 8-bit pixels cannot
            # wrap around the way a per-pixel uint8 running total can.
            line.append(float(img[top:bottom, left:right].mean()))
        grid.append(line)

    return grid


def grid_colours_to_blocks(grid_colours, num_rows, num_cols, sampling_threshold):
    """Classify grid cells as blocks or open squares, enforcing symmetry.

    Each cell is paired with the cell at the position rotated by 180 degrees
    and both receive the same value: 0 when both samples are above
    ``sampling_threshold``, 1 when both are below. When the pair disagrees
    (or a sample equals the threshold) the sample further from the threshold
    decides and a warning is flagged.

    Args:
        grid_colours: ``num_rows`` x ``num_cols`` nested list of sampled
            brightness values; it is not modified.
        num_rows: Number of grid rows.
        num_cols: Number of grid columns.
        sampling_threshold: Brightness separating blocks from open squares.

    Returns:
        ``(warning, grid)`` where ``warning`` is True if any pair disagreed
        and ``grid`` is a nested list of 0 (open) / 1 (block) values.
    """
    grid = copy.deepcopy(grid_colours)
    warning = False

    # Ceiling division: round() uses banker's rounding, so round(n / 2) drops
    # the middle row for n = 5, 9, 13, ... and would leave it unclassified.
    for row in range((num_rows + 1) // 2):
        row2 = num_rows - row - 1
        for col in range(num_cols):
            col2 = num_cols - col - 1
            delta1 = grid_colours[row][col] - sampling_threshold
            delta2 = grid_colours[row2][col2] - sampling_threshold

            if (delta1 > 0) and (delta2 > 0):
                block = 0
            elif (delta1 < 0) and (delta2 < 0):
                block = 1
            else:
                warning = True
                # Trust whichever sample lies further from the threshold.
                decisive = delta1 if abs(delta1) > abs(delta2) else delta2
                block = 1 if decisive < 0 else 0

            grid[row][col] = grid[row2][col2] = block

    return warning, grid


def draw_point(image, point, colour):
    """Paint a 21x21 pixel square centred on ``point`` onto ``image`` in place.

    Pixels of the square that fall outside the image are skipped.

    Args:
        image: Three-dimensional image array (height, width, channels).
        point: (x, y) centre of the marker.
        colour: Value assigned to every covered pixel.
    """
    height, width, _ = image.shape
    reach = 10

    # Clip the square to the image. Both ends are clamped into [0, size] so a
    # negative bound can never be interpreted as "count from the end".
    left = min(max(point[0] - reach, 0), width)
    right = min(max(point[0] + reach + 1, 0), width)
    top = min(max(point[1] - reach, 0), height)
    bottom = min(max(point[1] + reach + 1, 0), height)

    if left < right and top < bottom:
        image[top:bottom, left:right] = colour


def show_image(image):
    """Display ``image`` in a resizable window until the user presses 'q'."""
    window_name = 'xword'
    cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
    cv2.imshow(window_name, image)

    quit_key = ord('q')
    while True:
        # Only the low byte of the key code is compared against 'q'.
        pressed = cv2.waitKey() & 0xFF
        if pressed == quit_key:
            break

    cv2.destroyAllWindows()


def _render_grid(grid, num_rows, num_cols, grid_line_thickness, grid_square_size, grid_border_size):
    """Draw ``grid`` as a greyscale image: white squares for 0 cells on a black lattice."""
    step = grid_square_size + grid_line_thickness
    grid_height = num_rows * step + grid_line_thickness
    grid_width = num_cols * step + grid_line_thickness

    # Start from a white canvas, paint the whole grid area black, then punch
    # out a white square for every open cell; what remains black forms the
    # grid lines and the blocks.
    output = np.full([2 * grid_border_size + grid_height, 2 * grid_border_size + grid_width], 255, dtype=np.uint8)
    cv2.rectangle(output, (grid_border_size, grid_border_size), (grid_border_size + grid_width - 1, grid_border_size + grid_height - 1), 0, -1)
    for row in range(num_rows):
        y = row * step + grid_line_thickness + grid_border_size
        for col in range(num_cols):
            if grid[row][col] == 0:
                x = col * step + grid_line_thickness + grid_border_size
                cv2.rectangle(output, (x, y), (x + grid_square_size - 1, y + grid_square_size - 1), 255, -1)
    return output


def extract_crossword(
    file_name,
    filter_colours=False,
    colour_filter_threshold=48,
    gaussian_blur_size=11,
    adaptive_threshold_block_size=11,
    adaptive_threshold_mean_adjustment=2,
    square=True,
    num_dilations=1,
    contour_erosion_kernel_size=5,
    contour_erosion_iterations=6,
    line_detector_element_size=51,
    sampling_block_size_ratio=0.25,
    sampling_threshold_quantile=0.3,
    sampling_threshold=None,
    grid_line_thickness=4,
    grid_square_size=64,
    grid_border_size=20,
):
    """Extract a crossword grid from an image file and render it as a clean PNG.

    The image is loaded as greyscale and thresholded; the biggest contour is
    taken as the grid outline and warped to a square; the row and column
    counts are estimated from the dominant frequency of the grid lines; each
    cell is sampled and classified as block or open; and the result is drawn
    as a fresh image.

    Args:
        file_name: Path of the image to read.
        filter_colours: Whiten coloured pixels before greyscale conversion.
        colour_filter_threshold: Channel-spread threshold for that filter.
        gaussian_blur_size: Side of the Gaussian blur kernel.
        adaptive_threshold_block_size: Block size for adaptive thresholding.
        adaptive_threshold_mean_adjustment: Constant for adaptive thresholding.
        square: When true, a warning is recorded if the detected row and
            column counts differ.
        num_dilations: Number of dilations applied after thresholding.
        contour_erosion_kernel_size: Kernel side used to clean the outline.
        contour_erosion_iterations: Iterations used to clean the outline.
        line_detector_element_size: Length of the line-detecting element.
        sampling_block_size_ratio: Fraction of each cell that is sampled.
        sampling_threshold_quantile: Quantile of the warped image's pixel
            values used as the block threshold when ``sampling_threshold``
            is None.
        sampling_threshold: Explicit block threshold; overrides the quantile.
        grid_line_thickness: Line thickness, in pixels, of the rendered grid.
        grid_square_size: Cell size, in pixels, of the rendered grid.
        grid_border_size: White margin, in pixels, around the rendered grid.

    Returns:
        ``(png_bytes, warnings)``: the encoded PNG and a list of warning
        strings.

    Raises:
        RuntimeError: If the image cannot be loaded, no grid outline is found,
            no grid lines are detected, or the PNG cannot be encoded.
    """
    warnings = []

    original = load_image_as_greyscale(file_name, filter_colours, colour_filter_threshold)

    img = preprocess_image(original, gaussian_blur_size, adaptive_threshold_block_size, adaptive_threshold_mean_adjustment, num_dilations)

    biggest = find_biggest_contour(img)
    if biggest is None:
        raise RuntimeError("Failed to find crossword outline")
    biggest = erode_contour(img.shape, biggest, contour_erosion_kernel_size, contour_erosion_iterations)
    if biggest is None:
        # The opening can wipe out an outline that is too small or too thin.
        raise RuntimeError("Failed to find crossword outline")

    top_left, top_right, bottom_right, bottom_left = get_contour_corners(img, biggest)

    img = extract_square(img, top_left, top_right, bottom_right, bottom_left)

    num_rows = get_line_frequency(img, line_detector_element_size, 1)
    num_cols = get_line_frequency(img, line_detector_element_size, 0)
    if num_rows < 1 or num_cols < 1:
        # A zero count would otherwise surface later as a division by zero.
        raise RuntimeError("Failed to detect crossword grid lines")
    if square and (num_rows != num_cols):
        warnings.append("Crossword is not square")

    # Cells are sampled from the unthresholded greyscale image, warped with
    # the same corners as the thresholded one.
    block_img = extract_square(original, top_left, top_right, bottom_right, bottom_left)

    if sampling_threshold is None:
        sampling_threshold = get_threshold_from_quantile(block_img, sampling_threshold_quantile)

    grid_colours = extract_grid_colours(block_img, num_rows, num_cols, sampling_block_size_ratio)
    warning, grid = grid_colours_to_blocks(grid_colours, num_rows, num_cols, sampling_threshold)
    if warning:
        warnings.append("Some blocks may be the wrong colour")

    output = _render_grid(grid, num_rows, num_cols, grid_line_thickness, grid_square_size, grid_border_size)

    encoded_ok, png = cv2.imencode('.png', output)
    if not encoded_ok:
        raise RuntimeError("Failed to encode output image")
    return png.tobytes(), warnings