No description

xword.py 8.3KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244
  1. import math
  2. import cv2
  3. import numpy as np
  4. import peakutils
  5. import copy
  6. import argparse
  7. def preprocess_image(original, gaussian_blur_size, adaptive_threshold_block_size, adaptive_threshold_mean_adjustment, num_dilations):
  8. img = cv2.GaussianBlur(original, (gaussian_blur_size, gaussian_blur_size), 0)
  9. img = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, adaptive_threshold_block_size, adaptive_threshold_mean_adjustment)
  10. kernel = np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]], np.uint8)
  11. for i in range(num_dilations):
  12. img = cv2.dilate(img, kernel)
  13. return img
  14. def find_biggest_contour(img):
  15. contours, hierarchy = cv2.findContours(img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
  16. biggest = None
  17. max_area = 0
  18. for contour in contours:
  19. area = cv2.contourArea(contour)
  20. if area > max_area:
  21. biggest = contour
  22. max_area = area
  23. return biggest
  24. def erode_contour(img_shape, contour, kernel_size, iterations):
  25. contour_img = np.zeros(img_shape, dtype=np.uint8)
  26. cv2.drawContours(contour_img, [contour], 0, 255, -1)
  27. kernel = np.ones((kernel_size, kernel_size), dtype=np.uint8)
  28. contour_img = cv2.erode(contour_img, kernel, iterations=iterations)
  29. contour_img = cv2.dilate(contour_img, kernel, iterations=iterations)
  30. return find_biggest_contour(contour_img)
  31. def get_contour_corners(img, contour):
  32. height, width = img.shape
  33. top_left = [width, height]
  34. top_right = [-1, height]
  35. bottom_left = [width, -1]
  36. bottom_right = [-1, -1]
  37. for vertex in contour:
  38. point = vertex[0]
  39. sum = point[0] + point[1]
  40. diff = point[0] - point[1]
  41. if sum < top_left[0] + top_left[1]:
  42. top_left = point
  43. if sum > bottom_right[0] + bottom_right[1]:
  44. bottom_right = point
  45. if diff < bottom_left[0] - bottom_left[1]:
  46. bottom_left = point
  47. if diff > top_right[0] - top_right[1]:
  48. top_right = point
  49. return top_left, top_right, bottom_right, bottom_left
  50. def segment_length(p1, p2):
  51. dx = p1[0] - p2[0]
  52. dy = p1[1] - p2[1]
  53. return math.sqrt(dx ** 2 + dy ** 2)
  54. def get_longest_side(poly):
  55. previous = poly[-1]
  56. max = 0
  57. for current in poly:
  58. len = segment_length(previous, current)
  59. if len > max:
  60. max = len
  61. previous = current
  62. return max
  63. def extract_square(img, top_left, top_right, bottom_right, bottom_left):
  64. src = [top_left, top_right, bottom_right, bottom_left]
  65. longest = get_longest_side(src)
  66. dst = [[0, 0], [longest - 1, 0], [longest - 1, longest - 1], [0, longest - 1]]
  67. m = cv2.getPerspectiveTransform(np.array(src, dtype=np.float32), np.array(dst, dtype=np.float32))
  68. return cv2.warpPerspective(img, m, (int(longest), int(longest)))
  69. def get_fundamental_frequency(fft):
  70. mag = abs(fft[0:len(fft) // 2])
  71. mag[0] = 0
  72. return int(np.argmax(mag))
  73. def get_threshold_from_quantile(img, quantile):
  74. height, width = img.shape
  75. num_pixels = height * width
  76. pixels = np.sort(np.reshape(img, num_pixels))
  77. return pixels[int(num_pixels * quantile)]
  78. def extract_grid_colours(img, num_rows, num_cols, sampling_block_size_ratio):
  79. height, width = img.shape
  80. row_delta = int(height * sampling_block_size_ratio / num_rows / 2)
  81. col_delta = int(width * sampling_block_size_ratio / num_cols / 2)
  82. sampling_block_area = (2 * row_delta + 1) * (2 * col_delta + 1)
  83. grid = []
  84. for row in range(num_rows):
  85. line = []
  86. y = int(((row + 0.5) / num_rows) * height)
  87. for col in range(num_cols):
  88. sum = 0
  89. x = int(((col + 0.5) / num_cols) * width)
  90. for dy in range(-row_delta, row_delta + 1):
  91. for dx in range(-col_delta, col_delta + 1):
  92. sum += img[y + dy, x + dx]
  93. line.append(sum / sampling_block_area)
  94. grid.append(line)
  95. return grid
  96. def grid_colours_to_blocks(grid_colours, num_rows, num_cols, sampling_threshold):
  97. grid = copy.deepcopy(grid_colours)
  98. warning = False
  99. for row in range(round(num_rows / 2)):
  100. for col in range(num_cols):
  101. row2 = num_rows - row - 1
  102. col2 = num_cols - col - 1
  103. delta1 = grid_colours[row][col] - sampling_threshold
  104. delta2 = grid_colours[row2][col2] - sampling_threshold
  105. if (delta1 > 0) and (delta2 > 0):
  106. block = 0
  107. elif (delta1 < 0) and (delta2 < 0):
  108. block = 1
  109. else:
  110. warning = True
  111. if abs(delta1) > abs(delta2):
  112. block = 1 if delta1 < 0 else 0
  113. else:
  114. block = 1 if delta2 < 0 else 0
  115. grid[row][col] = grid[row2][col2] = block
  116. return warning, grid
  117. def draw_point(image, point, colour):
  118. height, width, _ = image.shape
  119. for dx in range(-10, 11):
  120. for dy in range(-10, 11):
  121. x = point[0] + dx
  122. y = point[1] + dy
  123. if (x >= 0) and (y >= 0) and (x < width) and (y < height):
  124. image[y, x] = colour
  125. def show_image(image):
  126. cv2.namedWindow('xword', cv2.WINDOW_NORMAL)
  127. cv2.imshow('xword', image)
  128. while cv2.waitKey() & 0xFF != ord('q'):
  129. pass
  130. cv2.destroyAllWindows()
  131. def extract_crossword(
  132. file_name,
  133. gaussian_blur_size=11,
  134. adaptive_threshold_block_size=11,
  135. adaptive_threshold_mean_adjustment=2,
  136. square=True,
  137. num_dilations=1,
  138. contour_erosion_kernel_size=5,
  139. contour_erosion_iterations=5,
  140. line_detector_element_size=51,
  141. sampling_block_size_ratio=0.25,
  142. sampling_threshold_quantile=0.3,
  143. sampling_threshold=None,
  144. grid_line_thickness=4,
  145. grid_square_size=64,
  146. grid_border_size=20,
  147. ):
  148. warnings = []
  149. original = cv2.imread(file_name, cv2.IMREAD_GRAYSCALE)
  150. if original is None:
  151. raise RuntimeError("Failed to load image")
  152. img = preprocess_image(original, gaussian_blur_size, adaptive_threshold_block_size, adaptive_threshold_mean_adjustment, num_dilations)
  153. biggest = find_biggest_contour(img)
  154. biggest = erode_contour(img.shape, biggest, contour_erosion_kernel_size, contour_erosion_iterations)
  155. top_left, top_right, bottom_right, bottom_left = get_contour_corners(img, biggest)
  156. img = extract_square(img, top_left, top_right, bottom_right, bottom_left)
  157. horiz_elem = cv2.getStructuringElement(cv2.MORPH_RECT, (line_detector_element_size, 1))
  158. horiz_lines = cv2.erode(img, horiz_elem)
  159. horiz_lines = cv2.dilate(horiz_lines, horiz_elem)
  160. vert_elem = cv2.getStructuringElement(cv2.MORPH_RECT, (1, line_detector_element_size))
  161. vert_lines = cv2.erode(img, vert_elem)
  162. vert_lines = cv2.dilate(vert_lines, vert_elem)
  163. row_fft = np.fft.fft(np.sum(horiz_lines, axis=1))
  164. col_fft = np.fft.fft(np.sum(vert_lines, axis=0))
  165. num_rows = get_fundamental_frequency(row_fft)
  166. num_cols = get_fundamental_frequency(col_fft)
  167. if square and (num_rows != num_cols):
  168. warnings.append("Crossword is not square")
  169. block_img = extract_square(original, top_left, top_right, bottom_right, bottom_left)
  170. if sampling_threshold is None:
  171. sampling_threshold = get_threshold_from_quantile(block_img, sampling_threshold_quantile)
  172. else:
  173. sampling_threshold = sampling_threshold
  174. grid_colours = extract_grid_colours(block_img, num_rows, num_cols, sampling_block_size_ratio)
  175. warning, grid = grid_colours_to_blocks(grid_colours, num_rows, num_cols, sampling_threshold)
  176. if warning:
  177. warnings.append("Some blocks may be the wrong colour")
  178. step = grid_square_size + grid_line_thickness
  179. grid_height = num_rows * step + grid_line_thickness
  180. grid_width = num_cols * step + grid_line_thickness
  181. output = np.full([2 * grid_border_size + grid_height, 2 * grid_border_size + grid_width], 255, dtype=np.uint8)
  182. cv2.rectangle(output, (grid_border_size, grid_border_size), (grid_border_size + grid_width - 1, grid_border_size + grid_height - 1), 0, -1)
  183. for row in range(num_rows):
  184. y = row * step + grid_line_thickness + grid_border_size
  185. for col in range(num_cols):
  186. if grid[row][col] == 0:
  187. x = col * step + grid_line_thickness + grid_border_size
  188. cv2.rectangle(output, (x, y), (x + grid_square_size - 1, y + grid_square_size - 1), 255, -1)
  189. _, png = cv2.imencode('.png', output)
  190. return png.tobytes(), warnings