|
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+import math
|
|
|
2
|
+import cv2
|
|
|
3
|
+import numpy as np
|
|
|
4
|
+import peakutils
|
|
|
5
|
+import copy
|
|
|
6
|
+import argparse
|
|
|
7
|
+
|
|
|
8
|
+
|
|
|
9
|
def preprocess_image(original, gaussian_blur_size, adaptive_threshold_block_size, adaptive_threshold_mean_adjustment, num_dilations):
    """Blur, adaptively threshold (inverted), and dilate a greyscale image.

    Produces a binary image where dark foreground features of *original*
    (grid lines, block squares) become white, thickened by *num_dilations*
    passes of a cross-shaped kernel.
    """
    blurred = cv2.GaussianBlur(original, (gaussian_blur_size, gaussian_blur_size), 0)
    binary = cv2.adaptiveThreshold(
        blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV,
        adaptive_threshold_block_size, adaptive_threshold_mean_adjustment)
    # Cross-shaped structuring element: thickens lines without rounding corners
    # as aggressively as a full 3x3 square would.
    cross_kernel = np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]], np.uint8)
    for _ in range(num_dilations):
        binary = cv2.dilate(binary, cross_kernel)
    return binary
|
|
|
16
|
+
|
|
|
17
|
+
|
|
|
18
|
def find_biggest_contour(img):
    """Return the external contour of *img* with the largest area.

    Returns None when no contour has a strictly positive area.
    """
    contours, _ = cv2.findContours(img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    best = None
    best_area = 0
    for candidate in contours:
        candidate_area = cv2.contourArea(candidate)
        if candidate_area > best_area:
            best, best_area = candidate, candidate_area

    return best
|
|
|
30
|
+
|
|
|
31
|
+
|
|
|
32
|
def erode_contour(img_shape, contour, kernel_size, iterations):
    """Smooth a contour by filling it into a mask, opening it, and re-extracting.

    The erode/dilate pair (a morphological opening) removes thin protrusions
    from the filled contour; the biggest contour of the cleaned mask is
    returned.
    """
    mask = np.zeros(img_shape, dtype=np.uint8)
    cv2.drawContours(mask, [contour], 0, 255, -1)

    square_kernel = np.ones((kernel_size, kernel_size), dtype=np.uint8)
    mask = cv2.erode(mask, square_kernel, iterations=iterations)
    mask = cv2.dilate(mask, square_kernel, iterations=iterations)
    return find_biggest_contour(mask)
|
|
|
40
|
+
|
|
|
41
|
+
|
|
|
42
|
def get_contour_corners(img, contour):
    """Find the four extreme corner points of *contour*.

    Corners are selected by extremising x + y (top-left / bottom-right) and
    x - y (top-right / bottom-left) over the contour's vertices.  *img* is
    only used for its shape, to seed the sentinels.

    Returns (top_left, top_right, bottom_right, bottom_left) as [x, y] points.
    """
    height, width = img.shape

    # Sentinel corners chosen so that any real vertex beats them on the
    # first comparison.
    top_left = [width, height]
    top_right = [-1, height]
    bottom_left = [width, -1]
    bottom_right = [-1, -1]

    for vertex in contour:
        point = vertex[0]
        coord_sum = point[0] + point[1]
        coord_diff = point[0] - point[1]
        if coord_sum < top_left[0] + top_left[1]:
            top_left = point
        if coord_sum > bottom_right[0] + bottom_right[1]:
            bottom_right = point
        if coord_diff > top_right[0] - top_right[1]:
            top_right = point
        if coord_diff < bottom_left[0] - bottom_left[1]:
            bottom_left = point

    return top_left, top_right, bottom_right, bottom_left
|
|
|
64
|
+
|
|
|
65
|
+
|
|
|
66
|
def segment_length(p1, p2):
    """Return the Euclidean distance between two [x, y] points.

    Uses math.hypot, which is both clearer and numerically safer than the
    hand-rolled sqrt(dx**2 + dy**2) (no intermediate overflow for large
    coordinates).
    """
    return math.hypot(p1[0] - p2[0], p1[1] - p2[1])
|
|
|
70
|
+
|
|
|
71
|
+
|
|
|
72
|
def get_longest_side(poly):
    """Return the length of the longest edge of the closed polygon *poly*.

    *poly* is a sequence of [x, y] points; the edge from the last point back
    to the first is included.  (Fix: the original shadowed the builtins
    ``max`` and ``len`` with local variables.)
    """
    longest = 0
    previous = poly[-1]
    for current in poly:
        side = segment_length(previous, current)
        if side > longest:
            longest = side
        previous = current
    return longest
|
|
|
81
|
+
|
|
|
82
|
+
|
|
|
83
|
def extract_square(img, top_left, top_right, bottom_right, bottom_left):
    """Perspective-warp the quadrilateral given by the four corners to a square.

    The output square's side length equals the quadrilateral's longest side,
    so no edge of the source region is downsampled.
    """
    corners = [top_left, top_right, bottom_right, bottom_left]
    side = get_longest_side(corners)
    target = [[0, 0], [side - 1, 0], [side - 1, side - 1], [0, side - 1]]
    transform = cv2.getPerspectiveTransform(
        np.array(corners, dtype=np.float32),
        np.array(target, dtype=np.float32))
    return cv2.warpPerspective(img, transform, (int(side), int(side)))
|
|
|
89
|
+
|
|
|
90
|
+
|
|
|
91
|
def get_fundamental_frequency(ffts):
    """Estimate the common fundamental frequency (in bins) of a set of FFTs.

    For each spectrum, peaks are detected in the magnitude of its reversed
    upper half (mirror of the lower half, which avoids the DC component).
    An initial estimate comes from the spectrum with the most peaks (its
    last peak divided by the peak count), then the estimate is refined by
    minimising the squared error between all detected peaks and integer
    multiples of nearby candidate frequencies.

    Returns the refined frequency as an int, or None when any spectrum has
    no peaks, *ffts* is empty, or the estimate is degenerate (< 2).
    """
    all_peak_indexes = []
    max_peak_count = None
    f_est = None
    for fft in ffts:
        # Use the upper half of the fft array, since this seems to always
        # exclude the DC component.
        peak_indexes = peakutils.indexes(np.flip(abs(fft[len(fft) // 2:])), thres=0.3)
        peak_count = len(peak_indexes)
        if peak_count < 1:
            return None
        if (max_peak_count is None) or (peak_count > max_peak_count):
            max_peak_count = peak_count
            # The last peak sits near peak_count * fundamental.
            f_est = round(peak_indexes[peak_count - 1] / peak_count)
        all_peak_indexes.append(peak_indexes)

    # Fix: an empty ffts list left f_est as None, making `f_est < 2` raise
    # TypeError; treat it as "no estimate" instead.
    if (f_est is None) or (f_est < 2):
        return None

    # Refine: try small offsets around the estimate and keep the candidate
    # whose integer multiples best fit all detected peaks.
    min_err = None
    f = None
    for delta in range(-2, 3):
        err = 0
        f_current = f_est + delta
        for peak_indexes in all_peak_indexes:
            for i, peak_index in enumerate(peak_indexes):
                err += (peak_index - f_current * (i + 1)) ** 2
        if (min_err is None) or (err < min_err):
            min_err = err
            f = f_current

    return int(f)
|
|
|
122
|
+
|
|
|
123
|
+
|
|
|
124
|
def get_threshold_from_quantile(img, quantile):
    """Return the pixel value at the given *quantile* (0.0-1.0) of *img*.

    Pixels are ranked in ascending order and the value at position
    int(num_pixels * quantile) is returned.  Fix: the index is clamped so
    quantile == 1.0 returns the maximum pixel instead of raising IndexError.
    """
    height, width = img.shape
    num_pixels = height * width
    pixels = np.sort(np.reshape(img, num_pixels))
    index = min(int(num_pixels * quantile), num_pixels - 1)
    return pixels[index]
|
|
|
129
|
+
|
|
|
130
|
+
|
|
|
131
|
def extract_grid_colours(img, num_rows, num_cols, sampling_block_size_ratio):
    """Sample the mean grey level at the centre of each cell of a grid.

    The image is treated as a num_rows x num_cols grid; for each cell, a
    block of pixels centred on the cell (sized *sampling_block_size_ratio*
    of the cell) is averaged.

    Returns a list of rows, each a list of mean pixel values.
    """
    height, width = img.shape
    # Half-extents of the sampling block around each cell centre.  For
    # ratios <= 1 these never push a slice below index 0 or past the edge.
    row_delta = int(height * sampling_block_size_ratio / num_rows / 2)
    col_delta = int(width * sampling_block_size_ratio / num_cols / 2)
    sampling_block_area = (2 * row_delta + 1) * (2 * col_delta + 1)

    grid = []
    for row in range(num_rows):
        line = []
        y = int(((row + 0.5) / num_rows) * height)
        for col in range(num_cols):
            x = int(((col + 0.5) / num_cols) * width)
            # One vectorised slice-sum replaces the original per-pixel
            # double loop; np.sum upcasts uint8 so there is no overflow.
            block = img[y - row_delta:y + row_delta + 1,
                        x - col_delta:x + col_delta + 1]
            line.append(block.sum() / sampling_block_area)
        grid.append(line)

    return grid
|
|
|
151
|
+
|
|
|
152
|
+
|
|
|
153
|
def grid_colours_to_blocks(grid_colours, num_rows, num_cols, sampling_threshold):
    """Classify sampled cell colours into blocks (1) and open cells (0).

    Crossword grids are 180-degree rotationally symmetric, so each cell is
    classified together with its rotational twin: both brighter than the
    threshold -> open (0), both darker -> block (1).  When the two disagree,
    the cell with the larger deviation from the threshold wins and *warning*
    is set.

    Returns (warning, grid) where grid is a new num_rows x num_cols list of
    0/1 values; *grid_colours* is not modified.
    """
    grid = copy.deepcopy(grid_colours)
    warning = False
    # Iterate the top half, including the middle row when num_rows is odd.
    # Fix: the original used round(num_rows / 2), but banker's rounding
    # gives round(2.5) == 2, which skipped the middle row of a 5-row grid
    # entirely (leaving raw colour values in the output).
    for row in range((num_rows + 1) // 2):
        for col in range(num_cols):
            row2 = num_rows - row - 1
            col2 = num_cols - col - 1
            delta1 = grid_colours[row][col] - sampling_threshold
            delta2 = grid_colours[row2][col2] - sampling_threshold

            if (delta1 > 0) and (delta2 > 0):
                block = 0
            elif (delta1 < 0) and (delta2 < 0):
                block = 1
            else:
                # Symmetric cells disagree: trust the stronger signal.
                warning = True
                if abs(delta1) > abs(delta2):
                    block = 1 if delta1 < 0 else 0
                else:
                    block = 1 if delta2 < 0 else 0

            grid[row][col] = grid[row2][col2] = block

    return warning, grid
|
|
|
177
|
+
|
|
|
178
|
+
|
|
|
179
|
def draw_point(image, point, colour):
    """Paint a 21x21 square of *colour* centred on *point* ([x, y]).

    Pixels falling outside the image are clipped.  Mutates *image* in place.
    """
    height, width, _ = image.shape
    centre_x, centre_y = point[0], point[1]
    for y in range(centre_y - 10, centre_y + 11):
        if y < 0 or y >= height:
            continue
        for x in range(centre_x - 10, centre_x + 11):
            if 0 <= x < width:
                image[y, x] = colour
|
|
|
187
|
+
|
|
|
188
|
+
|
|
|
189
|
def show_image(image):
    """Display *image* in a resizable window until the user presses 'q'."""
    cv2.namedWindow('xword', cv2.WINDOW_NORMAL)
    cv2.imshow('xword', image)
    while True:
        if cv2.waitKey() & 0xFF == ord('q'):
            break
    cv2.destroyAllWindows()
|
|
|
195
|
+
|
|
|
196
|
+
|
|
|
197
|
def extract_crossword(
    file_name,
    gaussian_blur_size=11,
    adaptive_threshold_block_size=11,
    adaptive_threshold_mean_adjustment=2,
    not_square=False,
    num_dilations=1,
    contour_erosion_kernel_size=5,
    contour_erosion_iterations=5,
    line_detector_element_size=51,
    sampling_block_size_ratio=0.25,
    sampling_threshold_quantile=0.3,
    sampling_threshold=None,
    grid_line_thickness=4,
    grid_square_size=64,
    grid_border_size=20,
):
    """Extract a crossword grid from a photo and render it as a clean PNG.

    Pipeline: threshold the photo, take the largest contour (assumed to be
    the grid), straighten it with a perspective warp, estimate the row and
    column counts from the FFT of the detected grid lines, sample each
    cell's colour, and redraw the grid as a synthetic image.

    Returns (png_bytes, warning) where *warning* is True when the sampled
    grid was not perfectly 180-degree rotationally symmetric.

    Raises RuntimeError if the image cannot be loaded or the grid size
    cannot be determined.
    """
    original = cv2.imread(file_name, cv2.IMREAD_GRAYSCALE)
    if original is None:
        raise RuntimeError("Failed to load image")

    img = preprocess_image(original, gaussian_blur_size, adaptive_threshold_block_size, adaptive_threshold_mean_adjustment, num_dilations)

    # Locate the grid outline, then smooth away protrusions before taking
    # its corners.
    biggest = find_biggest_contour(img)
    biggest = erode_contour(img.shape, biggest, contour_erosion_kernel_size, contour_erosion_iterations)

    top_left, top_right, bottom_right, bottom_left = get_contour_corners(img, biggest)

    img = extract_square(img, top_left, top_right, bottom_right, bottom_left)

    # Isolate long horizontal and vertical runs: these are the grid lines.
    horiz_elem = cv2.getStructuringElement(cv2.MORPH_RECT, (line_detector_element_size, 1))
    horiz_lines = cv2.erode(img, horiz_elem)
    horiz_lines = cv2.dilate(horiz_lines, horiz_elem)

    vert_elem = cv2.getStructuringElement(cv2.MORPH_RECT, (1, line_detector_element_size))
    vert_lines = cv2.erode(img, vert_elem)
    vert_lines = cv2.dilate(vert_lines, vert_elem)

    # The grid line spacing appears as the fundamental frequency of the
    # projected line images.
    row_fft = np.fft.fft(np.sum(horiz_lines, axis=1))
    col_fft = np.fft.fft(np.sum(vert_lines, axis=0))

    if not_square:
        num_rows = get_fundamental_frequency([row_fft])
        num_cols = get_fundamental_frequency([col_fft])
    else:
        num_rows = num_cols = get_fundamental_frequency([row_fft, col_fft])

    # get_fundamental_frequency returns None when it cannot find a clean
    # periodic signal; fail loudly here rather than crash later with a
    # confusing TypeError.
    if (num_rows is None) or (num_cols is None):
        raise RuntimeError("Failed to determine grid size")

    block_img = extract_square(original, top_left, top_right, bottom_right, bottom_left)

    # Fix: dropped the redundant `else: sampling_threshold = sampling_threshold`.
    if sampling_threshold is None:
        sampling_threshold = get_threshold_from_quantile(block_img, sampling_threshold_quantile)

    grid_colours = extract_grid_colours(block_img, num_rows, num_cols, sampling_block_size_ratio)
    warning, grid = grid_colours_to_blocks(grid_colours, num_rows, num_cols, sampling_threshold)

    # Render the grid: black canvas with grid lines implied by spacing,
    # white squares for open cells, white border around everything.
    step = grid_square_size + grid_line_thickness
    grid_height = num_rows * step + grid_line_thickness
    grid_width = num_cols * step + grid_line_thickness
    output = np.full([2 * grid_border_size + grid_height, 2 * grid_border_size + grid_width], 255, dtype=np.uint8)
    cv2.rectangle(output, (grid_border_size, grid_border_size), (grid_border_size + grid_width - 1, grid_border_size + grid_height - 1), 0, -1)
    for row in range(num_rows):
        y = row * step + grid_line_thickness + grid_border_size
        for col in range(num_cols):
            if grid[row][col] == 0:
                x = col * step + grid_line_thickness + grid_border_size
                cv2.rectangle(output, (x, y), (x + grid_square_size - 1, y + grid_square_size - 1), 255, -1)

    _, png = cv2.imencode('.png', output)
    return png.tobytes(), warning
|