├── .gitignore
└── AIF_AutoML
    ├── AIF_AutoML.ipynb
    ├── AIF_AutoML.pdf
    ├── AIF_AutoML.zip
    └── utils.py

/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MIRAE-Laboratory/Lectures/e5426236dee68408a876b2f1e6f2fe17a653d9ec/.gitignore
--------------------------------------------------------------------------------
/AIF_AutoML/AIF_AutoML.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MIRAE-Laboratory/Lectures/e5426236dee68408a876b2f1e6f2fe17a653d9ec/AIF_AutoML/AIF_AutoML.pdf
--------------------------------------------------------------------------------
/AIF_AutoML/AIF_AutoML.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MIRAE-Laboratory/Lectures/e5426236dee68408a876b2f1e6f2fe17a653d9ec/AIF_AutoML/AIF_AutoML.zip
--------------------------------------------------------------------------------
/AIF_AutoML/utils.py:
--------------------------------------------------------------------------------
import os
import cv2
import glob
import inspect
import zipfile
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow_datasets as tfds  # https://www.tensorflow.org/datasets/catalog/overview?hl=ko
from tqdm import tqdm
from PIL import Image
import scipy.signal as signal
from scipy.interpolate import RegularGridInterpolator

tf.get_logger().setLevel('ERROR')


def load_data(name='mnist', split='train', verbose=True):
    """Load a TFDS dataset and return images, one-hot labels, label names, and dataset info."""
    try:
        ds, info = tfds.load(name=name, split=split, with_info=True)
        label_names = info.features['label'].names

        X, Y = [], []
        for xy in tqdm(tfds.as_numpy(ds)):
            X.append(xy['image'])
            Y.append(tf.keras.utils.to_categorical(xy['label'], num_classes=len(label_names), dtype='float32'))

    except Exception as e:
        print(f'Error: {e}')

    if verbose:
        try:
            print(name, f'X length (shape): {len(X)} ({X[0].shape}), ' f'Y length (shape): {len(Y)} ({Y[0].shape})')
        except Exception as e:
            print(name, f'X length: {len(X)}, ' f'Y length: {len(Y)}')

    return X, Y, label_names, info
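

# Usage sketch (illustrative, not part of the original lecture code): load MNIST
# through TFDS and inspect the first sample. Assumes tensorflow_datasets can
# download/read the 'mnist' dataset in this environment.
def _demo_load_data():
    X, Y, label_names, info = load_data(name='mnist', split='train')
    print(label_names)       # ['0', '1', ..., '9'] for MNIST
    print(X[0].shape, Y[0])  # a (28, 28, 1) image and its one-hot label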


def load_image_data(dir_base_path='datasets/mnist', split='train', shuffle=True, random_seed=42, verbose=True):
    """Load images from a directory tree: labelled subfolders named '<index>_<label>' for
    classification, or flat files whose names start with the target value for regression."""
    try:
        X, Y = [], []
        dir_split_path = os.path.join(dir_base_path, split)
        indices_label_names = sorted([x for x in os.listdir(dir_split_path) if '.' not in x])
        if len(indices_label_names) > 0:
            indice = [int(x.split('_')[0]) for x in indices_label_names]
            label_names = [x.split('_')[1] for x in indices_label_names]

            for index_label_name, index in zip(indices_label_names, indice):
                dir_image_path = f'{dir_split_path}/{index_label_name}'
                path_image_list = sorted(glob.glob(f'{dir_image_path}/*'))
                for path_image in path_image_list:
                    X.append(np.array(Image.open(path_image)))
                    Y.append(tf.keras.utils.to_categorical(index, num_classes=len(label_names), dtype='float32'))
                print('Loaded %d images from %s' % (len(path_image_list), dir_image_path))
        else:
            filename_list = sorted(os.listdir(dir_split_path))
            for filename in tqdm(filename_list):
                X.append(np.array(Image.open(os.path.join(dir_split_path, filename))))
                Y.append(float(filename.split('_')[0]))
            label_names = ''

        info = {'base_path': dir_base_path, 'split': split, 'label_names': label_names}

        if shuffle:
            dataset = list(zip(X, Y))
            np.random.seed(random_seed)
            np.random.shuffle(dataset)
            X, Y = zip(*dataset)

        if verbose:
            if label_names == '':
                print(dir_split_path, f'X count (shape): {len(X)} ({X[0].shape}), Y count (shape): {len(Y)} ({Y[0]})')
            else:
                print(dir_split_path, f'X count (shape): {len(X)} ({X[0].shape}), Y count (shape): {len(Y)} ({Y[0].shape})')

        return X, Y, label_names, info

    except Exception as e:
        print(f'Error: {e}')
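

# Usage sketch (illustrative): for classification the expected layout is
# <dir_base_path>/<split>/<index>_<label>/<image files>, e.g.
# datasets/mnist/train/0_zero/0001.png. The path below is a hypothetical example.
def _demo_load_image_data():
    X, Y, label_names, info = load_image_data(dir_base_path='datasets/mnist', split='train')
    plot_images(X, Y, label_names, i_list=[0, 1, 2])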


def load_signal_data(dir_base_path='datasets/sigpeak', split='train', shuffle=True, random_seed=42, verbose=True):
    """Load CSV signals (time in column 0, value in column 1) from labelled subfolders for
    classification, or from flat files named '<target>_...' for regression."""
    try:
        T, X, Y = [], [], []
        dir_split_path = os.path.join(dir_base_path, split)
        indices_label_names = sorted([x for x in os.listdir(dir_split_path) if '.' not in x])
        if len(indices_label_names) > 0:
            indice = [int(x.split('_')[0]) for x in indices_label_names]
            label_names = [x.split('_')[1] for x in indices_label_names]

            for index_label_name, index in zip(indices_label_names, indice):
                dir_signal_path = f'{dir_split_path}/{index_label_name}'
                path_signal_list = sorted(glob.glob(f'{dir_signal_path}/*'))
                for path_signal in path_signal_list:
                    data = np.loadtxt(path_signal, delimiter=',', skiprows=1)  # load once, reuse both columns
                    T.append(data[:, 0])
                    X.append(data[:, 1])
                    Y.append(tf.keras.utils.to_categorical(index, num_classes=len(label_names), dtype='float32'))
                print('Loaded %d signals from %s' % (len(path_signal_list), dir_signal_path))
        else:
            filename_list = sorted(os.listdir(dir_split_path))
            for filename in tqdm(filename_list):
                data = np.loadtxt(os.path.join(dir_split_path, filename), delimiter=',', skiprows=1)
                T.append(data[:, 0])
                X.append(data[:, 1])
                Y.append(float(filename.split('_')[0]))
            label_names = ''

        info = {'base_path': dir_base_path, 'split': split, 'label_names': label_names}

        if shuffle:
            dataset = list(zip(T, X, Y))
            np.random.seed(random_seed)
            np.random.shuffle(dataset)
            T, X, Y = zip(*dataset)

        if verbose:
            if label_names == '':
                print(dir_split_path, f'T count (shape): {len(T)} ({T[0].shape}), X count (shape): {len(X)} ({X[0].shape}), Y count (shape): {len(Y)} ({Y[0]})')
            else:
                print(dir_split_path, f'T count (shape): {len(T)} ({T[0].shape}), X count (shape): {len(X)} ({X[0].shape}), Y count (shape): {len(Y)} ({Y[0].shape})')

        return T, X, Y, label_names, info

    except Exception as e:
        print(f'Error: {e}')


# Plot images; RGB images are additionally shown channel by channel.
def plot_images(images, labels, label_names, i_list=[]):
    try:
        images = np.array(images, dtype=np.uint8)
        labels = np.array(labels)
    except:
        pass
    i_list = range(len(images)) if i_list == [] else i_list
    grayscale = images[0].shape[-1] != 3

    if grayscale:
        fig = plt.figure(figsize=(3 * len(i_list), 3))
    else:
        fig = plt.figure(figsize=(3 * 4, 3 * len(i_list)))

    ax_list = []
    for i_ax, i_image in enumerate(i_list):
        x = images[i_image]
        y = labels[i_image].astype(np.float16)

        try:
            y = f'{label_names[np.argmax(y)]} {y}'
        except:
            pass

        if grayscale:
            ax = fig.add_subplot(1, len(i_list), i_ax + 1)
            ax.imshow(x, cmap='gray')
            ax.set_title(f"Image #{i_image}\nLabel: {y}\nShape: {x.shape}")
            ax.set_axis_off()
        else:
            for i_ax_column in range(4):
                if i_ax_column == 0:
                    ax = fig.add_subplot(len(i_list), 4, i_ax * 4 + 1)
                    ax.imshow(x)
                    ax.set_title(f"Image #{i_image}\nLabel: {y}\nShape: {x.shape}", loc='left')
                    ax.set_axis_off()
                else:
                    i_channel = i_ax_column - 1
                    x_channel = x.copy()
                    x_channel[:, :, [c for c in range(3) if c != i_channel]] = 0  # zero out the other two channels
                    ax = fig.add_subplot(len(i_list), 4, i_ax * 4 + 1 + i_ax_column)
                    ax.imshow(x_channel)
                    ax.set_title(f"{['Red', 'Green', 'Blue'][i_channel]} Channel")
                    ax.set_axis_off()
        ax_list.append(ax)

    fig.tight_layout()
    fig.show()
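

# Usage sketch (illustrative): visualise a few CIFAR-10 samples channel by channel.
# Assumes the 'cifar10' dataset is available through tensorflow_datasets.
def _demo_plot_images():
    X, Y, label_names, info = load_data(name='cifar10', split='test')
    plot_images(X, Y, label_names, i_list=[0, 1])  # RGB inputs get per-channel panels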


# Plot signals with a zero baseline for reference.
def plot_signals(axis, signals, labels, label_names, i_list=[], axis_name='Time [s]'):
    try:
        axis = np.array(axis)
        signals = np.array(signals)
        labels = np.array(labels)
    except:
        pass
    i_list = range(len(signals)) if i_list == [] else i_list

    fig = plt.figure(figsize=(6, 3 * len(i_list)))

    ax_list = []
    for i_ax, i_signal in enumerate(i_list):
        a = axis[i_signal]
        x = signals[i_signal]
        y = labels[i_signal].astype(np.float16)

        try:
            y = f'{label_names[np.argmax(y)]} {y}'
        except:
            pass

        ax = fig.add_subplot(len(i_list), 1, i_ax + 1)
        ax.plot(a, x * 0, 'r')  # zero baseline
        ax.plot(a, x, linewidth=5)
        ax.set_xlim(a[0], a[-1])
        ax.set_xlabel(axis_name)
        ax.set_ylabel('Value')
        ax.set_title(f"Signal #{i_signal}\nLabel: {y}\nShape: {x.shape}", loc='left')
        ax.grid()

        ax_list.append(ax)

    fig.tight_layout()
    fig.show()


# prompt: python code to convert rgb image to grayscale image based on opencv
def rgb_to_grayscale_image(image):
    try:
        if image.shape[-1] == 3:
            return cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        else:
            print('Error: The input image is not an RGB image')
            return image

    except Exception as e:
        print(f'Error: {e}')
        return image


# prompt: python code to resize and fill image based on opencv
def resize_and_fill_image(image, target_size, fill_color=(0, 0, 0)):
    """Resize while keeping the aspect ratio, then pad to target_size with fill_color (letterboxing)."""
    try:
        if image.shape[-1] == 1:
            new_image = np.full((target_size[0], target_size[1]), fill_color[0], dtype=np.uint8)
        else:
            new_image = np.full((target_size[0], target_size[1], 3), fill_color, dtype=np.uint8)

        scale = min(target_size[0] / image.shape[0], target_size[1] / image.shape[1])
        resized_image = cv2.resize(image, None, fx=scale, fy=scale, interpolation=cv2.INTER_AREA)
        center = (target_size[0] // 2, target_size[1] // 2)
        position = (center[0] - resized_image.shape[0] // 2, center[1] - resized_image.shape[1] // 2)
        new_image[position[0]:position[0] + resized_image.shape[0], position[1]:position[1] + resized_image.shape[1]] = resized_image
        return new_image

    except Exception as e:
        print(f'Error: {e}')
        return image


# prompt: python code to resize and crop image based on opencv
def resize_and_crop_image(image, target_size):
    """Resize while keeping the aspect ratio, then center-crop to target_size."""
    try:
        scale = max(target_size[0] / image.shape[0], target_size[1] / image.shape[1])
        resized_image = cv2.resize(image, None, fx=scale, fy=scale, interpolation=cv2.INTER_AREA)
        start_x = (resized_image.shape[1] - target_size[1]) // 2
        start_y = (resized_image.shape[0] - target_size[0]) // 2
        end_x = start_x + target_size[1]
        end_y = start_y + target_size[0]
        cropped_image = resized_image[start_y:end_y, start_x:end_x]
        return cropped_image

    except Exception as e:
        print(f'Error: {e}')
        return image


def flip_image(image, mode='lr'):  # lr: left-right, ud: up-down
    try:
        if mode == 'lr':
            flipped_image = np.fliplr(image)
        elif mode == 'ud':
            flipped_image = np.flipud(image)
        else:
            print('Error: The input mode is not valid (lr for left-right, ud for up-down)')
            return image
        return flipped_image

    except Exception as e:
        print(f'Error: {e}')
        return image
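

# Usage sketch (illustrative): two ways to bring an image to a fixed 64x64 size.
# resize_and_fill_image keeps the whole image and pads the borders (letterbox),
# while resize_and_crop_image fills the frame and discards the overflow.
def _demo_resize(image):  # `image` is any HxWx3 uint8 array supplied by the caller
    padded = resize_and_fill_image(image, target_size=(64, 64), fill_color=(0, 0, 0))
    cropped = resize_and_crop_image(image, target_size=(64, 64))
    return padded, cropped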


# prompt: python code to rotate image based on opencv
def rotate_image(image, angle_list=[], fill_color=(255, 255, 255)):
    """Rotate around the image centre; a random angle is drawn if angle_list is empty."""
    try:
        center = (image.shape[1] // 2, image.shape[0] // 2)
        # the negation makes angles taken from angle_list rotate clockwise
        angle = np.random.randint(0, 360) if angle_list == [] else -angle_list[np.random.randint(0, len(angle_list))]
        rotation_matrix = cv2.getRotationMatrix2D(center, angle, 1.0)
        rotated_image = cv2.warpAffine(image, rotation_matrix, (image.shape[1], image.shape[0]), borderValue=fill_color)
        return rotated_image

    except Exception as e:
        print(f'Error: {e}')
        return image


def mixup_images(images, labels, alpha=0.5):
    """MixUp augmentation: blend each image (and its one-hot label) with a randomly paired one."""
    try:
        images, labels = np.array(images), np.array(labels)
    except:
        assert False, 'The input images should be same shape'

    indices_random = np.random.permutation(images.shape[0])
    images_shuffled, labels_shuffled = images[indices_random], labels[indices_random]

    # mixing ratios drawn from Beta(alpha, alpha); keep an array so the reshapes below work
    ratio = np.random.beta(alpha, alpha, images.shape[0]) if alpha > 0 else np.ones(images.shape[0])

    ratio = ratio.reshape(-1, 1, 1, 1)
    images_mixed = ratio * images + (1 - ratio) * images_shuffled

    ratio = ratio.reshape(-1, 1)
    labels_mixed = ratio * labels + (1 - ratio) * labels_shuffled

    return images_mixed, labels_mixed


def cutmix_images(images, labels, beta=0.5):
    """CutMix augmentation: paste a random rectangle from a shuffled image and mix the labels by area."""
    try:
        images, labels = np.array(images), np.array(labels)
    except:
        assert False, 'The input images should be same shape'

    indices_random = np.random.permutation(len(images))
    target_a, target_b = labels, labels[indices_random]

    h, w = images[0].shape[:2]
    ratio_cut = np.sqrt(1. - np.random.beta(beta, beta))
    w_cut, h_cut = int(w * ratio_cut), int(h * ratio_cut)
    x_cut, y_cut = np.random.randint(w), np.random.randint(h)
    bbx1, bby1 = np.clip(x_cut - w_cut // 2, 0, w), np.clip(y_cut - h_cut // 2, 0, h)
    bbx2, bby2 = np.clip(x_cut + w_cut // 2, 0, w), np.clip(y_cut + h_cut // 2, 0, h)

    images_mixed = images.copy()
    # rows are indexed by the y (height) coordinates, columns by the x (width) coordinates
    images_mixed[:, bby1:bby2, bbx1:bbx2, :] = images[indices_random, bby1:bby2, bbx1:bbx2, :]

    ratio = 1 - ((bbx2 - bbx1) * (bby2 - bby1) / (h * w))
    labels_mixed = target_a * ratio + target_b * (1. - ratio)

    return images_mixed, labels_mixed
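

# Usage sketch (illustrative): augment a mini-batch with MixUp and CutMix.
# Assumes `images` is an (N, H, W, C) array and `labels` is an (N, num_classes) one-hot array.
def _demo_mix_augmentation(images, labels):
    images_mu, labels_mu = mixup_images(images, labels, alpha=0.5)   # blended pixels, blended labels
    images_cm, labels_cm = cutmix_images(images, labels, beta=0.5)   # pasted patch, area-weighted labels
    return images_mu, labels_mu, images_cm, labels_cm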


def absolute_signal(signal_data):
    """Return the absolute value of the signal."""
    absolute_signal = np.abs(signal_data)
    return absolute_signal


def min_max_scaling_signal(signal_data):
    """Scale the signal to the [0, 1] range."""
    min_val = np.min(signal_data)
    max_val = np.max(signal_data)
    scaled_signal = (signal_data - min_val) / (max_val - min_val)
    return scaled_signal


def standardize_signal(signal_data):
    """Standardize the signal to zero mean and unit standard deviation."""
    mean = np.mean(signal_data)
    std = np.std(signal_data)
    standardized_signal = (signal_data - mean) / std
    return standardized_signal


def moving_average_signal(signal_data, window_size):
    """Smooth the signal with a simple moving-average window."""
    window = np.ones(window_size) / window_size
    smoothed_signal = np.convolve(signal_data, window, mode='same')
    return smoothed_signal


def linear_detrend_signal(signal_data):
    """Remove the straight line connecting the first and last samples."""
    detrended_signal = signal_data - np.linspace(signal_data[0], signal_data[-1], len(signal_data))
    return detrended_signal


def remove_outlier(signal_data, threshold=3.5):
    """Replace samples whose modified z-score exceeds the threshold with the median."""
    median = np.median(signal_data)
    diff = np.abs(signal_data - median)
    med_abs_deviation = np.median(diff)
    modified_z_score = 0.6745 * diff / med_abs_deviation if med_abs_deviation != 0 else np.zeros_like(diff)
    modified_z_score[modified_z_score == 0] = 1e-18
    indices = np.where(modified_z_score > threshold)
    signal_data[indices] = median
    return signal_data


def lowpass_filter(signal_data, time_delta, cutoff_freq, order=5):
    """Butterworth low-pass filter; cutoff_freq is in Hz, time_delta is the sampling interval in seconds."""
    sample_rate = 1 / time_delta
    nyquist_freq = 0.5 * sample_rate
    normalized_cutoff_freq = cutoff_freq / nyquist_freq
    b, a = signal.butter(order, normalized_cutoff_freq, btype='low')
    filtered_signal = signal.lfilter(b, a, signal_data)
    return filtered_signal


def highpass_filter(signal_data, time_delta, cutoff_freq, order=5):
    """Butterworth high-pass filter; cutoff_freq is in Hz, time_delta is the sampling interval in seconds."""
    sample_rate = 1 / time_delta
    nyquist_freq = 0.5 * sample_rate
    normalized_cutoff_freq = cutoff_freq / nyquist_freq
    b, a = signal.butter(order, normalized_cutoff_freq, btype='high')
    filtered_signal = signal.lfilter(b, a, signal_data)
    return filtered_signal


def compute_fft(signal_data, time_delta):
    """Single-sided amplitude spectrum via the FFT; returns frequencies and magnitudes."""
    n = len(signal_data)
    f = np.fft.fftfreq(n, d=time_delta)[:n // 2]
    mag = np.fft.fft(signal_data)
    mag = 2.0 / n * np.abs(mag[0:n // 2])
    return f, mag


def compute_psd(signal_data, time_delta):
    """Power spectral density using Welch's method."""
    sample_rate = 1 / time_delta
    f, psd = signal.welch(signal_data, sample_rate)
    return f, psd


def compute_spectrogram(signal_data, time_delta, time_max, nperseg, s_width=100, s_height=100):
    """Compute a spectrogram and resample it onto a fixed (s_height, s_width) grid."""
    signal_data = signal_data.squeeze()  # Remove the channel dimension if it exists: (num_channels, t_length) -> (t_length)

    sampling_rate = 1 / time_delta  # Sampling frequency

    Sf, St, Sxx = signal.spectrogram(signal_data, nperseg=nperseg)

    t_axis = np.linspace(0, time_max, Sxx.shape[1])
    f_axis = np.linspace(0, sampling_rate / 2, Sxx.shape[0])  # sampling_rate / 2 is the Nyquist frequency

    grid_t_axis = np.linspace(0, time_max, s_width)
    grid_f_axis = np.linspace(0, sampling_rate / 2, s_height)  # sampling_rate / 2 is the Nyquist frequency

    interp_func = RegularGridInterpolator((f_axis, t_axis), Sxx)

    grid_f, grid_t = np.meshgrid(grid_f_axis, grid_t_axis, indexing='ij')  # Create the 2D grid
    points = np.stack((grid_f.ravel(), grid_t.ravel()), axis=-1)  # ravel() flattens the grids into interpolation points
    S_resized = interp_func(points).reshape(s_height, s_width)  # Interpolate onto the fixed-size grid
    S_resized = np.flipud(S_resized)  # Flip so the lowest frequency sits at the bottom when displayed

    return grid_f, grid_t, S_resized
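

# Usage sketch (illustrative): clean up a synthetic noisy sine wave and inspect its spectrum.
# The sampling interval (0.001 s) and cutoff (20 Hz) below are arbitrary example values.
def _demo_signal_pipeline():
    dt = 0.001
    t = np.arange(0, 1, dt)
    x = np.sin(2 * np.pi * 5 * t) + 0.2 * np.random.randn(len(t))  # 5 Hz sine plus noise
    x = standardize_signal(x)
    x = lowpass_filter(x, time_delta=dt, cutoff_freq=20)
    f, mag = compute_fft(x, time_delta=dt)  # the spectrum should peak near 5 Hz
    return f, mag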


def write_func_to_py_file(func, file_path):
    """Write the source code of a function to a .py file."""
    with open(file_path, 'w') as file:
        file.write(inspect.getsource(func))


def create_submission_zipfile(submission_name='mnist_hogeony_pp1', model_best=None, preprocessing=None):
    """Bundle the preprocessing function (.py) and the best Keras model (.h5) into a submission zip."""
    write_func_to_py_file(preprocessing, f'{submission_name}.py')
    print(f'Created {submission_name}.py')

    model_best.save(f'{submission_name}.h5')
    print(f'Created {submission_name}.h5')

    with zipfile.ZipFile(f'{submission_name}.zip', 'w') as zipf:
        zipf.write(f'{submission_name}.py')
        zipf.write(f'{submission_name}.h5')
    print(f'Created {submission_name}.zip')
--------------------------------------------------------------------------------
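
Usage sketch (illustrative): create_submission_zipfile, presumably called from AIF_AutoML.ipynb, packages a trained model together with its preprocessing function. The model file name, preprocessing choice, and submission name below are hypothetical examples.

model_best = tf.keras.models.load_model('best_model.h5')  # hypothetical trained model
create_submission_zipfile(submission_name='mnist_hogeony_pp1',
                          model_best=model_best,
                          preprocessing=rgb_to_grayscale_image)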