├── .gitignore
└── AIF_AutoML
    ├── AIF_AutoML.ipynb
    ├── AIF_AutoML.pdf
    ├── AIF_AutoML.zip
    └── utils.py

/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MIRAE-Laboratory/Lectures/e5426236dee68408a876b2f1e6f2fe17a653d9ec/.gitignore
--------------------------------------------------------------------------------
/AIF_AutoML/AIF_AutoML.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MIRAE-Laboratory/Lectures/e5426236dee68408a876b2f1e6f2fe17a653d9ec/AIF_AutoML/AIF_AutoML.pdf
--------------------------------------------------------------------------------
/AIF_AutoML/AIF_AutoML.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MIRAE-Laboratory/Lectures/e5426236dee68408a876b2f1e6f2fe17a653d9ec/AIF_AutoML/AIF_AutoML.zip
--------------------------------------------------------------------------------
/AIF_AutoML/utils.py:
--------------------------------------------------------------------------------
import os
import cv2
import glob
import inspect
import zipfile
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow_datasets as tfds  # https://www.tensorflow.org/datasets/catalog/overview?hl=ko
from tqdm import tqdm
from PIL import Image
import scipy.signal as signal
from scipy.interpolate import RegularGridInterpolator

tf.get_logger().setLevel('ERROR')


def load_data(name='mnist', split='train', verbose=True):
    """Load a TFDS dataset and return images, one-hot labels, label names, and dataset info."""
    try:
        ds, info = tfds.load(name=name, split=split, with_info=True)
        label_names = info.features['label'].names

        X, Y = [], []
        for xy in tqdm(tfds.as_numpy(ds)):
            X.append(xy['image'])
            Y.append(tf.keras.utils.to_categorical(xy['label'], num_classes=len(label_names), dtype='float32'))

    except Exception as e:
        print(f'Error: {e}')

    if verbose:
        try:
            print(name, f'X length (shape): {len(X)} ({X[0].shape}), ' f'Y length (shape): {len(Y)} ({Y[0].shape})')
        except Exception as e:
            print(name, f'X length: {len(X)}, ' f'Y length: {len(Y)}')

    return X, Y, label_names, info
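

# Usage sketch (illustrative, not part of the original lecture code): load MNIST
# through TFDS and inspect the first sample. Assumes tensorflow_datasets can
# download/read the 'mnist' dataset in this environment.
def _demo_load_data():
    X, Y, label_names, info = load_data(name='mnist', split='train')
    print(label_names)       # ['0', '1', ..., '9'] for MNIST
    print(X[0].shape, Y[0])  # a (28, 28, 1) image and its one-hot label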


def load_image_data(dir_base_path='datasets/mnist', split='train', shuffle=True, random_seed=42, verbose=True):
    """Load images from a directory tree: labelled subfolders named '<index>_<label>' for
    classification, or flat files whose names start with the target value for regression."""
    try:
        X, Y = [], []
        dir_split_path = os.path.join(dir_base_path, split)
        indices_label_names = sorted([x for x in os.listdir(dir_split_path) if '.' not in x])
        if len(indices_label_names) > 0:
            indice = [int(x.split('_')[0]) for x in indices_label_names]
            label_names = [x.split('_')[1] for x in indices_label_names]

            for index_label_name, index in zip(indices_label_names, indice):
                dir_image_path = f'{dir_split_path}/{index_label_name}'
                path_image_list = sorted(glob.glob(f'{dir_image_path}/*'))
                for path_image in path_image_list:
                    X.append(np.array(Image.open(path_image)))
                    Y.append(tf.keras.utils.to_categorical(index, num_classes=len(label_names), dtype='float32'))
                print('Loaded %d images from %s' % (len(path_image_list), dir_image_path))
        else:
            filename_list = sorted(os.listdir(dir_split_path))
            for filename in tqdm(filename_list):
                X.append(np.array(Image.open(os.path.join(dir_split_path, filename))))
                Y.append(float(filename.split('_')[0]))
            label_names = ''

        info = {'base_path': dir_base_path, 'split': split, 'label_names': label_names}

        if shuffle:
            dataset = list(zip(X, Y))
            np.random.seed(random_seed)
            np.random.shuffle(dataset)
            X, Y = zip(*dataset)

        if verbose:
            if label_names == '':
                print(dir_split_path, f'X count (shape): {len(X)} ({X[0].shape}), Y count (shape): {len(Y)} ({Y[0]})')
            else:
                print(dir_split_path, f'X count (shape): {len(X)} ({X[0].shape}), Y count (shape): {len(Y)} ({Y[0].shape})')

        return X, Y, label_names, info

    except Exception as e:
        print(f'Error: {e}')
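

# Usage sketch (illustrative): for classification the expected layout is
# <dir_base_path>/<split>/<index>_<label>/<image files>, e.g.
# datasets/mnist/train/0_zero/0001.png. The path below is a hypothetical example.
def _demo_load_image_data():
    X, Y, label_names, info = load_image_data(dir_base_path='datasets/mnist', split='train')
    plot_images(X, Y, label_names, i_list=[0, 1, 2])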


def load_signal_data(dir_base_path='datasets/sigpeak', split='train', shuffle=True, random_seed=42, verbose=True):
    """Load CSV signals (time in column 0, value in column 1) from labelled subfolders for
    classification, or from flat files named '<target>_...' for regression."""
    try:
        T, X, Y = [], [], []
        dir_split_path = os.path.join(dir_base_path, split)
        indices_label_names = sorted([x for x in os.listdir(dir_split_path) if '.' not in x])
        if len(indices_label_names) > 0:
            indice = [int(x.split('_')[0]) for x in indices_label_names]
            label_names = [x.split('_')[1] for x in indices_label_names]

            for index_label_name, index in zip(indices_label_names, indice):
                dir_signal_path = f'{dir_split_path}/{index_label_name}'
                path_signal_list = sorted(glob.glob(f'{dir_signal_path}/*'))
                for path_signal in path_signal_list:
                    data = np.loadtxt(path_signal, delimiter=',', skiprows=1)  # load once, reuse both columns
                    T.append(data[:, 0])
                    X.append(data[:, 1])
                    Y.append(tf.keras.utils.to_categorical(index, num_classes=len(label_names), dtype='float32'))
                print('Loaded %d signals from %s' % (len(path_signal_list), dir_signal_path))
        else:
            filename_list = sorted(os.listdir(dir_split_path))
            for filename in tqdm(filename_list):
                data = np.loadtxt(os.path.join(dir_split_path, filename), delimiter=',', skiprows=1)
                T.append(data[:, 0])
                X.append(data[:, 1])
                Y.append(float(filename.split('_')[0]))
            label_names = ''

        info = {'base_path': dir_base_path, 'split': split, 'label_names': label_names}

        if shuffle:
            dataset = list(zip(T, X, Y))
            np.random.seed(random_seed)
            np.random.shuffle(dataset)
            T, X, Y = zip(*dataset)

        if verbose:
            if label_names == '':
                print(dir_split_path, f'T count (shape): {len(T)} ({T[0].shape}), X count (shape): {len(X)} ({X[0].shape}), Y count (shape): {len(Y)} ({Y[0]})')
            else:
                print(dir_split_path, f'T count (shape): {len(T)} ({T[0].shape}), X count (shape): {len(X)} ({X[0].shape}), Y count (shape): {len(Y)} ({Y[0].shape})')

        return T, X, Y, label_names, info

    except Exception as e:
        print(f'Error: {e}')


# Plot images; RGB images are additionally shown channel by channel.
def plot_images(images, labels, label_names, i_list=[]):
    try:
        images = np.array(images, dtype=np.uint8)
        labels = np.array(labels)
    except:
        pass
    i_list = range(len(images)) if i_list == [] else i_list
    grayscale = images[0].shape[-1] != 3

    if grayscale:
        fig = plt.figure(figsize=(3 * len(i_list), 3))
    else:
        fig = plt.figure(figsize=(3 * 4, 3 * len(i_list)))

    ax_list = []
    for i_ax, i_image in enumerate(i_list):
        x = images[i_image]
        y = labels[i_image].astype(np.float16)

        try:
            y = f'{label_names[np.argmax(y)]} {y}'
        except:
            pass

        if grayscale:
            ax = fig.add_subplot(1, len(i_list), i_ax + 1)
            ax.imshow(x, cmap='gray')
            ax.set_title(f"Image #{i_image}\nLabel: {y}\nShape: {x.shape}")
            ax.set_axis_off()
        else:
            for i_ax_column in range(4):
                if i_ax_column == 0:
                    ax = fig.add_subplot(len(i_list), 4, i_ax * 4 + 1)
                    ax.imshow(x)
                    ax.set_title(f"Image #{i_image}\nLabel: {y}\nShape: {x.shape}", loc='left')
                    ax.set_axis_off()
                else:
                    i_channel = i_ax_column - 1
                    x_channel = x.copy()
                    x_channel[:, :, [c for c in range(3) if c != i_channel]] = 0  # zero out the other two channels
                    ax = fig.add_subplot(len(i_list), 4, i_ax * 4 + 1 + i_ax_column)
                    ax.imshow(x_channel)
                    ax.set_title(f"{['Red', 'Green', 'Blue'][i_channel]} Channel")
                    ax.set_axis_off()
        ax_list.append(ax)

    fig.tight_layout()
    fig.show()
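

# Usage sketch (illustrative): visualise a few CIFAR-10 samples channel by channel.
# Assumes the 'cifar10' dataset is available through tensorflow_datasets.
def _demo_plot_images():
    X, Y, label_names, info = load_data(name='cifar10', split='test')
    plot_images(X, Y, label_names, i_list=[0, 1])  # RGB inputs get per-channel panels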


# Plot signals with a zero baseline for reference.
def plot_signals(axis, signals, labels, label_names, i_list=[], axis_name='Time [s]'):
    try:
        axis = np.array(axis)
        signals = np.array(signals)
        labels = np.array(labels)
    except:
        pass
    i_list = range(len(signals)) if i_list == [] else i_list

    fig = plt.figure(figsize=(6, 3 * len(i_list)))

    ax_list = []
    for i_ax, i_signal in enumerate(i_list):
        a = axis[i_signal]
        x = signals[i_signal]
        y = labels[i_signal].astype(np.float16)

        try:
            y = f'{label_names[np.argmax(y)]} {y}'
        except:
            pass

        ax = fig.add_subplot(len(i_list), 1, i_ax + 1)
        ax.plot(a, x * 0, 'r')  # zero baseline
        ax.plot(a, x, linewidth=5)
        ax.set_xlim(a[0], a[-1])
        ax.set_xlabel(axis_name)
        ax.set_ylabel('Value')
        ax.set_title(f"Signal #{i_signal}\nLabel: {y}\nShape: {x.shape}", loc='left')
        ax.grid()

        ax_list.append(ax)

    fig.tight_layout()
    fig.show()


# prompt: python code to convert rgb image to grayscale image based on opencv
def rgb_to_grayscale_image(image):
    try:
        if image.shape[-1] == 3:
            return cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        else:
            print('Error: The input image is not an RGB image')
            return image

    except Exception as e:
        print(f'Error: {e}')
        return image


# prompt: python code to resize and fill image based on opencv
def resize_and_fill_image(image, target_size, fill_color=(0, 0, 0)):
    """Resize while keeping the aspect ratio, then pad to target_size with fill_color (letterboxing)."""
    try:
        if image.shape[-1] == 1:
            new_image = np.full((target_size[0], target_size[1]), fill_color[0], dtype=np.uint8)
        else:
            new_image = np.full((target_size[0], target_size[1], 3), fill_color, dtype=np.uint8)

        scale = min(target_size[0] / image.shape[0], target_size[1] / image.shape[1])
        resized_image = cv2.resize(image, None, fx=scale, fy=scale, interpolation=cv2.INTER_AREA)
        center = (target_size[0] // 2, target_size[1] // 2)
        position = (center[0] - resized_image.shape[0] // 2, center[1] - resized_image.shape[1] // 2)
        new_image[position[0]:position[0] + resized_image.shape[0], position[1]:position[1] + resized_image.shape[1]] = resized_image
        return new_image

    except Exception as e:
        print(f'Error: {e}')
        return image


# prompt: python code to resize and crop image based on opencv
def resize_and_crop_image(image, target_size):
    """Resize while keeping the aspect ratio, then center-crop to target_size."""
    try:
        scale = max(target_size[0] / image.shape[0], target_size[1] / image.shape[1])
        resized_image = cv2.resize(image, None, fx=scale, fy=scale, interpolation=cv2.INTER_AREA)
        start_x = (resized_image.shape[1] - target_size[1]) // 2
        start_y = (resized_image.shape[0] - target_size[0]) // 2
        end_x = start_x + target_size[1]
        end_y = start_y + target_size[0]
        cropped_image = resized_image[start_y:end_y, start_x:end_x]
        return cropped_image

    except Exception as e:
        print(f'Error: {e}')
        return image


def flip_image(image, mode='lr'):  # lr: left-right, ud: up-down
    try:
        if mode == 'lr':
            flipped_image = np.fliplr(image)
        elif mode == 'ud':
            flipped_image = np.flipud(image)
        else:
            print('Error: The input mode is not valid (lr for left-right, ud for up-down)')
            return image
        return flipped_image

    except Exception as e:
        print(f'Error: {e}')
        return image
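

# Usage sketch (illustrative): two ways to bring an image to a fixed 64x64 size.
# resize_and_fill_image keeps the whole image and pads the borders (letterbox),
# while resize_and_crop_image fills the frame and discards the overflow.
def _demo_resize(image):  # `image` is any HxWx3 uint8 array supplied by the caller
    padded = resize_and_fill_image(image, target_size=(64, 64), fill_color=(0, 0, 0))
    cropped = resize_and_crop_image(image, target_size=(64, 64))
    return padded, cropped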


# prompt: python code to rotate image based on opencv
def rotate_image(image, angle_list=[], fill_color=(255, 255, 255)):
    """Rotate around the image centre; a random angle is drawn if angle_list is empty."""
    try:
        center = (image.shape[1] // 2, image.shape[0] // 2)
        # the negation makes angles taken from angle_list rotate clockwise
        angle = np.random.randint(0, 360) if angle_list == [] else -angle_list[np.random.randint(0, len(angle_list))]
        rotation_matrix = cv2.getRotationMatrix2D(center, angle, 1.0)
        rotated_image = cv2.warpAffine(image, rotation_matrix, (image.shape[1], image.shape[0]), borderValue=fill_color)
        return rotated_image

    except Exception as e:
        print(f'Error: {e}')
        return image


def mixup_images(images, labels, alpha=0.5):
    """MixUp augmentation: blend each image (and its one-hot label) with a randomly paired one."""
    try:
        images, labels = np.array(images), np.array(labels)
    except:
        assert False, 'The input images should be same shape'

    indices_random = np.random.permutation(images.shape[0])
    images_shuffled, labels_shuffled = images[indices_random], labels[indices_random]

    # mixing ratios drawn from Beta(alpha, alpha); keep an array so the reshapes below work
    ratio = np.random.beta(alpha, alpha, images.shape[0]) if alpha > 0 else np.ones(images.shape[0])

    ratio = ratio.reshape(-1, 1, 1, 1)
    images_mixed = ratio * images + (1 - ratio) * images_shuffled

    ratio = ratio.reshape(-1, 1)
    labels_mixed = ratio * labels + (1 - ratio) * labels_shuffled

    return images_mixed, labels_mixed


def cutmix_images(images, labels, beta=0.5):
    """CutMix augmentation: paste a random rectangle from a shuffled image and mix the labels by area."""
    try:
        images, labels = np.array(images), np.array(labels)
    except:
        assert False, 'The input images should be same shape'

    indices_random = np.random.permutation(len(images))
    target_a, target_b = labels, labels[indices_random]

    h, w = images[0].shape[:2]
    ratio_cut = np.sqrt(1. - np.random.beta(beta, beta))
    w_cut, h_cut = int(w * ratio_cut), int(h * ratio_cut)
    x_cut, y_cut = np.random.randint(w), np.random.randint(h)
    bbx1, bby1 = np.clip(x_cut - w_cut // 2, 0, w), np.clip(y_cut - h_cut // 2, 0, h)
    bbx2, bby2 = np.clip(x_cut + w_cut // 2, 0, w), np.clip(y_cut + h_cut // 2, 0, h)

    images_mixed = images.copy()
    # rows are indexed by the y (height) coordinates, columns by the x (width) coordinates
    images_mixed[:, bby1:bby2, bbx1:bbx2, :] = images[indices_random, bby1:bby2, bbx1:bbx2, :]

    ratio = 1 - ((bbx2 - bbx1) * (bby2 - bby1) / (h * w))
    labels_mixed = target_a * ratio + target_b * (1. - ratio)

    return images_mixed, labels_mixed
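

# Usage sketch (illustrative): augment a mini-batch with MixUp and CutMix.
# Assumes `images` is an (N, H, W, C) array and `labels` is an (N, num_classes) one-hot array.
def _demo_mix_augmentation(images, labels):
    images_mu, labels_mu = mixup_images(images, labels, alpha=0.5)   # blended pixels, blended labels
    images_cm, labels_cm = cutmix_images(images, labels, beta=0.5)   # pasted patch, area-weighted labels
    return images_mu, labels_mu, images_cm, labels_cm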


def absolute_signal(signal_data):
    """Return the absolute value of the signal."""
    absolute_signal = np.abs(signal_data)
    return absolute_signal


def min_max_scaling_signal(signal_data):
    """Scale the signal to the [0, 1] range."""
    min_val = np.min(signal_data)
    max_val = np.max(signal_data)
    scaled_signal = (signal_data - min_val) / (max_val - min_val)
    return scaled_signal


def standardize_signal(signal_data):
    """Standardize the signal to zero mean and unit standard deviation."""
    mean = np.mean(signal_data)
    std = np.std(signal_data)
    standardized_signal = (signal_data - mean) / std
    return standardized_signal


def moving_average_signal(signal_data, window_size):
    """Smooth the signal with a simple moving-average window."""
    window = np.ones(window_size) / window_size
    smoothed_signal = np.convolve(signal_data, window, mode='same')
    return smoothed_signal


def linear_detrend_signal(signal_data):
    """Remove the straight line connecting the first and last samples."""
    detrended_signal = signal_data - np.linspace(signal_data[0], signal_data[-1], len(signal_data))
    return detrended_signal


def remove_outlier(signal_data, threshold=3.5):
    """Replace samples whose modified z-score exceeds the threshold with the median."""
    median = np.median(signal_data)
    diff = np.abs(signal_data - median)
    med_abs_deviation = np.median(diff)
    modified_z_score = 0.6745 * diff / med_abs_deviation if med_abs_deviation != 0 else np.zeros_like(diff)
    modified_z_score[modified_z_score == 0] = 1e-18
    indices = np.where(modified_z_score > threshold)
    signal_data[indices] = median
    return signal_data


def lowpass_filter(signal_data, time_delta, cutoff_freq, order=5):
    """Butterworth low-pass filter; cutoff_freq is in Hz, time_delta is the sampling interval in seconds."""
    sample_rate = 1 / time_delta
    nyquist_freq = 0.5 * sample_rate
    normalized_cutoff_freq = cutoff_freq / nyquist_freq
    b, a = signal.butter(order, normalized_cutoff_freq, btype='low')
    filtered_signal = signal.lfilter(b, a, signal_data)
    return filtered_signal


def highpass_filter(signal_data, time_delta, cutoff_freq, order=5):
    """Butterworth high-pass filter; cutoff_freq is in Hz, time_delta is the sampling interval in seconds."""
    sample_rate = 1 / time_delta
    nyquist_freq = 0.5 * sample_rate
    normalized_cutoff_freq = cutoff_freq / nyquist_freq
    b, a = signal.butter(order, normalized_cutoff_freq, btype='high')
    filtered_signal = signal.lfilter(b, a, signal_data)
    return filtered_signal


def compute_fft(signal_data, time_delta):
    """Single-sided amplitude spectrum via the FFT; returns frequencies and magnitudes."""
    n = len(signal_data)
    f = np.fft.fftfreq(n, d=time_delta)[:n // 2]
    mag = np.fft.fft(signal_data)
    mag = 2.0 / n * np.abs(mag[0:n // 2])
    return f, mag


def compute_psd(signal_data, time_delta):
    """Power spectral density using Welch's method."""
    sample_rate = 1 / time_delta
    f, psd = signal.welch(signal_data, sample_rate)
    return f, psd


def compute_spectrogram(signal_data, time_delta, time_max, nperseg, s_width=100, s_height=100):
    """Compute a spectrogram and resample it onto a fixed (s_height, s_width) grid."""
    signal_data = signal_data.squeeze()  # Remove the channel dimension if it exists: (num_channels, t_length) -> (t_length)

    sampling_rate = 1 / time_delta  # Sampling frequency

    Sf, St, Sxx = signal.spectrogram(signal_data, nperseg=nperseg)

    t_axis = np.linspace(0, time_max, Sxx.shape[1])
    f_axis = np.linspace(0, sampling_rate / 2, Sxx.shape[0])  # sampling_rate / 2 is the Nyquist frequency

    grid_t_axis = np.linspace(0, time_max, s_width)
    grid_f_axis = np.linspace(0, sampling_rate / 2, s_height)  # sampling_rate / 2 is the Nyquist frequency

    interp_func = RegularGridInterpolator((f_axis, t_axis), Sxx)

    grid_f, grid_t = np.meshgrid(grid_f_axis, grid_t_axis, indexing='ij')  # Create the 2D grid
    points = np.stack((grid_f.ravel(), grid_t.ravel()), axis=-1)  # ravel() flattens the grids into interpolation points
    S_resized = interp_func(points).reshape(s_height, s_width)  # Interpolate onto the fixed-size grid
    S_resized = np.flipud(S_resized)  # Flip so the lowest frequency sits at the bottom when displayed

    return grid_f, grid_t, S_resized
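

# Usage sketch (illustrative): clean up a synthetic noisy sine wave and inspect its spectrum.
# The sampling interval (0.001 s) and cutoff (20 Hz) below are arbitrary example values.
def _demo_signal_pipeline():
    dt = 0.001
    t = np.arange(0, 1, dt)
    x = np.sin(2 * np.pi * 5 * t) + 0.2 * np.random.randn(len(t))  # 5 Hz sine plus noise
    x = standardize_signal(x)
    x = lowpass_filter(x, time_delta=dt, cutoff_freq=20)
    f, mag = compute_fft(x, time_delta=dt)  # the spectrum should peak near 5 Hz
    return f, mag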


def write_func_to_py_file(func, file_path):
    """Write the source code of a function to a .py file."""
    with open(file_path, 'w') as file:
        file.write(inspect.getsource(func))


def create_submission_zipfile(submission_name='mnist_hogeony_pp1', model_best=None, preprocessing=None):
    """Bundle the preprocessing function (.py) and the best Keras model (.h5) into a submission zip."""
    write_func_to_py_file(preprocessing, f'{submission_name}.py')
    print(f'Created {submission_name}.py')

    model_best.save(f'{submission_name}.h5')
    print(f'Created {submission_name}.h5')

    with zipfile.ZipFile(f'{submission_name}.zip', 'w') as zipf:
        zipf.write(f'{submission_name}.py')
        zipf.write(f'{submission_name}.h5')
    print(f'Created {submission_name}.zip')
--------------------------------------------------------------------------------
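
Usage sketch (illustrative): create_submission_zipfile, presumably called from AIF_AutoML.ipynb, packages a trained model together with its preprocessing function. The model file name, preprocessing choice, and submission name below are hypothetical examples.

model_best = tf.keras.models.load_model('best_model.h5')  # hypothetical trained model
create_submission_zipfile(submission_name='mnist_hogeony_pp1',
                          model_best=model_best,
                          preprocessing=rgb_to_grayscale_image)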