├── .gitignore ├── LICENSE ├── README.md ├── __init__.py ├── configs ├── road_signs_apollo.yml └── template.yml ├── embedding_net ├── __init__.py ├── augmentations.py ├── backbones.py ├── datagenerators.py ├── losses_and_accuracies.py ├── models.py └── utils.py ├── examples └── test_network.ipynb ├── images ├── t-sne.png └── t-sne_without_training.png ├── requirements.txt └── tools ├── test.py └── train.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | # vscode 107 | .vscode/ 108 | 109 | *DS_Store* 110 | tf_log/ 111 | to_test.ipynb 112 | encodings/ 113 | weights/ 114 | plots/ 115 | sub.csv 116 | core 117 | work_dirs/ 118 | wandb/ 119 | *.csv 120 | tmp/ 121 | configs/google_embeddings.yml 122 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Rauf 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
1 | # Siamese and Triplet networks for image classification
2 | 
3 | This repository contains a Keras implementation of deep neural networks for embedding learning using Siamese and Triplet approaches with different negative-sample mining strategies.
4 | 
5 | # Installation
6 | 
7 | ```bash
8 | git clone git@github.com:RocketFlash/EmbeddingNet.git
9 | ```
10 | 
11 | ## Install dependencies
12 | 
13 | ### Requirements
14 | 
15 | - tensorflow==2.2.0
16 | - scikit-learn
17 | - opencv
18 | - matplotlib
19 | - plotly - for interactive t-SNE plot visualization
20 | - [albumentations](https://github.com/albu/albumentations) - for online augmentation during training
21 | - [image-classifiers](https://github.com/qubvel/classification_models) - for different backbone models
22 | - [efficientnet](https://github.com/qubvel/efficientnet) - for EfficientNet backbones
23 | - [keras-rectified-adam](https://github.com/CyberZHG/keras-radam) - for the RAdam optimizer
24 | 
25 | Requirements can be installed using the following command:
26 | 
27 | ```bash
28 | $ pip install -r requirements.txt
29 | ```
30 | 
31 | To use **wandb**, install it with:
32 | ```bash
33 | $ pip install wandb
34 | ```
35 | 
36 | # Train
37 | 
38 | There are two options for providing training data: annotation files and data folders.
39 | 
40 | ## Annotation files
41 | The simplest way to start training is to generate annotation csv files. Each file must contain two columns: the image path and the class name. For example, generate **train.csv** and **val.csv** containing the columns "image_id" and "label" (a minimal generation sketch is shown below).
42 | 
43 | ## Data folders
44 | The second option is to split the images into folders. Training and validation data should be kept in separate directories, each of which contains one subfolder per class with that class's images. The dataset should have the following structure:
45 | 
46 | ```
47 | Dataset
48 | └───train
49 | │   └───class_1
50 | │   │   image1.jpg
51 | │   │   image2.jpg
52 | │   │   ...
53 | │   └───class_2
54 | │   │   image1.jpg
55 | │   │   image2.jpg
56 | │   │   ...
57 | │   └───class_N
58 | │   │   ...
59 | │ 
60 | └───val
61 | │   └───class_1
62 | │   │   image1.jpg
63 | │   │   image2.jpg
64 | │   │   ...
65 | │   └───class_2
66 | │   │   image1.jpg
67 | │   │   image2.jpg
68 | │   │   ...
69 | │   └───class_N
70 | │   │   ...
71 | ```
72 | 
73 | For training, create a configuration file that specifies all network and training parameters. Examples of configuration files can be found in the **configs** folder.
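For the annotation-files option above, a small script along the lines of the sketch below can generate **train.csv** and **val.csv** from a per-class folder structure. This is only an illustrative sketch: the dataset path, the 80/20 split and the column names ("image_id", "label") are assumptions and should match the `image_id_column` / `label_column` values in your configuration file.

```python
import os
import pandas as pd
from sklearn.model_selection import train_test_split

dataset_root = '/path/to/Dataset/train'  # hypothetical path, one subfolder per class

rows = []
for class_name in sorted(os.listdir(dataset_root)):
    class_dir = os.path.join(dataset_root, class_name)
    if not os.path.isdir(class_dir):
        continue
    for file_name in sorted(os.listdir(class_dir)):
        if file_name.lower().endswith(('.jpg', '.png')):
            # store paths relative to the dataset_path given in the config
            rows.append({'image_id': os.path.join(class_name, file_name),
                         'label': class_name})

df = pd.DataFrame(rows)
# stratified 80/20 split; requires at least two images per class
train_df, val_df = train_test_split(df, test_size=0.2, stratify=df['label'], random_state=42)
train_df.to_csv('train.csv', index=False)
val_df.to_csv('val.csv', index=False)
```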
74 | 
75 | After the configuration file is created, you can run the **train.py** script and start training:
76 | 
77 | ```bash
78 | $ python3 tools/train.py [config (path to configuration_file)]
79 |                          [--resume_from (the checkpoint file to resume from)]
80 | ```
81 | 
82 | # Test
83 | 
84 | The trained model can be tested using the following command:
85 | 
86 | ```bash
87 | $ python3 tools/test.py [config (path to configuration_file)]
88 |                         [--weights (path to trained model weights file)]
89 |                         [--encodings (path to trained model encodings file)]
90 |                         [--image (path to image file)]
91 | ```
92 | 
93 | It is also possible to use the [test_network.ipynb](https://github.com/RocketFlash/SiameseNet/blob/master/test_network.ipynb) notebook to test the trained network and visualize the input data as well as the output encodings.
94 | 
95 | # Embeddings visualization
96 | 
97 | The resulting encodings can be visualized interactively using the **plot_tsne_interactive** function in [utils.py](https://github.com/RocketFlash/SiameseNet/blob/master/embedding_net/utils.py).
98 | 
99 | t-SNE plots of Russian traffic sign image embeddings (107 classes):
100 | 
101 | Before training:
102 | ![t-SNE before](images/t-sne_without_training.png)
103 | 
104 | After training:
105 | ![t-SNE example](images/t-sne.png)
106 | 
107 | 
108 | # References
109 | 
110 | [1] Schroff, Florian, Dmitry Kalenichenko, and James Philbin. [Facenet: A unified embedding for face recognition and clustering.](https://arxiv.org/abs/1503.03832) CVPR 2015
111 | 
112 | [2] Alexander Hermans, Lucas Beyer, Bastian Leibe. [In Defense of the Triplet Loss for Person Re-Identification](https://arxiv.org/pdf/1703.07737), 2017
113 | 
114 | [3] Adam Bielski. [Siamese and triplet networks with online pair/triplet mining in PyTorch](https://github.com/adambielski/siamese-triplet)
115 | 
116 | [4] Olivier Moindrot. [Triplet Loss and Online Triplet Mining in TensorFlow](https://omoindrot.github.io/triplet-loss)
-------------------------------------------------------------------------------- /__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/RocketFlash/EmbeddingNet/cf7828afee485db7da20449ab60cc7d7b5140c7a/__init__.py
-------------------------------------------------------------------------------- /configs/road_signs_apollo.yml: --------------------------------------------------------------------------------
1 | MODEL:
2 |   input_shape : [48, 48, 3]
3 |   encodings_len: 256
4 |   mode : 'triplet'
5 |   distance_type : 'l1'
6 |   backbone_name : 'efficientnet-b1'
7 |   backbone_weights : 'noisy-student'
8 |   freeze_backbone : False
9 |   embeddings_normalization: True
10 | 
11 | DATALOADER:
12 |   dataset_path : '/home/rauf/datasets/RTSD/'
13 |   train_csv_file : '/home/rauf/datasets/RTSD/split_1/train.csv'
14 |   val_csv_file : '/home/rauf/datasets/RTSD/split_1/val.csv'
15 |   image_id_column : 'file_path'
16 |   label_column : 'class_name'
17 |   validate : True
18 |   val_ratio : 0.2
19 | 
20 | GENERATOR:
21 |   negatives_selection_mode : 'semihard'
22 |   k_classes: 20
23 |   k_samples: 3
24 |   margin: 0.5
25 |   batch_size : 8
26 |   n_batches : 500
27 |   augmentations : 'none'
28 | 
29 | TRAIN:
30 |   # optimizer parameters
31 |   optimizer : 'radam'
32 |   learning_rate : 0.001
33 |   decay_factor : 0.1
34 |   step_size : 5
35 | 
36 |   # embeddings learning training parameters
37 |   n_epochs : 1000
38 | 
39 |   # plot training history
40 |   plot_history : True
41 | 
42 | # SOFTMAX_PRETRAINING:
43 | #   # softmax pretraining parameters
44 | #   optimizer : 'radam'
45 | #   learning_rate : 0.02
46 | # decay_factor : 0.1 47 | # step_size : 5 48 | 49 | # batch_size : 16 50 | # val_steps : 100 51 | # steps_per_epoch : 500 52 | # n_epochs : 5 53 | 54 | ENCODINGS: 55 | # encodings parameters 56 | save_encodings : True 57 | centers_only: False 58 | max_num_samples_of_each_class : 30 59 | knn_k : 1 60 | 61 | GENERAL: 62 | project_name : 'road_signs_efnb1' 63 | work_dir : 'work_dirs/' 64 | tensorboard_callback: False 65 | wandb_callback: False -------------------------------------------------------------------------------- /configs/template.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | input_shape : [128, 128, 3] 3 | encodings_len: 256 4 | mode : 'triplet' 5 | distance_type : 'l1' 6 | backbone_name : 'efficientnet-b5' 7 | backbone_weights : 'noisy-student' 8 | freeze_backbone : False 9 | embeddings_normalization: True 10 | 11 | DATALOADER: 12 | dataset_path : '/home/rauf/datasets/bengali/pngs/train/' 13 | csv_file : '/home/rauf/datasets/bengali/train_new.csv' 14 | image_id_column : 'image_id' 15 | label_column : 'grapheme_root' 16 | validate : True 17 | val_ratio : 0.2 18 | 19 | GENERATOR: 20 | negatives_selection_mode : 'semihard' 21 | k_classes: 3 22 | k_samples: 3 23 | margin: 0.3 24 | batch_size : 8 25 | n_batches : 500 26 | augmentations : 'none' 27 | 28 | TRAIN: 29 | # optimizer parameters 30 | optimizer : 'radam' 31 | learning_rate : 0.00001 32 | decay_factor : 0.99 33 | step_size : 1 34 | 35 | # embeddings learning training parameters 36 | n_epochs : 1000 37 | 38 | # plot training history 39 | plot_history : True 40 | 41 | # SOFTMAX_PRETRAINING: 42 | # # softmax pretraining parameters 43 | # optimizer : 'radam' 44 | # learning_rate : 0.0001 45 | # decay_factor : 0.99 46 | # step_size : 1 47 | 48 | # batch_size : 16 49 | # val_steps : 200 50 | # steps_per_epoch : 1000 51 | # n_epochs : 50 52 | 53 | ENCODINGS: 54 | # encodings parameters 55 | save_encodings : True 56 | centers_only: False 57 | max_num_samples_of_each_class : 30 58 | knn_k : 1 59 | 60 | GENERAL: 61 | project_name : 'bengali_efn_b5' 62 | work_dir : 'work_dirs/' -------------------------------------------------------------------------------- /embedding_net/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RocketFlash/EmbeddingNet/cf7828afee485db7da20449ab60cc7d7b5140c7a/embedding_net/__init__.py -------------------------------------------------------------------------------- /embedding_net/augmentations.py: -------------------------------------------------------------------------------- 1 | import albumentations as A 2 | 3 | 4 | def get_aug(name='default', input_shape=[48, 48, 3]): 5 | if name == 'default': 6 | augmentations = A.Compose([ 7 | A.RandomBrightnessContrast(p=0.4), 8 | A.RandomGamma(p=0.4), 9 | A.HueSaturationValue(hue_shift_limit=20, 10 | sat_shift_limit=30, val_shift_limit=30, p=0.4), 11 | A.CLAHE(p=0.4), 12 | A.Blur(blur_limit=1, p=0.3), 13 | A.GaussNoise(var_limit=(50, 80), p=0.3) 14 | ], p=1) 15 | elif name == 'plates': 16 | augmentations = A.Compose([ 17 | A.RandomBrightnessContrast(p=0.4), 18 | A.RandomGamma(p=0.4), 19 | A.HueSaturationValue(hue_shift_limit=20, 20 | sat_shift_limit=30, 21 | val_shift_limit=30, 22 | p=0.4), 23 | A.CLAHE(p=0.4), 24 | A.HorizontalFlip(p=0.5), 25 | A.VerticalFlip(p=0.5), 26 | A.Blur(blur_limit=1, p=0.3), 27 | A.GaussNoise(var_limit=(50, 80), p=0.3), 28 | A.RandomCrop(p=0.8, height=2*input_shape[1]/3, width=2*input_shape[0]/3) 29 | ], p=1) 
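    # The presets below: 'deepfake' applies only a horizontal flip; 'plates2' adds flips,
    # color/contrast jitter, 90-degree rotations, Gaussian noise, a center crop and a resize
    # back to the input shape; any other name returns None (no augmentation).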
30 | elif name == 'deepfake': 31 | augmentations = A.Compose([ 32 | A.HorizontalFlip(p=0.5), 33 | ], p=1) 34 | elif name == 'plates2': 35 | augmentations = A.Compose([ 36 | A.CLAHE(clip_limit=(1,4),p=0.3), 37 | A.HorizontalFlip(p=0.5), 38 | A.VerticalFlip(p=0.5), 39 | A.RandomBrightness(limit=0.2, p=0.3), 40 | A.RandomContrast(limit=0.2, p=0.3), 41 | # A.Rotate(limit=360, p=0.9), 42 | A.RandomRotate90(p=0.3), 43 | A.HueSaturationValue(hue_shift_limit=(-50,50), 44 | sat_shift_limit=(-15,15), 45 | val_shift_limit=(-15,15), 46 | p=0.5), 47 | # A.Blur(blur_limit=(5,7), p=0.3), 48 | A.GaussNoise(var_limit=(10, 50), p=0.3), 49 | A.CenterCrop(p=1, height=2*input_shape[1]//3, width=2*input_shape[0]//3), 50 | A.Resize(p=1, height=input_shape[1], width=input_shape[0]) 51 | ], p=1) 52 | else: 53 | augmentations = None 54 | 55 | return augmentations 56 | -------------------------------------------------------------------------------- /embedding_net/backbones.py: -------------------------------------------------------------------------------- 1 | from tensorflow.keras.layers import Dense, Input, Lambda, Dropout, Flatten, GlobalAveragePooling2D 2 | from tensorflow.keras.layers import Conv2D, MaxPool2D, BatchNormalization, concatenate 3 | from tensorflow.keras.models import Model 4 | from tensorflow.keras.regularizers import l2 5 | import tensorflow.keras.backend as K 6 | from tensorflow.keras.callbacks import TensorBoard, LearningRateScheduler 7 | from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint 8 | from .datagenerators import SimpleDataGenerator 9 | import os 10 | import numpy as np 11 | 12 | def get_backbone(input_shape, 13 | encodings_len=4096, 14 | backbone_name='simple', 15 | embeddings_normalization=True, 16 | backbone_weights='imagenet', 17 | freeze_backbone=False, 18 | **kwargs): 19 | if backbone_name == 'simple': 20 | input_image = Input(input_shape) 21 | x = Conv2D(64, (10, 10), activation='relu', 22 | kernel_regularizer=l2(2e-4))(input_image) 23 | x = MaxPool2D()(x) 24 | x = Conv2D(128, (7, 7), activation='relu', 25 | kernel_regularizer=l2(2e-4))(x) 26 | x = MaxPool2D()(x) 27 | x = Conv2D(128, (4, 4), activation='relu', 28 | kernel_regularizer=l2(2e-4))(x) 29 | x = MaxPool2D()(x) 30 | x = Conv2D(256, (4, 4), activation='relu', 31 | kernel_regularizer=l2(2e-4))(x) 32 | x = Flatten()(x) 33 | backbone_model = Model( 34 | inputs=[input_image], outputs=[x]) 35 | encoded_output = Dense(encodings_len, activation='relu', 36 | kernel_regularizer=l2(1e-3))(x) 37 | if embeddings_normalization: 38 | encoded_output = Lambda(lambda x: K.l2_normalize( 39 | x, axis=1), name='l2_norm')(encoded_output) 40 | base_model = Model( 41 | inputs=[input_image], outputs=[encoded_output]) 42 | elif backbone_name == 'simple2': 43 | input_image = Input(input_shape) 44 | x = Conv2D(32, kernel_size=3, activation='relu', 45 | kernel_regularizer=l2(2e-4))(input_image) 46 | x = BatchNormalization()(x) 47 | x = Conv2D(32, kernel_size=3, activation='relu', 48 | kernel_regularizer=l2(2e-4))(x) 49 | x = BatchNormalization()(x) 50 | x = Conv2D(32, kernel_size=5, strides=2, padding='same', activation='relu', 51 | kernel_regularizer=l2(2e-4))(x) 52 | x = BatchNormalization()(x) 53 | x = Dropout(0.4)(x) 54 | 55 | x = Conv2D(64, kernel_size=3, activation='relu', 56 | kernel_regularizer=l2(2e-4))(x) 57 | x = BatchNormalization()(x) 58 | x = Conv2D(64, kernel_size=3, activation='relu', 59 | kernel_regularizer=l2(2e-4))(x) 60 | x = BatchNormalization()(x) 61 | x = Conv2D(64, kernel_size=5, 
strides=2, padding='same', activation='relu', 62 | kernel_regularizer=l2(2e-4))(x) 63 | x = BatchNormalization()(x) 64 | x = Dropout(0.4)(x) 65 | 66 | x = Conv2D(128, kernel_size=4, activation='relu', 67 | kernel_regularizer=l2(2e-4))(x) 68 | x = BatchNormalization()(x) 69 | backbone_model = Model( 70 | inputs=[input_image], outputs=[x]) 71 | x = Flatten()(x) 72 | x = Dense(512, activation="relu")(x) 73 | x = Dropout(0.5)(x) 74 | encoded_output = Dense(encodings_len, activation='relu', 75 | kernel_regularizer=l2(1e-3))(x) 76 | if embeddings_normalization: 77 | encoded_output = Lambda(lambda x: K.l2_normalize( 78 | x, axis=1), name='l2_norm')(encoded_output) 79 | 80 | base_model = Model( 81 | inputs=[input_image], outputs=[encoded_output]) 82 | else: 83 | if backbone_name.startswith('efficientnet'): 84 | import efficientnet.tfkeras as efn 85 | efficientnet_models = { 86 | 'efficientnet-b0': efn.EfficientNetB0, 87 | 'efficientnet-b1': efn.EfficientNetB1, 88 | 'efficientnet-b2': efn.EfficientNetB2, 89 | 'efficientnet-b3': efn.EfficientNetB3, 90 | 'efficientnet-b4': efn.EfficientNetB4, 91 | 'efficientnet-b5': efn.EfficientNetB5, 92 | 'efficientnet-b6': efn.EfficientNetB6, 93 | 'efficientnet-b7': efn.EfficientNetB7, 94 | } 95 | Efficientnet_model = efficientnet_models[backbone_name] 96 | backbone_model = Efficientnet_model(input_shape=input_shape, 97 | weights=backbone_weights, 98 | include_top=False) 99 | else: 100 | from classification_models.tfkeras import Classifiers 101 | classifier, preprocess_input = Classifiers.get(backbone_name) 102 | backbone_model = classifier(input_shape=input_shape, 103 | weights=backbone_weights, 104 | include_top=False) 105 | 106 | if freeze_backbone: 107 | for layer in backbone_model.layers[:-2]: 108 | layer.trainable = False 109 | 110 | after_backbone = backbone_model.output 111 | x = GlobalAveragePooling2D()(after_backbone) 112 | # x = Flatten()(after_backbone) 113 | 114 | x = Dense(encodings_len//2, activation="relu")(x) 115 | 116 | encoded_output = Dense(encodings_len, activation="relu")(x) 117 | if embeddings_normalization: 118 | encoded_output = Lambda(lambda x: K.l2_normalize( 119 | x, axis=1), name='l2_norm')(encoded_output) 120 | base_model = Model( 121 | inputs=[backbone_model.input], outputs=[encoded_output]) 122 | 123 | # base_model._make_predict_function() 124 | 125 | return base_model, backbone_model 126 | 127 | 128 | def pretrain_backbone_softmax(backbone_model, data_loader, params_softmax, params_save_paths): 129 | 130 | optimizer = params_softmax['optimizer'] 131 | learning_rate = params_softmax['learning_rate'] 132 | decay_factor = params_softmax['decay_factor'] 133 | step_size = params_softmax['step_size'] 134 | 135 | input_shape = params_softmax['input_shape'] 136 | batch_size = params_softmax['batch_size'] 137 | val_steps = params_softmax['val_steps'] 138 | steps_per_epoch = params_softmax['steps_per_epoch'] 139 | n_epochs = params_softmax['n_epochs'] 140 | augmentations = params_softmax['augmentations'] 141 | 142 | n_classes = data_loader.n_classes 143 | 144 | x = GlobalAveragePooling2D()(backbone_model.output) 145 | 146 | output = Dense(n_classes, activation='softmax')(x) 147 | model = Model(inputs=[backbone_model.input], outputs=[output]) 148 | 149 | # train 150 | model.compile(optimizer=optimizer, 151 | loss='categorical_crossentropy', 152 | metrics=['accuracy']) 153 | 154 | train_generator = SimpleDataGenerator(data_loader.train_data, 155 | data_loader.class_names, 156 | input_shape=input_shape, 157 | batch_size = batch_size, 158 | 
n_batches = steps_per_epoch, 159 | augmentations=augmentations) 160 | 161 | if data_loader.validate: 162 | val_generator = SimpleDataGenerator(data_loader.val_data, 163 | data_loader.class_names, 164 | input_shape=input_shape, 165 | batch_size = batch_size, 166 | n_batches = steps_per_epoch, 167 | augmentations=augmentations) 168 | checkpoint_callback_monitor = 'val_loss' 169 | else: 170 | val_generator = None 171 | checkpoint_callback_monitor = 'loss' 172 | 173 | tensorboard_save_path = os.path.join( 174 | params_save_paths['work_dir'], 175 | params_save_paths['project_name'], 176 | 'pretraining_model/tf_log/') 177 | weights_save_file = os.path.join( 178 | params_save_paths['work_dir'], 179 | params_save_paths['project_name'], 180 | 'pretraining_model/weights/', 181 | params_save_paths['project_name']+'_{epoch:03d}' +'.h5') 182 | 183 | callbacks = [ 184 | LearningRateScheduler(lambda x: learning_rate * 185 | decay_factor ** np.floor(x/step_size)), 186 | ReduceLROnPlateau(monitor=checkpoint_callback_monitor, factor=0.1, 187 | patience=20, verbose=1), 188 | EarlyStopping(monitor=checkpoint_callback_monitor, 189 | patience=10, 190 | verbose=1, 191 | restore_best_weights=True), 192 | # TensorBoard(log_dir=tensorboard_save_path), 193 | ModelCheckpoint(filepath=weights_save_file, 194 | verbose=1, 195 | monitor=checkpoint_callback_monitor, 196 | save_best_only=True)] 197 | 198 | history = model.fit_generator(train_generator, 199 | steps_per_epoch=steps_per_epoch, 200 | epochs=n_epochs, 201 | verbose=1, 202 | validation_data=val_generator, 203 | validation_steps=val_steps, 204 | callbacks=callbacks) -------------------------------------------------------------------------------- /embedding_net/datagenerators.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import numpy as np 4 | import random 5 | import pandas as pd 6 | from itertools import combinations 7 | from sklearn.metrics import pairwise_distances 8 | from tensorflow.keras.utils import Sequence 9 | from sklearn.model_selection import train_test_split 10 | from .utils import get_image 11 | from tensorflow.keras import backend as K 12 | import tensorflow as tf 13 | import tqdm 14 | import pickle 15 | 16 | class ENDataLoader(): 17 | def __init__(self, dataset_path, 18 | train_csv_file=None, 19 | val_csv_file=None, 20 | image_id_column = 'image_id', 21 | label_column = 'label', 22 | validate = True, 23 | val_ratio = 0.1, 24 | is_google=False): 25 | 26 | self.dataset_path = dataset_path 27 | self.class_files_paths = {} 28 | self.class_names = [] 29 | 30 | if train_csv_file is not None: 31 | self.class_files_paths = self._load_from_dataframe(train_csv_file, image_id_column, label_column, is_google) 32 | else: 33 | self.class_files_paths = self._load_from_directory() 34 | 35 | self.n_classes = len(self.class_names) 36 | self.n_samples = {k: len(v) for k, v in self.class_files_paths.items()} 37 | 38 | self.validate = validate 39 | self.val_ratio = val_ratio 40 | 41 | if self.validate: 42 | if val_csv_file is not None: 43 | self.train_data = self.class_files_paths 44 | self.val_data = self._load_from_dataframe(val_csv_file, image_id_column, label_column, is_google) 45 | else: 46 | self.train_data, self.val_data = self.split_train_val(self.val_ratio) 47 | else: 48 | self.train_data = self.class_files_paths 49 | self.val_data = {} 50 | 51 | def split_train_val(self, val_ratio): 52 | train_data = {} 53 | val_data = {} 54 | for k, v in self.class_files_paths.items(): 55 | train_d, 
val_d = train_test_split(v, test_size=val_ratio, random_state=42) 56 | train_data[k] = train_d 57 | val_data[k] = val_d 58 | return train_data, val_data 59 | 60 | def _load_from_dataframe(self, csv_file, image_id_column, label_column, is_google): 61 | class_files_paths = {} 62 | 63 | # Load data from file if it's already created 64 | os.makedirs('tmp' , exist_ok=True) 65 | if os.path.isfile('tmp/data.pickle'): 66 | print('LOAD DATA FROM FILE') 67 | with open('tmp/data.pickle', 'rb') as f: 68 | class_files_paths = pickle.load(f) 69 | self.class_names = list(class_files_paths.keys()) 70 | print('LOADING DATA FROM FILE COMPLETED') 71 | return class_files_paths 72 | 73 | dataframe = pd.read_csv(csv_file) 74 | self.class_names = list(dataframe[label_column].unique()) 75 | 76 | for class_name in tqdm.tqdm(self.class_names): 77 | image_names = dataframe.loc[dataframe[label_column] == class_name][image_id_column] 78 | if is_google: 79 | image_paths = [os.path.join(self.dataset_path,f'{f[0]}/{f[1]}/{f[2]}/', f+'.jpg') for f in image_names] 80 | else: 81 | image_paths = [os.path.join(self.dataset_path, f) for f in image_names] 82 | class_files_paths[class_name] = image_paths 83 | 84 | # Save data to file for fast loading 85 | with open('tmp/data.pickle', 'wb') as f: 86 | pickle.dump(class_files_paths, f) 87 | return class_files_paths 88 | 89 | def _load_from_directory(self): 90 | class_files_paths = {} 91 | self.class_names = [f.name for f in os.scandir(self.dataset_path) if f.is_dir()] 92 | class_dir_paths = [f.path for f in os.scandir(self.dataset_path) if f.is_dir()] 93 | 94 | for class_name, class_dir_path in tqdm.tqdm(zip(self.class_names, class_dir_paths)): 95 | subdirs = [f.path for f in os.scandir(class_dir_path) if f.is_dir()] 96 | temp_list = [] 97 | if len(subdirs)>0: 98 | for subdir in subdirs: 99 | class_image_paths = [f.path for f in os.scandir(subdir) if f.is_file() and 100 | (f.name.endswith('.jpg') or 101 | f.name.endswith('.png') and 102 | not f.name.startswith('._'))] 103 | temp_list.extend(class_image_paths) 104 | else: 105 | class_image_paths = [f.path for f in os.scandir(class_dir_path) if f.is_file() and 106 | (f.name.endswith('.jpg') or 107 | f.name.endswith('.png') and 108 | not f.name.startswith('._'))] 109 | temp_list.extend(class_image_paths) 110 | class_files_paths[class_name] = temp_list 111 | return class_files_paths 112 | 113 | 114 | class ENDataGenerator(Sequence): 115 | def __init__(self, class_files_paths, 116 | class_names, 117 | val_gen = False, 118 | input_shape=None, 119 | batch_size = 32, 120 | n_batches = 10, 121 | n_batches_val = 10, 122 | augmentations=None): 123 | 124 | self.input_shape = input_shape 125 | self.augmentations = augmentations 126 | self.batch_size = batch_size 127 | self.n_batches = n_batches 128 | self.n_batches_val = n_batches_val 129 | self.val_gen = val_gen 130 | self.class_files_paths = class_files_paths 131 | self.class_names = class_names 132 | 133 | self.n_classes = len(self.class_names) 134 | self.n_samples = {k: len(v) for k, v in self.class_files_paths.items()} 135 | 136 | def __len__(self): 137 | if self.val_gen: 138 | return self.n_batches_val 139 | else: 140 | return self.n_batches 141 | 142 | def __getitem__(self, index): 143 | pass 144 | 145 | def _get_images_set(self, clsss, idxs, with_aug=True): 146 | if type(clsss) is list: 147 | img_paths = [self.class_files_paths[cl][idx] for cl, idx in zip(clsss, idxs)] 148 | else: 149 | img_paths = [self.class_files_paths[clsss][idx] for idx in idxs] 150 | 151 | imgs = 
[get_image(img_path, self.input_shape) for img_path in img_paths] 152 | 153 | if with_aug: 154 | imgs = [self.augmentations(image=img)['image'] for img in imgs] 155 | 156 | return np.array(imgs)/255. 157 | 158 | 159 | class TripletsDataGenerator(ENDataGenerator): 160 | 161 | def __init__(self, embedding_model, 162 | class_files_paths, 163 | class_names, 164 | n_batches = 10, 165 | input_shape=None, 166 | batch_size = 32, 167 | augmentations=None, 168 | k_classes=5, 169 | k_samples=5, 170 | margin=0.5, 171 | negatives_selection_mode='semihard'): 172 | super().__init__(class_files_paths=class_files_paths, 173 | class_names=class_names, 174 | input_shape=input_shape, 175 | batch_size=batch_size, 176 | n_batches=n_batches, 177 | augmentations=augmentations) 178 | modes = {'semihard' : self.semihard_negative, 179 | 'hardest': self.hardest_negative, 180 | 'random_hard': self.random_hard_negative} 181 | self.embedding_model = embedding_model 182 | 183 | self.k_classes = k_classes 184 | self.k_samples = k_samples 185 | self.margin = margin 186 | self.negative_selection_fn = modes[negatives_selection_mode] 187 | 188 | def hardest_negative(self, loss_values, margin=0.5): 189 | hard_negative = np.argmax(loss_values) 190 | return hard_negative if loss_values[hard_negative] > 0 else None 191 | 192 | def random_hard_negative(self, loss_values, margin=0.5): 193 | hard_negatives = np.where(loss_values > 0)[0] 194 | return np.random.choice(hard_negatives) if len(hard_negatives) > 0 else None 195 | 196 | def semihard_negative(self, loss_values, margin=0.5): 197 | semihard_negatives = np.where(np.logical_and( 198 | loss_values < margin, loss_values > 0))[0] 199 | return np.random.choice(semihard_negatives) if len(semihard_negatives) > 0 else None 200 | 201 | def get_batch_triplets_mining(self): 202 | selected_classes_idxs = np.random.choice(self.n_classes, size=self.k_classes, replace=False) 203 | selected_classes = [self.class_names[cl] for cl in selected_classes_idxs] 204 | selected_classes_n_elements = [self.n_samples[cl] for cl in selected_classes] 205 | selected_images = [np.random.choice(cl_n, size=self.k_samples, replace=True) for cl_n in selected_classes_n_elements] 206 | 207 | all_embeddings_list = [] 208 | all_images_list = [] 209 | 210 | 211 | for idx, cl_img_idxs in enumerate(selected_images): 212 | images = self._get_images_set(selected_classes[idx], cl_img_idxs, with_aug=self.augmentations) 213 | all_images_list.append(images) 214 | embeddings = self.embedding_model.predict(images) 215 | all_embeddings_list.append(embeddings) 216 | 217 | all_embeddings = np.vstack(all_embeddings_list) 218 | all_images = np.vstack(all_images_list) 219 | distance_matrix = pairwise_distances(all_embeddings) 220 | 221 | triplet_anchors = [] 222 | triplet_positives = [] 223 | triplet_negatives = [] 224 | targets = [] 225 | for idx, _ in enumerate(selected_classes): 226 | current_class_mask = np.zeros(self.k_classes*self.k_samples, dtype=bool) 227 | current_class_mask[idx*self.k_samples:(idx+1)*self.k_samples] = True 228 | other_classes_mask = np.logical_not(current_class_mask) 229 | positive_indices = np.where(current_class_mask)[0] 230 | negative_indices = np.where(other_classes_mask)[0] 231 | anchor_positives = np.array(list(combinations(positive_indices, 2))) 232 | 233 | ap_distances = distance_matrix[anchor_positives[:,0], anchor_positives[:, 1]] 234 | for anchor_positive, ap_distance in zip(anchor_positives, ap_distances): 235 | loss_values = ap_distance - distance_matrix[anchor_positive[0], negative_indices] + 
self.margin 236 | loss_values = np.array(loss_values) 237 | hard_negative = self.negative_selection_fn(loss_values, margin=self.margin) 238 | 239 | if hard_negative is not None: 240 | hard_negative = negative_indices[hard_negative] 241 | triplet_anchors.append(all_images[anchor_positive[0]]) 242 | triplet_positives.append(all_images[anchor_positive[1]]) 243 | triplet_negatives.append(all_images[hard_negative]) 244 | targets.append(1) 245 | 246 | if len(triplet_anchors) == 0: 247 | triplet_anchors.append(all_images[anchor_positive[0]]) 248 | triplet_positives.append(all_images[anchor_positive[1]]) 249 | triplet_negatives.append(all_images[negative_indices[0]]) 250 | targets.append(1) 251 | 252 | triplet_anchors = np.array(triplet_anchors) 253 | triplet_positives = np.array(triplet_positives) 254 | triplet_negatives = np.array(triplet_negatives) 255 | targets = np.array(targets) 256 | 257 | triplets = [triplet_anchors, triplet_positives, triplet_negatives] 258 | return triplets, targets 259 | 260 | def __getitem__(self, index): 261 | return self.get_batch_triplets_mining() 262 | 263 | 264 | class SimpleTripletsDataGenerator(ENDataGenerator): 265 | def __init__(self, class_files_paths, 266 | class_names, 267 | input_shape=None, 268 | batch_size = 32, 269 | n_batches = 10, 270 | augmentations=None, 271 | **kwargs): 272 | super().__init__(class_files_paths=class_files_paths, 273 | class_names=class_names, 274 | input_shape=input_shape, 275 | batch_size=batch_size, 276 | n_batches=n_batches, 277 | augmentations=augmentations) 278 | 279 | def get_batch_triplets(self): 280 | triplets = [np.zeros((self.batch_size, self.input_shape[0], self.input_shape[1], 3)), 281 | np.zeros((self.batch_size, self.input_shape[0], self.input_shape[1], 3)), 282 | np.zeros((self.batch_size, self.input_shape[0], self.input_shape[1], 3))] 283 | targets = np.zeros((self.batch_size,)) 284 | 285 | count = 0 286 | 287 | for i in range(self.batch_size): 288 | selected_class_idx = random.randrange(0, self.n_classes) 289 | selected_class = self.class_names[selected_class_idx] 290 | selected_class_n_elements = self.n_samples[selected_class] 291 | another_class_idx = ( 292 | selected_class_idx + random.randrange(1, self.n_classes)) % self.n_classes 293 | another_class = self.class_names[another_class_idx] 294 | another_class_n_elements = self.n_samples[another_class] 295 | 296 | idx1 = random.randrange(0, selected_class_n_elements) 297 | idx2 = (idx1 + random.randrange(1, selected_class_n_elements) 298 | ) % selected_class_n_elements 299 | idx3 = random.randrange(0, another_class_n_elements) 300 | 301 | imgs = self._get_images_set([selected_class, selected_class, another_class], 302 | [idx1, idx2, idx3], 303 | with_aug=self.augmentations) 304 | 305 | triplets[0][count, :, :, :] = imgs[0] 306 | triplets[1][count, :, :, :] = imgs[1] 307 | triplets[2][count, :, :, :] = imgs[2] 308 | targets[i] = 1 309 | count += 1 310 | 311 | return triplets, targets 312 | 313 | def __getitem__(self, index): 314 | return self.get_batch_triplets() 315 | 316 | 317 | class SiameseDataGenerator(ENDataGenerator): 318 | 319 | def __init__(self, class_files_paths, 320 | class_names, 321 | val_gen = False, 322 | input_shape=None, 323 | batch_size = 32, 324 | n_batches = 10, 325 | n_batches_val = 10, 326 | augmentations=None): 327 | 328 | super().__init__(class_files_paths=class_files_paths, 329 | class_names=class_names, 330 | val_gen = False, 331 | input_shape=input_shape, 332 | batch_size=batch_size, 333 | n_batches=n_batches, 334 | n_batches_val = 10, 
335 | augmentations=augmentations) 336 | 337 | def get_batch_pairs(self): 338 | pairs = [np.zeros((self.batch_size, self.input_shape[0], self.input_shape[1], 3)), np.zeros( 339 | (self.batch_size, self.input_shape[0], self.input_shape[1], 3))] 340 | targets = np.zeros((self.batch_size, )) 341 | targets1 = np.zeros((self.batch_size, )) 342 | targets2 = np.zeros((self.batch_size, )) 343 | # targets = [] 344 | 345 | n_same_class = self.batch_size // 2 346 | 347 | selected_class_idx = random.randrange(0, self.n_classes) 348 | selected_class = self.class_names[selected_class_idx] 349 | selected_class_n_elements = self.n_samples[selected_class] 350 | 351 | indxs = np.random.randint(selected_class_n_elements, size=self.batch_size) 352 | 353 | with_aug = self.augmentations 354 | count = 0 355 | for i in range(n_same_class): 356 | idx1 = indxs[i] 357 | idx2 = (idx1 + random.randrange(1, selected_class_n_elements)) % selected_class_n_elements 358 | imgs = self._get_images_set([selected_class, selected_class], [idx1, idx2], with_aug=with_aug) 359 | pairs[0][count, :, :, :] = imgs[0] 360 | pairs[1][count, :, :, :] = imgs[1] 361 | targets[i] = 1 362 | count += 1 363 | 364 | for i in range(n_same_class, self.batch_size): 365 | another_class_idx = (selected_class_idx + random.randrange(1, self.n_classes)) % self.n_classes 366 | another_class = self.class_names[another_class_idx] 367 | another_class_n_elements = self.n_samples[another_class] 368 | idx1 = indxs[i] 369 | idx2 = random.randrange(0, another_class_n_elements) 370 | imgs = self._get_images_set([selected_class, another_class], [idx1, idx2], with_aug=with_aug) 371 | pairs[0][count, :, :, :] = imgs[0] 372 | pairs[1][count, :, :, :] = imgs[1] 373 | targets[i] = 0 374 | count += 1 375 | return pairs, targets 376 | 377 | def __getitem__(self, index): 378 | return self.get_batch_pairs() 379 | 380 | 381 | class SimpleDataGenerator(ENDataGenerator): 382 | def __init__(self, class_files_paths, 383 | class_names, 384 | input_shape=None, 385 | batch_size = 32, 386 | n_batches = 10, 387 | augmentations=None): 388 | 389 | super().__init__(class_files_paths=class_files_paths, 390 | class_names=class_names, 391 | input_shape=input_shape, 392 | batch_size=batch_size, 393 | n_batches=n_batches, 394 | augmentations=augmentations) 395 | 396 | def get_batch(self): 397 | images = [ 398 | np.zeros((self.batch_size, self.input_shape[0], self.input_shape[1], 3))] 399 | targets = np.zeros((self.batch_size, self.n_classes)) 400 | 401 | count = 0 402 | with_aug = self.augmentations 403 | for i in range(self.batch_size): 404 | selected_class_idx = random.randrange(0, self.n_classes) 405 | selected_class = self.class_names[selected_class_idx] 406 | selected_class_n_elements = len(self.class_files_paths[selected_class]) 407 | 408 | indx = random.randrange(0, selected_class_n_elements) 409 | 410 | img = self._get_images_set([selected_class], [indx], with_aug=with_aug) 411 | images[0][count, :, :, :] = img[0] 412 | targets[i][selected_class_idx] = 1 413 | count += 1 414 | 415 | return images, targets 416 | 417 | def __getitem__(self, index): 418 | return self.get_batch() 419 | -------------------------------------------------------------------------------- /embedding_net/losses_and_accuracies.py: -------------------------------------------------------------------------------- 1 | import tensorflow.keras.backend as K 2 | 3 | 4 | def contrastive_loss(y_true, y_pred): 5 | '''Contrastive loss from Hadsell-et-al.'06 6 | 
http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf
7 |     '''
8 |     margin = 1
9 |     square_pred = K.square(y_pred)
10 |     margin_square = K.square(K.maximum(margin - y_pred, 0))
11 |     return K.mean(y_true * square_pred + (1 - y_true) * margin_square)
12 | 
13 | 
14 | def triplet_loss(margin=0.5):
15 |     """
16 |     Implementation of the triplet loss function
17 |     Arguments:
18 |         y_true -- true labels, required when you define a loss in Keras; not used in this function.
19 |         y_pred -- tensor with the anchor, positive and negative encodings concatenated along the last axis:
20 |             anchor -- the encodings for the anchor data
21 |             positive -- the encodings for the positive data (similar to anchor)
22 |             negative -- the encodings for the negative data (different from anchor)
23 |     Returns:
24 |         loss -- real number, value of the loss
25 |     """
26 |     def loss_function(y_true, y_pred):
27 |         total_length = y_pred.shape.as_list()[-1]
28 | 
29 |         anchor = y_pred[:, 0:int(total_length*1/3)]
30 |         positive = y_pred[:, int(total_length*1/3):int(total_length*2/3)]
31 |         negative = y_pred[:, int(total_length*2/3):int(total_length*3/3)]
32 | 
33 |         # distance between the anchor and the positive
34 |         pos_dist = K.sum(K.square(anchor-positive), axis=1)
35 | 
36 |         # distance between the anchor and the negative
37 |         neg_dist = K.sum(K.square(anchor-negative), axis=1)
38 | 
39 |         # compute loss
40 |         basic_loss = pos_dist-neg_dist+margin
41 |         loss = K.maximum(basic_loss, 0.0)
42 |         return loss
43 | 
44 |     return loss_function
45 | 
46 | 
47 | def accuracy(y_true, y_pred):
48 |     '''Compute classification accuracy with a fixed threshold on distances.
49 |     '''
50 |     return K.mean(K.equal(y_true, K.cast(y_pred < 0.5, y_true.dtype)))
51 | 
52 | 
-------------------------------------------------------------------------------- /embedding_net/models.py: --------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import tensorflow.keras.backend as K
4 | import cv2
5 | import random
6 | from tensorflow.keras.models import Model, load_model
7 | from tensorflow.keras import optimizers
8 | from tensorflow.keras.layers import Dense, Input, Lambda, concatenate, GlobalAveragePooling2D
9 | import pickle
10 | from .utils import load_encodings, parse_params
11 | from .backbones import get_backbone
12 | from .
import losses_and_accuracies as lac 13 | from .utils import get_image, get_images 14 | import matplotlib.pyplot as plt 15 | from sklearn.neighbors import KNeighborsClassifier 16 | 17 | 18 | # TODO 19 | # [] - implement magnet loss 20 | # [] - finalize settings with l1 and l2 losses 21 | 22 | class EmbeddingNet: 23 | 24 | def __init__(self, params): 25 | self.params_model = params['model'] 26 | self.params_dataloader = params['dataloader'] 27 | self.params_generator = params['generator'] 28 | self.params_general = params['general'] 29 | self.params_train = params['train'] 30 | if 'softmax' in params: 31 | self.params_softmax = params['softmax'] 32 | 33 | self.base_model = None 34 | self.backbone_model = None 35 | self.model = None 36 | 37 | self.workdir_path = os.path.join(self.params_general['work_dir'], 38 | self.params_general['project_name']) 39 | 40 | self.encoded_training_data = {} 41 | 42 | def _create_base_model(self): 43 | self.base_model, self.backbone_model = get_backbone(**self.params_model) 44 | output = Dense(units=1, activation='sigmoid', name='output_img')(self.base_model.layers[-1].output) 45 | self.classification_model = Model(inputs=[self.base_model.layers[0].input],outputs=[output]) 46 | 47 | def _generate_encodings(self, imgs): 48 | encodings = self.base_model.predict(imgs) 49 | return encodings 50 | 51 | 52 | def train_embeddings_classifier(self, data_loader, 53 | classification_model, 54 | max_n_samples=10, 55 | shuffle=True): 56 | encodings = self.generate_encodings(data_loader, max_n_samples=max_n_samples, 57 | shuffle=shuffle) 58 | classification_model.fit(encodings['encodings'], 59 | encodings['labels']) 60 | 61 | def generate_encodings(self, data_loader, max_n_samples=10, 62 | shuffle=True): 63 | data_paths, data_labels, data_encodings = [], [], [] 64 | encoded_training_data = {} 65 | 66 | for class_name in data_loader.class_names: 67 | data_list = data_loader.train_data[class_name] 68 | if len(data_list)>max_n_samples: 69 | if shuffle: 70 | random.shuffle(data_list) 71 | data_list = data_list[:max_n_samples] 72 | 73 | data_paths += data_list 74 | imgs = get_images(data_list, self.params_model['input_shape']) 75 | encods = self._generate_encodings(imgs) 76 | for encod in encods: 77 | data_encodings.append(encod) 78 | data_labels.append(class_name) 79 | 80 | encoded_training_data['paths'] = data_paths 81 | encoded_training_data['labels'] = data_labels 82 | encoded_training_data['encodings'] = np.squeeze(np.array(data_encodings)) 83 | 84 | return encoded_training_data 85 | 86 | def save_encodings(self, encoded_training_data, 87 | save_folder='./', 88 | save_file_name='encodings.pkl'): 89 | with open(os.path.join(save_folder, save_file_name), "wb") as f: 90 | pickle.dump(encoded_training_data, f) 91 | 92 | def load_model(self, file_path): 93 | import efficientnet.tfkeras as efn 94 | self.model = load_model(file_path, compile=False) 95 | model_layers = [x for x in self.model.layers[::-1] if isinstance(x, Model)] 96 | self.input_shape = list(self.model.inputs[0].shape[1:]) 97 | self.base_model = Model(inputs=[model_layers[0].input], 98 | outputs=[model_layers[0].output]) 99 | # self.classification_model = Model(inputs=[self.model.layers[3].get_input_at(0)], 100 | # outputs=[self.model.layers[-1].output]) 101 | # self.classification_model._make_predict_function() 102 | # self.base_model._make_predict_function() 103 | 104 | 105 | def save_base_model(self, save_folder): 106 | self.base_model.save(f'{save_folder}base_model.h5') 107 | 108 | def save_onnx(self, save_folder, 
save_name='base_model.onnx'): 109 | os.environ["TF_KERAS"] = '1' 110 | import efficientnet.tfkeras as efn 111 | import keras2onnx 112 | onnx_model = keras2onnx.convert_keras(self.base_model, self.base_model.name) 113 | keras2onnx.save_model(onnx_model, os.path.join(save_folder, save_name)) 114 | 115 | def predict(self, image): 116 | if type(image) is str: 117 | img = cv2.imread(image) 118 | else: 119 | img = image 120 | img = cv2.resize(img, (self.params_model['input_shape'][0], 121 | self.params_model['input_shape'][1])) 122 | encoding = self.base_model.predict(np.expand_dims(img, axis=0)) 123 | distances = self.calculate_distances(encoding) 124 | max_element = np.argmin(distances) 125 | predicted_label = self.encoded_training_data['labels'][max_element] 126 | return predicted_label 127 | 128 | def predict_knn(self, image, with_top5=False): 129 | if type(image) is str: 130 | img = cv2.imread(image) 131 | else: 132 | img = image 133 | img = cv2.resize(img, (self.input_shape[0], self.input_shape[1])) 134 | 135 | encoding = self.base_model.predict(np.expand_dims(img, axis=0)) 136 | predicted_label = self.encoded_training_data['knn_classifier'].predict(encoding) 137 | if with_top5: 138 | prediction_top5_idx = self.encoded_training_data['knn_classifier'].kneighbors(encoding, n_neighbors=5) 139 | prediction_top5 = [self.encoded_training_data['labels'][prediction_top5_idx[1][0][i]] for i in range(5)] 140 | return predicted_label, prediction_top5 141 | else: 142 | return predicted_label 143 | 144 | def calculate_prediction_accuracy(self, data_loader): 145 | correct_top1 = 0 146 | correct_top5 = 0 147 | 148 | accuracies = {'top1':0, 149 | 'top5':0 } 150 | total_n_of_images = len(data_loader.images_paths['val']) 151 | for img_path, img_label in zip(data_loader.images_paths['val'], 152 | data_loader.images_labels['val']): 153 | prediction, prediction_top5 = self.predict_knn(img_path, with_top5=True) 154 | if prediction[0] == img_label: 155 | correct_top1 += 1 156 | if img_label in prediction_top5: 157 | correct_top5 += 1 158 | accuracies['top1'] = correct_top1/total_n_of_images 159 | accuracies['top5'] = correct_top5/total_n_of_images 160 | 161 | return accuracies 162 | 163 | 164 | class TripletNet(EmbeddingNet): 165 | 166 | def __init__(self, params, training=False): 167 | super().__init__(params) 168 | 169 | self.training = training 170 | 171 | if self.training: 172 | self._create_base_model() 173 | self._create_model_triplet() 174 | 175 | 176 | def _create_model_triplet(self): 177 | input_image_a = Input(self.params_model['input_shape']) 178 | input_image_p = Input(self.params_model['input_shape']) 179 | input_image_n = Input(self.params_model['input_shape']) 180 | 181 | image_encoding_a = self.base_model(input_image_a) 182 | image_encoding_p = self.base_model(input_image_p) 183 | image_encoding_n = self.base_model(input_image_n) 184 | 185 | merged_vector = concatenate([image_encoding_a, image_encoding_p, image_encoding_n],axis=-1, name='merged_layer') 186 | self.model = Model(inputs=[input_image_a, input_image_p, input_image_n],outputs=merged_vector) 187 | 188 | print('Whole model summary') 189 | self.model.summary() 190 | 191 | 192 | class SiameseNet(EmbeddingNet): 193 | 194 | def __init__(self, params, training): 195 | super().__init__(params) 196 | 197 | self.training = training 198 | 199 | if self.training: 200 | self._create_base_model() 201 | self._create_model_siamese() 202 | 203 | def _create_model_siamese(self): 204 | 205 | input_image_1 = Input(self.params_model['input_shape']) 206 | 
input_image_2 = Input(self.params_model['input_shape']) 207 | 208 | image_encoding_1 = self.base_model(input_image_1) 209 | image_encoding_2 = self.base_model(input_image_2) 210 | 211 | Cl_out1 = Lambda(lambda x: x, name='output_im1') 212 | Cl_out2 = Lambda(lambda x: x, name='output_im2') 213 | 214 | classification_output_1 = Cl_out1(self.classification_model(input_image_1)) 215 | classification_output_2 = Cl_out2(self.classification_model(input_image_2)) 216 | 217 | if self.params_model['distance_type'] == 'l1': 218 | L1_layer = Lambda(lambda tensors: K.abs(tensors[0] - tensors[1])) 219 | distance = L1_layer([image_encoding_1, image_encoding_2]) 220 | 221 | embeddings_output = Dense(units=1, activation='sigmoid', name='output_siamese')(distance) 222 | 223 | elif self.params_model['distance_type'] == 'l2': 224 | 225 | L2_layer = Lambda(lambda tensors: K.sqrt(K.maximum(K.sum(K.square(tensors[0] - tensors[1]), axis=1, keepdims=True), K.epsilon()))) 226 | distance = L2_layer([image_encoding_1, image_encoding_2]) 227 | 228 | embeddings_output = distance 229 | 230 | self.model = Model(inputs=[input_image_1, input_image_2], outputs=[embeddings_output, classification_output_1, classification_output_2]) 231 | 232 | print('Base model summary') 233 | self.base_model.summary() 234 | 235 | print('Whole model summary') 236 | self.model.summary() -------------------------------------------------------------------------------- /embedding_net/utils.py: -------------------------------------------------------------------------------- 1 | from sklearn.manifold import TSNE 2 | import os 3 | os.environ["TF_KERAS"] = '1' 4 | import cv2 5 | import pickle 6 | import numpy as np 7 | from matplotlib import pyplot as plt 8 | import yaml 9 | from tensorflow.keras import optimizers 10 | from .augmentations import get_aug 11 | 12 | 13 | def get_image(img_path, input_shape=None): 14 | img = cv2.imread(img_path) 15 | if img is None: 16 | print('image is not exist ' + img_path) 17 | return None 18 | if input_shape: 19 | img = cv2.resize( 20 | img, (input_shape[0], input_shape[1])) 21 | return img 22 | 23 | def get_images(img_paths, input_shape=None): 24 | imgs = [get_image(img_path, input_shape) for img_path in img_paths] 25 | return np.array(imgs) 26 | 27 | 28 | 29 | def load_encodings(path_to_encodings): 30 | 31 | with open(path_to_encodings, 'rb') as f: 32 | encodings = pickle.load(f) 33 | return encodings 34 | 35 | 36 | def plot_tsne(encodings_path, save_plot_dir, show=True): 37 | encodings = load_encodings(encodings_path) 38 | labels = list(set(encodings['labels'])) 39 | tsne = TSNE() 40 | tsne_train = tsne.fit_transform(encodings['encodings']) 41 | fig, ax = plt.subplots(figsize=(16, 16)) 42 | for i, l in enumerate(labels): 43 | xs = tsne_train[np.array(encodings['labels']) == l, 0] 44 | ys = tsne_train[np.array(encodings['labels']) == l, 1] 45 | ax.scatter(xs, ys, label=l) 46 | for x, y in zip(xs, ys): 47 | plt.annotate(l, 48 | (x, y), 49 | size=8, 50 | textcoords="offset points", 51 | xytext=(0, 10), 52 | ha='center') 53 | 54 | ax.legend(bbox_to_anchor=(1.05, 1), fontsize='small', ncol=2) 55 | if show: 56 | fig.show() 57 | 58 | fig.savefig("{}{}.png".format(save_plot_dir, 'tsne.png')) 59 | 60 | 61 | def plot_tsne_interactive(encodings): 62 | import plotly.graph_objects as go 63 | if type(encodings) is str: 64 | encodings = load_encodings(encodings) 65 | labels = list(set(encodings['labels'])) 66 | tsne = TSNE() 67 | tsne_train = tsne.fit_transform(encodings['encodings']) 68 | fig = go.Figure() 69 | for i, l in 
enumerate(labels): 70 | xs = tsne_train[np.array(encodings['labels']) == l, 0] 71 | ys = tsne_train[np.array(encodings['labels']) == l, 1] 72 | color = 'rgba({},{},{},{})'.format(int(255*np.random.rand()), 73 | int(255*np.random.rand()), 74 | int(255*np.random.rand()), 0.8) 75 | fig.add_trace(go.Scatter(x=xs, 76 | y=ys, 77 | mode='markers', 78 | marker=dict(color=color, 79 | size=10), 80 | text=str(l), 81 | name=str(l))) 82 | fig.update_layout( 83 | title=go.layout.Title(text="t-SNE plot", 84 | xref="paper", 85 | x=0), 86 | autosize=False, 87 | width=1000, 88 | height=1000 89 | ) 90 | 91 | fig.show() 92 | 93 | 94 | def plot_grapths(history, save_path): 95 | for k, v in history.history.items(): 96 | t = list(range(len(v))) 97 | fig, ax = plt.subplots() 98 | ax.plot(t, v) 99 | 100 | ax.set(xlabel='epoch', ylabel='{}'.format(k), 101 | title='{}'.format(k)) 102 | ax.grid() 103 | 104 | fig.savefig("{}{}.png".format(save_path, k)) 105 | 106 | def plot_batch_simple(data, targets, class_names): 107 | num_imgs = data[0].shape[0] 108 | img_h = data[0].shape[1] 109 | img_w = data[0].shape[2] 110 | full_img = np.zeros((img_h,num_imgs*img_w,3), dtype=np.uint8) 111 | indxs = np.argmax(targets, axis=1) 112 | class_names = [class_names[i] for i in indxs] 113 | 114 | for i in range(num_imgs): 115 | full_img[:,i*img_w:(i+1)*img_w,:] = data[0][i,:,:,::-1]*255 116 | cv2.putText(full_img, class_names[i], (img_w*i + 5, 20), cv2.FONT_HERSHEY_SIMPLEX, 117 | 0.2, (0, 255, 0), 1, cv2.LINE_AA) 118 | plt.figure(figsize = (20,2)) 119 | plt.imshow(full_img) 120 | plt.show() 121 | 122 | 123 | def plot_batch(data, targets): 124 | num_imgs = data[0].shape[0] 125 | it_val = len(data) 126 | fig, axs = plt.subplots(num_imgs, it_val, figsize=( 127 | 30, 50), facecolor='w', edgecolor='k') 128 | fig.subplots_adjust(hspace=.5, wspace=.001) 129 | 130 | axs = axs.ravel() 131 | i = 0 132 | for img_idx, targ in zip(range(num_imgs), targets): 133 | for j in range(it_val): 134 | image = data[j][img_idx]*255 135 | img = cv2.cvtColor(image.astype(np.uint8), cv2.COLOR_BGR2RGB) 136 | axs[i+j].imshow(img) 137 | # axs[i+j].set_title(targ) 138 | i += it_val 139 | 140 | plt.show() 141 | 142 | 143 | def get_optimizer(name, learning_rate): 144 | if name == 'adam': 145 | optimizer = optimizers.Adam(lr=learning_rate) 146 | elif name == 'rms_prop': 147 | optimizer = optimizers.RMSprop(lr=learning_rate) 148 | elif name == 'radam': 149 | from keras_radam import RAdam 150 | optimizer = RAdam(learning_rate) 151 | else: 152 | optimizer = optimizers.SGD(lr=learning_rate) 153 | return optimizer 154 | 155 | 156 | def parse_params(filename='configs/road_signs.yml'): 157 | with open(filename, 'r') as ymlfile: 158 | cfg = yaml.safe_load(ymlfile) 159 | 160 | if 'augmentations_type' in cfg['GENERATOR']: 161 | augmentations = get_aug(cfg['GENERATOR']['augmentation_type'], 162 | cfg['MODEL']['input_shape']) 163 | else: 164 | augmentations = None 165 | 166 | optimizer = get_optimizer(cfg['TRAIN']['optimizer'], 167 | cfg['TRAIN']['learning_rate']) 168 | 169 | params_dataloader = cfg['DATALOADER'] 170 | params_generator = cfg['GENERATOR'] 171 | params_model = cfg['MODEL'] 172 | params_train = cfg['TRAIN'] 173 | params_general = cfg['GENERAL'] 174 | params_encodings = cfg['ENCODINGS'] 175 | 176 | params_generator['input_shape'] = params_model['input_shape'] 177 | params_train['optimizer'] = optimizer 178 | params_generator['augmentations'] = augmentations 179 | 180 | params = {'dataloader' : params_dataloader, 181 | 'generator' : params_generator, 182 | 'model' : 
params_model, 183 | 'train' : params_train, 184 | 'general': params_general, 185 | 'encodings' : params_encodings} 186 | 187 | if 'SOFTMAX_PRETRAINING' in cfg: 188 | params_softmax = cfg['SOFTMAX_PRETRAINING'] 189 | params_softmax['augmentations'] = augmentations 190 | params_softmax['input_shape'] = params_model['input_shape'] 191 | softmax_optimizer = get_optimizer(cfg['SOFTMAX_PRETRAINING']['optimizer'], 192 | cfg['SOFTMAX_PRETRAINING']['learning_rate']) 193 | params_softmax['optimizer'] = softmax_optimizer 194 | params['softmax'] = params_softmax 195 | 196 | 197 | return params 198 | -------------------------------------------------------------------------------- /images/t-sne.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RocketFlash/EmbeddingNet/cf7828afee485db7da20449ab60cc7d7b5140c7a/images/t-sne.png -------------------------------------------------------------------------------- /images/t-sne_without_training.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RocketFlash/EmbeddingNet/cf7828afee485db7da20449ab60cc7d7b5140c7a/images/t-sne_without_training.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | image-classifiers 2 | tensorflow-gpu==2.2.0 3 | matplotlib 4 | albumentations 5 | scikit-learn 6 | opencv-python 7 | keras-rectified-adam 8 | efficientnet -------------------------------------------------------------------------------- /tools/test.py: -------------------------------------------------------------------------------- 1 | from embedding_net.model import EmbeddingNet 2 | import argparse 3 | 4 | if __name__ == "__main__": 5 | parser = argparse.ArgumentParser() 6 | parser.add_argument("config", type=str, 7 | help="path to config file") 8 | parser.add_argument("--weights", type=str, 9 | help="path to trained model weights file") 10 | parser.add_argument("--encodings", type=str, 11 | help="path to trained model encodings file") 12 | parser.add_argument("--image", type=str, help="path to image file") 13 | opt = parser.parse_args() 14 | 15 | config_path = opt.config 16 | weights_path = opt.weights 17 | encodings_path = opt.encodings 18 | image_path = opt.image 19 | 20 | model = EmbeddingNet(config_path) 21 | model.load_model(weights_path) 22 | model.load_encodings(encodings_path) 23 | 24 | model_prediction = model.predict(image_path) 25 | print('Model prediction: {}'.format(model_prediction)) 26 | -------------------------------------------------------------------------------- /tools/train.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 5 | ROOT_DIR = os.path.dirname(BASE_DIR) 6 | sys.path.append(ROOT_DIR) 7 | 8 | import numpy as np 9 | from embedding_net.models import EmbeddingNet, TripletNet, SiameseNet 10 | from tensorflow.keras.callbacks import TensorBoard, LearningRateScheduler 11 | from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint 12 | from embedding_net.datagenerators import ENDataLoader, SimpleDataGenerator, TripletsDataGenerator, SimpleTripletsDataGenerator, SiameseDataGenerator 13 | from embedding_net.utils import parse_params, plot_grapths 14 | from embedding_net.backbones import pretrain_backbone_softmax 15 | from 
/tools/train.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | 
4 | BASE_DIR = os.path.dirname(os.path.abspath(__file__))
5 | ROOT_DIR = os.path.dirname(BASE_DIR)
6 | sys.path.append(ROOT_DIR)
7 | 
8 | import numpy as np
9 | from embedding_net.models import EmbeddingNet, TripletNet, SiameseNet
10 | from tensorflow.keras.callbacks import TensorBoard, LearningRateScheduler
11 | from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
12 | from embedding_net.datagenerators import ENDataLoader, SimpleDataGenerator, TripletsDataGenerator, SimpleTripletsDataGenerator, SiameseDataGenerator
13 | from embedding_net.utils import parse_params, plot_grapths
14 | from embedding_net.backbones import pretrain_backbone_softmax
15 | from embedding_net.losses_and_accuracies import contrastive_loss, triplet_loss, accuracy
16 | import argparse
17 | from tensorflow import keras
18 | from tensorflow.keras.utils import multi_gpu_model
19 | import tensorflow as tf
20 | 
21 | 
22 | 
23 | 
24 | def parse_args():
25 |     parser = argparse.ArgumentParser(description='Train a classifier')
26 |     parser.add_argument('config', help='model config file path')
27 |     parser.add_argument('--resume_from', help='the checkpoint file to resume from')
28 | 
29 |     args = parser.parse_args()
30 | 
31 |     return args
32 | 
33 | def create_save_folders(params):
34 |     work_dir_path = os.path.join(params['work_dir'], params['project_name'])
35 |     weights_save_path = os.path.join(work_dir_path, 'weights/')
36 |     weights_pretrained_save_path = os.path.join(work_dir_path, 'pretraining_model/weights/')
37 |     encodings_save_path = os.path.join(work_dir_path, 'encodings/')
38 |     plots_save_path = os.path.join(work_dir_path, 'plots/')
39 |     tensorboard_save_path = os.path.join(work_dir_path, 'tf_log/')
40 |     tensorboard_pretrained_save_path = os.path.join(work_dir_path, 'pretraining_model/tf_log/')
41 |     weights_save_file_path = os.path.join(weights_save_path, 'epoch_{epoch:03d}' + '.hdf5')
42 | 
43 |     os.makedirs(work_dir_path, exist_ok=True)
44 |     os.makedirs(weights_save_path, exist_ok=True)
45 |     os.makedirs(weights_pretrained_save_path, exist_ok=True)
46 |     os.makedirs(encodings_save_path, exist_ok=True)
47 |     os.makedirs(plots_save_path, exist_ok=True)
48 |     os.makedirs(tensorboard_pretrained_save_path, exist_ok=True)
49 | 
50 |     return tensorboard_save_path, weights_save_file_path, plots_save_path
51 | 
52 | def main():
53 |     print('LOAD PARAMETERS')
54 |     args = parse_args()
55 |     cfg_params = parse_params(args.config)
56 |     params_train = cfg_params['train']
57 |     params_model = cfg_params['model']
58 |     params_dataloader = cfg_params['dataloader']
59 |     params_generator = cfg_params['generator']
60 | 
61 |     tensorboard_save_path, weights_save_file_path, plots_save_path = create_save_folders(cfg_params['general'])
62 | 
63 | 
64 |     work_dir_path = os.path.join(cfg_params['general']['work_dir'],
65 |                                  cfg_params['general']['project_name'])
66 |     weights_save_path = os.path.join(work_dir_path, 'weights/')
67 | 
68 | 
69 |     initial_lr = params_train['learning_rate']
70 |     decay_factor = params_train['decay_factor']
71 |     step_size = params_train['step_size']
72 | 
73 |     if params_dataloader['validate']:
74 |         callback_monitor = 'val_loss'
75 |     else:
76 |         callback_monitor = 'loss'
77 | 
78 |     print('LOADING COMPLETED')
79 |     callbacks = [
80 |         LearningRateScheduler(lambda x: initial_lr *
81 |                               decay_factor ** np.floor(x/step_size)),
82 |         ReduceLROnPlateau(monitor=callback_monitor, factor=0.1,
83 |                           patience=4, verbose=1),
84 |         EarlyStopping(monitor=callback_monitor,
85 |                       patience=10,
86 |                       verbose=1),
87 |         ModelCheckpoint(filepath=weights_save_file_path,
88 |                         monitor=callback_monitor,
89 |                         save_best_only=True,
90 |                         verbose=1)
91 |     ]
92 | 
93 |     print('CREATE DATALOADER')
94 |     data_loader = ENDataLoader(**params_dataloader)
95 |     print('DATALOADER CREATED!')
96 | 
97 |     if cfg_params['general']['tensorboard_callback']:
98 |         callbacks.append(TensorBoard(log_dir=tensorboard_save_path))
99 | 
100 |     if cfg_params['general']['wandb_callback']:
101 |         import wandb
102 |         from wandb.keras import WandbCallback
103 |         wandb.init()
104 |         callbacks.append(WandbCallback(data_type="image", labels=data_loader.class_names))
105 | 
106 |     val_generator = None
107 |     print('CREATE MODEL AND DATA GENERATORS')
108 |     if params_model['mode'] == 'siamese':
109 |         model = SiameseNet(cfg_params, training=True)
110 |         train_generator = SiameseDataGenerator(class_files_paths=data_loader.train_data,
111 |                                                class_names=data_loader.class_names,
112 |                                                **params_generator)
113 |         if data_loader.validate:
114 |             val_generator = SiameseDataGenerator(class_files_paths=data_loader.val_data,
115 |                                                  class_names=data_loader.class_names,
116 |                                                  val_gen=True,
117 |                                                  **params_generator)
118 |         losses = {'output_siamese': contrastive_loss}
119 |         metric = {'output_siamese': accuracy}
120 |     else:
121 |         if cfg_params['general']['gpu_ids']:
122 |             print('Multiple gpu mode')
123 |             gpu_ids = cfg_params['general']['gpu_ids']
124 |             os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
125 |             os.environ["CUDA_VISIBLE_DEVICES"] = gpu_ids
126 |             print(f'Using gpu ids: {gpu_ids}')
127 |             gpu_ids_list = gpu_ids.split(',')
128 |             n_gpu = len(gpu_ids_list)
129 |         else:
130 |             os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
131 |             os.environ["CUDA_VISIBLE_DEVICES"] = '0'
132 |             n_gpu = 1
133 |             print('Use single gpu mode')
134 | 
135 |         model = TripletNet(cfg_params, training=True)
136 |         if n_gpu > 1:
137 |             strategy = tf.distribute.MirroredStrategy()
138 |             with strategy.scope():
139 |                 model.base_model = multi_gpu_model(model.base_model, gpus=n_gpu)
140 |                 # model.base_model = tf.keras.utils.multi_gpu_model(model.base_model, gpus=n_gpu)
141 | 
142 |         train_generator = TripletsDataGenerator(embedding_model=model.base_model,
143 |                                                 class_files_paths=data_loader.train_data,
144 |                                                 class_names=data_loader.class_names,
145 |                                                 **params_generator)
146 | 
147 |         if data_loader.validate:
148 |             val_generator = SimpleTripletsDataGenerator(data_loader.val_data,
149 |                                                         data_loader.class_names,
150 |                                                         **params_generator)
151 |         losses = triplet_loss(params_generator['margin'])
152 |         metric = ['accuracy']
153 |     print('DONE')
154 | 
155 | 
156 |     if args.resume_from is not None:
157 |         model.load_model(args.resume_from)
158 | 
159 |     print('COMPILE MODEL')
160 |     model.model.compile(loss=losses,
161 |                         optimizer=params_train['optimizer'],
162 |                         metrics=metric)
163 | 
164 |     if 'softmax' in cfg_params:
165 |         params_softmax = cfg_params['softmax']
166 |         params_save_paths = cfg_params['general']
167 |         pretrain_backbone_softmax(model.backbone_model,
168 |                                   data_loader,
169 |                                   params_softmax,
170 |                                   params_save_paths)
171 | 
172 |     history = model.model.fit_generator(train_generator,
173 |                                         validation_data=val_generator,
174 |                                         epochs=params_train['n_epochs'],
175 |                                         callbacks=callbacks,
176 |                                         verbose=1,
177 |                                         use_multiprocessing=False)
178 | 
179 |     if params_train['plot_history']:
180 |         plot_grapths(history, plots_save_path)
181 | 
182 | if __name__ == '__main__':
183 |     main()
184 | 
--------------------------------------------------------------------------------