├── src ├── __init__.py ├── argument_parser.py ├── models │ ├── test_pdart.py │ ├── tiny_darknet.py │ ├── test_fbnet_mobilenet.py │ ├── test_generic_model.py │ ├── vgg.py │ ├── squeezenet.py │ ├── __init__.py │ ├── wide_resnet.py │ ├── complexnet.py │ ├── generic_model.py │ ├── hypermodels.py │ ├── fbnetv2.py │ ├── mobilenet.py │ ├── alexnet.py │ ├── test_fbnetv2.py │ ├── resnet.py │ └── fbnet_mobilenet.py ├── test_losses.py ├── data │ ├── test_dataloader.py │ └── dataloader.py ├── losses.py ├── test_argument_parser.py ├── test_tfrecord_extractor.py ├── utils.py ├── inference_benchmark.py └── test_utils.py ├── tests ├── __init__.py ├── system_tests │ ├── dataviz.sh │ ├── models_training_upstride.sh │ ├── inference_benchmark.sh │ └── models_training.sh └── unit_tests │ └── test_channels_first_last.py ├── ressources ├── training.gif ├── keras_tuner.png └── testing │ ├── cat.png │ ├── black_and_white.jpeg │ ├── fake_LOC_val_solution.csv │ ├── config.yml │ └── fake_LOC_synset_mapping.txt ├── .gitmodules ├── .gitignore ├── conf.yml ├── dockerfiles ├── tensorflow.dockerfile └── upstride.dockerfile ├── copy_and_resize.py ├── makefile ├── dataviz.py ├── test.py ├── scripts ├── bayesian_opt_results_parser.py ├── alpha_viz.py ├── hyperband_results_parser.py └── test_tfrecord_writer.py ├── README.md ├── inference_client.py ├── inference_server.py ├── train_keras_tuner.py ├── inference_benchmark.py ├── documentation └── doc.md └── train_arch_search.py /src/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ressources/training.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UpStride/classification-api/HEAD/ressources/training.gif -------------------------------------------------------------------------------- /ressources/keras_tuner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UpStride/classification-api/HEAD/ressources/keras_tuner.png -------------------------------------------------------------------------------- /ressources/testing/cat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UpStride/classification-api/HEAD/ressources/testing/cat.png -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "submodules/global_dl"] 2 | path = submodules/global_dl 3 | url = git@github.com:UpStride/global-dl.git 4 | -------------------------------------------------------------------------------- /ressources/testing/black_and_white.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UpStride/classification-api/HEAD/ressources/testing/black_and_white.jpeg -------------------------------------------------------------------------------- /ressources/testing/fake_LOC_val_solution.csv: -------------------------------------------------------------------------------- 1 | ImageId,PredictionString 2 | ILSVRC2012_val_0,n01484850 85 1 499 272 3 | ILSVRC2012_val_1,n01496331 131 0 499 254 4 | 
-------------------------------------------------------------------------------- /ressources/testing/config.yml: -------------------------------------------------------------------------------- 1 | parameter_int: 1 2 | parameter_str: plop 3 | parameter_list: [1, 2, 3] 4 | parameter_bool: true 5 | parameter_dict: 6 | parameter_int: 3 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | results*.md 3 | profiling 4 | README_perso.md 5 | user_conf*.yml 6 | .coverage 7 | coverage.xml 8 | rabbitmq_conf.yml 9 | *_conf.yml 10 | .vscode 11 | inference_config 12 | .stfolder 13 | .ipynb_checkpoints 14 | configs 15 | wandb -------------------------------------------------------------------------------- /conf.yml: -------------------------------------------------------------------------------- 1 | model_name: MobileNetV2 2 | num_epochs: 2 3 | checkpoint_dir: exp/checkpoint 4 | log_dir: exp/log 5 | framework: tensorflow 6 | export_dir: exp 7 | dataloader: 8 | name: cifar10 9 | train_list: [RandomHorizontalFlip, Normalize] 10 | val_list: [Normalize] 11 | batch_size: 64 12 | input_size: [32, 32, 3] 13 | num_classes: 10 14 | optimizer: 15 | lr: 0.0001 16 | -------------------------------------------------------------------------------- /ressources/testing/fake_LOC_synset_mapping.txt: -------------------------------------------------------------------------------- 1 | n01440764 tench, Tinca tinca 2 | n01443537 goldfish, Carassius auratus 3 | n01484850 great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias 4 | n01491361 tiger shark, Galeocerdo cuvieri 5 | n01494475 hammerhead, hammerhead shark 6 | n01496331 electric ray, crampfish, numbfish, torpedo 7 | n01498041 stingray 8 | n01514668 cock 9 | n01514859 hen 10 | n01518878 ostrich, Struthio camelus 11 | -------------------------------------------------------------------------------- /dockerfiles/tensorflow.dockerfile: -------------------------------------------------------------------------------- 1 | FROM tensorflow/tensorflow:2.4.1-gpu 2 | 3 | RUN apt-get update && \ 4 | apt-get install -y libsm6 libxrender1 libxext6 libgl1-mesa-glx && \ 5 | pip install \ 6 | opencv-python \ 7 | pyyaml \ 8 | tensorflow_datasets \ 9 | upstride_argparse \ 10 | keras-tuner \ 11 | pandas \ 12 | wandb \ 13 | tensorflow_addons && \ 14 | rm -rf /var/lib/apt/lists/* 15 | 16 | COPY src /opt/src 17 | COPY submodules /opt/submodules 18 | COPY train.py /opt/train.py 19 | COPY train_arch_search.py /opt/train_arch_search.py 20 | WORKDIR /opt 21 | CMD python train.py 22 | -------------------------------------------------------------------------------- /tests/system_tests/dataviz.sh: -------------------------------------------------------------------------------- 1 | 2 | 3 | python dataviz.py \ 4 | --dataloader.batch_size 128 \ 5 | --dataloader.name imagenette/full-size-v2 \ 6 | --dataloader.train_list RandomCropThenResize RandomHorizontalFlip Cutout ColorJitter Translate \ 7 | --dataloader.val_list CentralCrop \ 8 | --dataloader.val_split_id validation \ 9 | --dataloader.train_split_id train \ 10 | --dataloader.Translate.width_shift_range 0.2 \ 11 | --dataloader.Translate.height_shift_range 0.2 \ 12 | --dataloader.RandomCrop.size 224 224 3 \ 13 | --dataloader.CentralCrop.size 224 224 \ 14 | --dataloader.Cutout.length 16 \ 15 | -------------------------------------------------------------------------------- 
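The dotted flags in these scripts (`--dataloader.batch_size`, `--dataloader.Cutout.length`, ...) map onto the nested argument specs consumed by `upstride_argparse`, as `dataviz.py` further below illustrates. A minimal sketch of that contract, inferred from the argument lists and parser tests elsewhere in this repo: each entry is `[type, name, default, help, optional validator]`, a `'namespace'` entry nests a sub-spec, and `parse_cmd` returns a plain nested dict. The file name `args_demo.py` is hypothetical:

```python
# args_demo.py -- hypothetical demo, not a file of this repo.
import upstride_argparse as argparse

arguments = [
    ['namespace', 'dataloader', [
        [int, 'batch_size', 128, 'batch size per step', lambda x: x > 0],
        ['list[str]', 'train_list', ['Normalize'], 'augmentation ops for the train split'],
    ]],
]

if __name__ == '__main__':
    # e.g. python args_demo.py --dataloader.batch_size 64 \
    #          --dataloader.train_list RandomHorizontalFlip Normalize
    config = argparse.parse_cmd(arguments)  # returns a plain nested dict
    print(config['dataloader']['batch_size'])
```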
/dockerfiles/upstride.dockerfile: -------------------------------------------------------------------------------- 1 | FROM eu.gcr.io/fluid-door-230710/upstride:py-1.1.1-tf2.3.0-gpu 2 | 3 | RUN apt-get update && \ 4 | apt-get install -y libsm6 libxrender1 libxext6 libgl1-mesa-glx && \ 5 | pip install \ 6 | opencv-python \ 7 | pyyaml \ 8 | tensorflow_datasets \ 9 | upstride_argparse \ 10 | keras-tuner \ 11 | pandas \ 12 | wandb \ 13 | tensorflow_addons && \ 14 | rm -rf /var/lib/apt/lists/* 15 | 16 | COPY src /opt/src 17 | COPY submodules /opt/submodules 18 | COPY train.py /opt/train.py 19 | COPY train_arch_search.py /opt/train_arch_search.py 20 | WORKDIR /opt 21 | CMD python train.py 22 | -------------------------------------------------------------------------------- /copy_and_resize.py: -------------------------------------------------------------------------------- 1 | """This script copies a directory structure and resizes all the images to a specific size. 2 | """ 3 | import os 4 | from src.argument_parser import parse_config 5 | from src.utils import copy_and_resize 6 | 7 | 8 | def main(): 9 | arguments = [[str, "source", "", "directory to copy", lambda x: x != "" and os.path.isdir(x)], 10 | [str, "dest", "", "destination of the copy", lambda x: x != ""], 11 | [int, "img_size", 256, "height and width of the images after copy (the image is a square)"]] 12 | args = parse_config(arguments) 13 | copy_and_resize(args.source, args.dest, args.img_size) 14 | 15 | 16 | if __name__ == "__main__": 17 | main() 18 | -------------------------------------------------------------------------------- /makefile: -------------------------------------------------------------------------------- 1 | build_tensorflow: 2 | docker build -t upstride/classification_api:tensorflow-2.0 -f dockerfiles/tensorflow.dockerfile . 3 | 4 | build: 5 | docker build -t upstride/classification_api:upstride-2.0 -f dockerfiles/upstride.dockerfile .
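# A hedged usage sketch (assumption: train.py reads the repo-level conf.yml
# through the same yaml_config flag exercised in src/test_argument_parser.py):
#   make build                              # build the UpStride image
#   make run                                # open a shell inside it (target below)
#   python train.py --yaml_config conf.yml  # then launch a training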
6 | 7 | run: 8 | @docker run -it --rm --gpus all --privileged \ 9 | -v $$(pwd):/opt \ 10 | -v ~/tensorflow_datasets/:/root/tensorflow_datasets \ 11 | -v ~/.keras/datasets:/root/.keras/datasets \ 12 | upstride/classification_api:upstride-2.0 \ 13 | bash 14 | 15 | run_tensorflow: 16 | @docker run -it --rm --gpus all --privileged \ 17 | -v $$(pwd):/opt \ 18 | -v ~/tensorflow_datasets/:/root/tensorflow_datasets \ 19 | -v ~/.keras/datasets:/root/.keras/datasets \ 20 | upstride/classification_api:tensorflow-2.0 \ 21 | bash 22 | -------------------------------------------------------------------------------- /dataviz.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import os 3 | import tensorflow as tf 4 | import upstride_argparse as argparse 5 | from src.data import dataloader 6 | 7 | arguments = [ 8 | ['namespace', 'dataloader', dataloader.arguments], 9 | ] 10 | 11 | 12 | def main(): 13 | config = argparse.parse_cmd(arguments) 14 | datasets = { 15 | 'train': dataloader.get_dataset(config['dataloader'], transformation_list=config['dataloader']['train_list'], num_classes=10, split=config['dataloader']['train_split_id']), 16 | 'val': dataloader.get_dataset(config['dataloader'], transformation_list=config['dataloader']['val_list'], num_classes=10, split=config['dataloader']['val_split_id']) 17 | } 18 | 19 | for dataset_type in ['train', 'val']: 20 | for i, (images, y) in enumerate(datasets[dataset_type]): 21 | image = images[0] 22 | # OpenCV manages images as BGR, TF as RGB 23 | image = image.numpy()[:, :, ::-1] 24 | cv2.imwrite(os.path.join('/tmp', f'{dataset_type}_{i}.png'), image) 25 | if i == 20: 26 | break 27 | 28 | 29 | if __name__ == '__main__': 30 | main() 31 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import unittest 3 | from src.data.test_augmentations import TestAugmentations 4 | from src.data.test_dataloader import TestDataLoader 5 | from src.test_losses import TestLosses 6 | from src.models.test_fbnetv2 import * 7 | from src.models.test_fbnet_mobilenet import * 8 | from src.models.test_pdart import * 9 | 10 | # from src.test_utils import TestUtils 11 | # from src.models.test_generic_model import TestModel1 # TestLayer 12 | # from src.test_export import TestExport 13 | # from src.test_model_tools import TestLRDecay 14 | # from src.test_metrics import TestMetrics, TestCountFlops 15 | 16 | sys.path.append('scripts') 17 | sys.path.append('tests') 18 | sys.path.append('ressources') 19 | 20 | from scripts.test_tfrecord_writer import TestTfrecordWriter 21 | from tests.unit_tests.test_compare_dataloader import TestCompareDataLoader 22 | # dev note: TestCompareChannelsFirstLast takes around 5 minutes as there are lots of models to build.
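# To run a single suite instead, the plain unittest CLI is enough, since the
# test classes are imported into this module, e.g.:
#   python -m unittest test.TestLosses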
23 | from tests.unit_tests.test_channels_first_last import TestCompareChannelsFirstLast 24 | 25 | if __name__ == "__main__": 26 | unittest.main() 27 | -------------------------------------------------------------------------------- /src/argument_parser.py: -------------------------------------------------------------------------------- 1 | import os 2 | from .models.generic_model import framework_list 3 | from submodules.global_dl.training.optimizers import optimizer_list 4 | 5 | 6 | training_arguments_das = [ 7 | ['namespace', 'temperature', [ 8 | [int, 'init_value', 5, 'initial value of the temperature parameter used to control the Gumbel Softmax'], 9 | [float, 'decay_rate', 0.956, 'decay rate to anneal temperature'], 10 | [int, 'decay_steps', 1, 'decay steps'] 11 | ]], 12 | 13 | ['namespace', 'optimizer', [ 14 | [str, 'name', 'adam', 'optimizer to be used for updating architecture parameters during search', lambda x: x.lower() in optimizer_list], 15 | [float, "lr", 0.1, 'learning rate', lambda x: x > 0], 16 | [float, 'momentum', 0.9, 'used when optimizer name is specified as sgd_momentum'], 17 | ]], 18 | [str, 'exported_architecture', 'export.yml', 'file to write the exported architecture'], 19 | [float, 'weight_decay', 1e-4, 'weight decay rate'], 20 | [float, 'arch_param_decay', 5e-4, 'weight decay rate for architecture parameters'], 21 | [int, 'num_warmup', 10, 'number of warmup epochs'] 22 | ] 23 | -------------------------------------------------------------------------------- /src/models/test_pdart.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import tensorflow as tf 3 | import numpy as np 4 | from .pdart import DropPath 5 | 6 | class TestDropPath(unittest.TestCase): 7 | def test(self): 8 | x = tf.ones(shape=(10000, 1, 1, 1)) 9 | drop_path_prob = tf.convert_to_tensor(0.3) 10 | y = DropPath()([x, drop_path_prob]) 11 | # Mean of y shouldn't change much 12 | self.assertAlmostEqual(tf.reduce_sum(y).numpy()/10000, 1., 1) 13 | 14 | def test_nn(self): 15 | """ Create a single layer NN 16 | """ 17 | # define NN 18 | x = tf.keras.layers.Input(shape=(1, 1, 1)) 19 | drop_path_prob = tf.keras.layers.Input(shape=[]) 20 | y = DropPath()([x, drop_path_prob]) 21 | model = tf.keras.Model(inputs=[x, drop_path_prob], outputs=y) 22 | 23 | # run NN 24 | inputs = [tf.ones(shape=(1000, 1, 1, 1)), tf.convert_to_tensor(0.3)] 25 | outputs = model(inputs) 26 | outputs_mean = tf.reduce_mean(outputs) 27 | self.assertAlmostEqual(outputs_mean.numpy(), 1., 1) 28 | 29 | inputs = [tf.ones(shape=(1000, 1, 1, 1)), tf.convert_to_tensor(0.)] 30 | outputs2 = model(inputs) 31 | self.assertTrue(np.array_equal(np.ones(shape=(1000, 1, 1, 1)), outputs2.numpy()))  # with drop probability 0, DropPath must be the identity 32 | -------------------------------------------------------------------------------- /tests/system_tests/models_training_upstride.sh: -------------------------------------------------------------------------------- 1 | # train a channels-first MobileNet with upstride 2 | # needs at least 6 GB of VRAM 3 | 4 | python train.py \ 5 | --model_name MobileNetV2Cifar10NCHW \ 6 | --model.upstride_type 2 \ 7 | --model.factor 4 \ 8 | --model.num_classes 10 \ 9 | --model.input_size 32 32 3 \ 10 | --num_epochs 1000 \ 11 | --checkpoint_dir /tmp/checkpointdata2345 \ 12 | --log_dir log/translate \ 13 | --dataloader.name cifar10 \ 14 | --dataloader.train_list RandomHorizontalFlip Translate Cutout Normalize \ 15 | --dataloader.val_list Normalize \ 16 | --dataloader.val_split_id test \ 17 | --dataloader.Resize.size 36 36 \ 18 |
--dataloader.RandomCrop.size 32 32 3 \ 19 | --dataloader.Translate.width_shift_range 0.25 \ 20 | --dataloader.Translate.height_shift_range 0.25 \ 21 | --dataloader.Cutout.length 4 \ 22 | --dataloader.batch_size 128 \ 23 | --early_stopping 40 \ 24 | --optimizer.lr 0.1 \ 25 | --optimizer.lr_decay_strategy.lr_params.patience 20 \ 26 | --optimizer.lr_decay_strategy.lr_params.strategy lr_reduce_on_plateau \ 27 | --optimizer.lr_decay_strategy.lr_params.decay_rate 0.3 \ 28 | --config.mixed_precision 29 | 30 | rm -r /tmp/results 31 | rm -r /tmp/checkpoint 32 | -------------------------------------------------------------------------------- /tests/system_tests/inference_benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | 5 | # Call the benchmarking script without going through the experiment management 6 | python src/inference_benchmark.py 7 | 8 | # With basic TensorFlow 9 | python inference_benchmark.py \ 10 | --batch_size 32 \ 11 | --comments plop \ 12 | --cuda_visible_device 0 \ 13 | --docker_images local \ 14 | --engines tensorflow \ 15 | --factor 1 \ 16 | --models MobileNetV2NCHW \ 17 | --output /tmp/results.md \ 18 | --profiling_dir /tmp/profiling \ 19 | --n_steps 10 20 | 21 | # With tensorRT FP32 22 | python inference_benchmark.py \ 23 | --batch_size 32 \ 24 | --comments plop \ 25 | --cuda_visible_device 0 \ 26 | --docker_images local \ 27 | --engines tensorflow \ 28 | --factor 1 \ 29 | --models MobileNetV2NCHW \ 30 | --output /tmp/results.md \ 31 | --profiling_dir /tmp/profiling \ 32 | --n_steps 10 \ 33 | --tensorrt \ 34 | --tensorrt_precision FP32 35 | 36 | # With tensorRT FP16 37 | python inference_benchmark.py \ 38 | --batch_size 32 \ 39 | --comments plop \ 40 | --cuda_visible_device 0 \ 41 | --docker_images local \ 42 | --engines tensorflow \ 43 | --factor 1 \ 44 | --models MobileNetV2NCHW \ 45 | --output /tmp/results.md \ 46 | --profiling_dir /tmp/profiling \ 47 | --n_steps 10 \ 48 | --tensorrt \ 49 | --tensorrt_precision FP16 50 | -------------------------------------------------------------------------------- /src/models/tiny_darknet.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from .generic_model import GenericModelBuilder 3 | 4 | 5 | class TinyDarknet(GenericModelBuilder): 6 | def model(self, x): 7 | # First half 8 | x = self.conv2d_unit(x, filters=16 // self.factor, kernels=3) 9 | x = self.layers.MaxPool2D(pool_size=2, strides=2, padding='same')(x) 10 | x = self.conv2d_unit(x, filters=32 // self.factor, kernels=3) 11 | x = self.layers.MaxPool2D(pool_size=2, strides=2, padding='same')(x) 12 | x = self.conv2d_unit(x, filters=64 // self.factor, kernels=3) 13 | x = self.layers.MaxPool2D(pool_size=2, strides=2, padding='same')(x) 14 | x = self.conv2d_unit(x, filters=128 // self.factor, kernels=3) 15 | x = self.layers.MaxPool2D(pool_size=2, strides=2, padding='same')(x) 16 | x = self.conv2d_unit(x, filters=256 // self.factor, kernels=3) 17 | 18 | # 2nd half 19 | x = self.layers.MaxPool2D(pool_size=2, strides=2, padding='same')(x) 20 | x = self.conv2d_unit(x, filters=512 // self.factor, kernels=3) 21 | x = self.layers.MaxPool2D(pool_size=2, strides=1, padding='same')(x) 22 | x = self.conv2d_unit(x, filters=1024 // self.factor, kernels=3) 23 | 24 | x = self.layers.GlobalAveragePooling2D()(x) 25 | return x 26 | 27 | def conv2d_unit(self, x, filters, kernels, strides=1, padding='same'): 28 | x = self.layers.Conv2D(filters, kernels, 
padding=padding, strides=strides, use_bias=False)(x) 29 | x = self.layers.BatchNormalization(axis=self.channel_axis)(x) 30 | x = self.layers.LeakyReLU(alpha=0.1)(x) 31 | return x 32 | -------------------------------------------------------------------------------- /src/test_losses.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import tensorflow as tf 3 | from src.models.fbnetv2 import ChannelMasking 4 | from src.losses import flops_loss 5 | 6 | 7 | class TestLosses(unittest.TestCase): 8 | def test_flops_loss(self): 9 | model = tf.keras.Sequential([ 10 | tf.keras.layers.Input(shape=(24, 24, 3)), 11 | tf.keras.layers.Conv2D(3, (3, 3), padding='same', use_bias=False), 12 | ChannelMasking(1, 3, 1, "hello", gumble_noise=False) 13 | ]) 14 | 15 | model(tf.zeros((1, 24, 24, 3), dtype=tf.float32)) 16 | l = flops_loss(model) 17 | conv_flops = 3*3*3*3*24*24 * 2 18 | self.assertLess(l, conv_flops) 19 | self.assertAlmostEqual(float(l), conv_flops * ((1/3)**2 + (1/3)*(2/3)+(1/3))) 20 | 21 | model.layers[1].g = tf.convert_to_tensor([1., 0., 0.], dtype=tf.float32) 22 | l = flops_loss(model) 23 | self.assertAlmostEqual(float(l), conv_flops * ((1/3))) 24 | 25 | model.layers[1].g = tf.convert_to_tensor([0., 1., 0.], dtype=tf.float32) 26 | l = flops_loss(model) 27 | self.assertAlmostEqual(float(l), conv_flops * ((2/3))) 28 | 29 | model.layers[1].g = tf.convert_to_tensor([0., 0., 1.], dtype=tf.float32) 30 | l = flops_loss(model) 31 | self.assertAlmostEqual(float(l), conv_flops) 32 | 33 | def test_flops_with_intermediate_ops_loss(self): 34 | model = tf.keras.Sequential([ 35 | tf.keras.layers.Input(shape=(24, 24, 3)), 36 | tf.keras.layers.Conv2D(3, (3, 3), padding='same', use_bias=False), 37 | tf.keras.layers.ReLU(), 38 | ChannelMasking(1, 3, 1, "hello", gumble_noise=False) 39 | ]) 40 | model(tf.zeros((1, 24, 24, 3), dtype=tf.float32)) 41 | l = flops_loss(model) 42 | conv_flops = 3*3*3*3*24*24 * 2 43 | self.assertLess(l, conv_flops) 44 | self.assertAlmostEqual(float(l), conv_flops * ((1/3)**2 + (1/3)*(2/3)+(1/3))) 45 | -------------------------------------------------------------------------------- /src/models/test_fbnet_mobilenet.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import yaml 3 | import tempfile 4 | import shutil 5 | import numpy as np 6 | 7 | from .fbnet_mobilenet import FBNet_MobileNetV2Imagenet 8 | import tensorflow as tf 9 | 10 | 11 | class TestFBnetMobileNet(unittest.TestCase): 12 | @classmethod 13 | def setUpClass(cls): 14 | cls.img = np.ones((1, 224, 224, 3), dtype=np.float32) 15 | 16 | cls.test_mapping = { 17 | "conv2d_01": 24, 18 | "irb_01": 20, 19 | "irb_02": 40, 20 | "irb_03": 32, 21 | "irb_04": 40, 22 | "irb_05": 80, 23 | "irb_06": 64, 24 | "irb_07": 80, 25 | "irb_08": 160, 26 | "irb_09": 96, 27 | "irb_10": 152, 28 | "irb_11": 224, 29 | "irb_12": 136, 30 | "irb_13": 224, 31 | "irb_14": 160, 32 | "irb_15": 352, 33 | "irb_16": 368, 34 | "irb_17": 336 35 | } 36 | 37 | cls.tempdir = tempfile.mkdtemp() 38 | cls.file_path = cls.tempdir + '/test.yaml' 39 | with open(cls.file_path, 'w') as f: 40 | yaml.dump(cls.test_mapping, f) 41 | 42 | cls.channel_last = True # TODO test for channels first 43 | 44 | def test_init(self): 45 | print(self.img[1:]) 46 | params = { 47 | 'input_size': self.img.shape[1:], 48 | 'changing_ids': [], 49 | 'num_classes': 10, 50 | 'factor': 1 51 | } 52 | 53 | model = FBNet_MobileNetV2Imagenet(**params).build() 54 | 55 | # model.summary() 56 | get_dict 
= {} 57 | for layer in model.layers: 58 | # This type of checking the channels based on the architecture is not ideal. 59 | # For this specific case we use the projection of the MobileNet block to get the channels used. 60 | if layer.name.startswith('conv2d_01') or layer.name.endswith('project'): 61 | get_dict[layer.name.split('project')[0]] = layer.output.shape[-1] if self.channel_last else layer.output.shape[1] 62 | 63 | # TODO correct this test 64 | # self.assertDictEqual(get_dict,self.test_mapping) 65 | shutil.rmtree(self.tempdir) 66 | -------------------------------------------------------------------------------- /src/models/test_generic_model.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import unittest 3 | import tensorflow as tf 4 | from unittest.mock import MagicMock, patch 5 | from .generic_model import Layer, GenericModel 6 | 7 | # sys.modules['upstride.type2.tf.keras.layers'] = MagicMock() 8 | # sys.modules['upstride'] = MagicMock() 9 | 10 | 11 | class TestLayer(unittest.TestCase): 12 | def test_n_layers_before_tf(self): 13 | layer = Layer("tensorflow", n_layers_before_tf=3) 14 | # n_layers_before_tf is ignored with "tensorflow" 15 | self.assertEqual(layer(), tf.keras.layers) 16 | 17 | layer = Layer("upstride_type2", n_layers_before_tf=3) 18 | self.assertNotEqual(layer(), tf.keras.layers) 19 | self.assertNotEqual(layer(), tf.keras.layers) 20 | self.assertNotEqual(layer(), tf.keras.layers) 21 | self.assertNotEqual(layer(), tf.keras.layers) 22 | 23 | layer = Layer("mix_type2", n_layers_before_tf=3) 24 | self.assertNotEqual(layer(), tf.keras.layers) 25 | self.assertNotEqual(layer(), tf.keras.layers) 26 | self.assertNotEqual(layer(), tf.keras.layers) 27 | self.assertEqual(layer(), tf.keras.layers) 28 | 29 | 30 | class Model1(GenericModel): 31 | def model(self): 32 | self.x = self.layers().Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1')(self.x) 33 | self.x = self.layers().Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2')(self.x) 34 | self.x = self.layers().MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(self.x) 35 | 36 | 37 | class TestModel1(unittest.TestCase): 38 | def test_model(self): 39 | # This unit test doesn't work anymore because we can't know which engine is used 40 | pass 41 | # model = Model1('mix_type2', factor=4, n_layers_before_tf=1).model 42 | 43 | # # got with model.summary() 44 | # model.summary() 45 | # layer_names = [ 46 | # 'InputLayer', 47 | # 'TF2Upstride', 48 | # 'Upstride_2_Conv2D', 49 | # 'Upstride2TF', 50 | # 'Conv2D', 51 | # 'MaxPooling2D', 52 | # 'Activation', 53 | # ] 54 | 55 | # for i in range(7): 56 | # print(model.get_layer(index=i)) 57 | -------------------------------------------------------------------------------- /src/data/test_dataloader.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | import shutil 4 | import tempfile 5 | import unittest 6 | import cv2 7 | import numpy as np 8 | import tensorflow as tf 9 | from . 
import dataloader 10 | 11 | 12 | class TestDataLoader(unittest.TestCase): 13 | def test_map_fn(self): 14 | transformation_list = ['ResizeThenRandomCrop'] 15 | config = { 16 | 'ResizeThenRandomCrop': { 17 | "size": [256, 256], 18 | "crop_size": [224, 224, 3], 19 | "interpolation": 'bicubic' 20 | } 21 | } 22 | map_fn = dataloader.get_map_fn(transformation_list, config, n_classes=2) 23 | dataset_dir = create_fake_dataset() 24 | image = cv2.imread(os.path.join(dataset_dir, 'dog/1.jpg')) 25 | image = tf.convert_to_tensor(image) 26 | image, label = map_fn(image, tf.convert_to_tensor(1)) 27 | self.assertEqual(label.numpy()[0], 0) 28 | self.assertEqual(label.numpy()[1], 1) 29 | self.assertTrue(np.allclose(image.numpy(), np.ones((224, 224, 3), dtype=np.float32)*255)) 30 | 31 | def test_get_dataset_from_tfds(self): 32 | config = { 33 | 'name': 'mnist', 34 | 'data_dir': None, 35 | 'batch_size': 7, 36 | 'train_split_id': 'train' 37 | } 38 | dataset = dataloader.get_dataset_from_tfds(config, [], 10, split='train') 39 | 40 | i = 0 41 | for image, label in dataset: 42 | self.assertEqual(label.numpy().shape, (7, 10)) 43 | self.assertTrue(label.numpy()[0, 0] in [0, 1]) 44 | self.assertTrue(label.numpy()[1, 1] in [0, 1]) 45 | self.assertEqual(image.numpy().shape, (7, 28, 28, 1)) 46 | i += 1 47 | if i == 3: 48 | break 49 | 50 | self.assertEqual(i, 3) 51 | 52 | 53 | def create_fake_dataset(n_images_per_class=2): 54 | dataset_dir = tempfile.mkdtemp() 55 | os.makedirs(os.path.join(dataset_dir, 'cat')) 56 | os.makedirs(os.path.join(dataset_dir, 'dog')) 57 | for i in range(n_images_per_class): 58 | cv2.imwrite(os.path.join(dataset_dir, 'dog', '{}.jpg'.format(i)), np.ones((640, 480, 3), dtype=np.uint8) * 255) 59 | cv2.imwrite(os.path.join(dataset_dir, 'cat', '{}.jpg'.format(i)), np.ones((640, 480, 3), dtype=np.uint8) * 255) 60 | return dataset_dir 61 | -------------------------------------------------------------------------------- /src/losses.py: -------------------------------------------------------------------------------- 1 | from src.models.fbnetv2 import ChannelMasking 2 | from submodules.global_dl.training import metrics 3 | import tensorflow as tf 4 | 5 | def _count_parameters_conv2d(layer): 6 | if type(layer.input_shape) is list: 7 | input_shape = layer.input_shape[0] 8 | else: 9 | input_shape = layer.input_shape 10 | 11 | if type(layer.output_shape) is list: 12 | output_shape = layer.output_shape[0] 13 | else: 14 | output_shape = layer.output_shape 15 | 16 | if layer.data_format == "channels_first": 17 | input_channels = input_shape[1] 18 | output_channels, h, w = output_shape[1:] 19 | elif layer.data_format == "channels_last": 20 | input_channels = input_shape[3] 21 | h, w, output_channels = output_shape[1:] 22 | w_h, w_w = layer.kernel_size 23 | 24 | num_params = output_channels * input_channels * w_h * w_w 25 | 26 | if layer.use_bias: 27 | num_params += output_channels 28 | 29 | return int(num_params) 30 | 31 | 32 | def flops_loss(model): 33 | """Loss function defined by the number of FLOPs, useful for differentiable architecture search 34 | 35 | This function is compatible with both TensorFlow and the UpStride engine 36 | 37 | Args: 38 | model: Keras model containing some ChannelMasking layers 39 | 40 | Returns: 41 | float: loss 42 | """ 43 | loss = 0 44 | for layer in model.layers: 45 | if "Conv2D" in str(type(layer)) and "Depthwise" not in str(type(layer)): 46 | flops = metrics._count_flops_conv2d(layer) 47 | if type(layer) == ChannelMasking: 48 | # flops is the number of flops of the channel just
before ChannelMasking 49 | g = layer.g 50 | param_ratio = [flops * (layer.min + i * layer.step)/layer.max for i in range(layer.g.shape[0])] 51 | loss += tf.math.reduce_sum(g * tf.convert_to_tensor(param_ratio)) 52 | return loss 53 | 54 | 55 | def parameters_loss(model): 56 | loss = 0 57 | for layer in model.layers: 58 | if "Conv2D" in str(type(layer)) and "Depthwise" not in str(type(layer)): 59 | n_params = _count_parameters_conv2d(layer) 60 | if type(layer) == ChannelMasking: 61 | # parameters are the number of parameters of the channel just before ChannelMasking 62 | g = layer.g 63 | param_ratio = [n_params * (layer.min + i * layer.step)/layer.max for i in range(g.shape[0])] 64 | loss += tf.math.reduce_sum(g * tf.convert_to_tensor(param_ratio)) 65 | return loss 66 | -------------------------------------------------------------------------------- /scripts/bayesian_opt_results_parser.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import seaborn as sns 4 | import pandas as pd 5 | import matplotlib 6 | import matplotlib.pyplot as plt 7 | import upstride_argparse as argparse 8 | 9 | arguments = [ 10 | [str, "server", '', 'address of the server to connect using ssh'], 11 | [str, 'remote_dir', '', "directory of the keras tuner experiment on the remote server"], 12 | [str, 'csv_path', '/tmp/results.csv', 'path to write csv file'], 13 | [bool, 'no_plot', False, 'if true then don\'t plot the results'] 14 | ] 15 | 16 | plot = True 17 | try: 18 | matplotlib.use("GTK3Agg") 19 | except ImportError: 20 | print("can't load matplotlib") 21 | plot = False 22 | sns.set(style="darkgrid") 23 | 24 | 25 | def run_bash(cmd: str): 26 | stream = os.popen(cmd) 27 | return stream.read() 28 | 29 | 30 | def test_split_json(): 31 | print(split_json("{}{qsdf}{sdfqfh}")) 32 | 33 | 34 | def split_json(cmd_out): 35 | jsons = [] 36 | n_accol = 0 37 | previous_split_char = 0 38 | for i, c in enumerate(cmd_out): 39 | if c == '{': 40 | n_accol += 1 41 | if c == '}': 42 | n_accol -= 1 43 | if n_accol == 0: 44 | # then split the json 45 | jsons.append(cmd_out[previous_split_char: i+1]) 46 | previous_split_char = i+1 47 | return jsons 48 | 49 | 50 | def parse_str(e): 51 | if e is None: 52 | return '0' 53 | return str(e) 54 | 55 | 56 | def main(): 57 | global plot 58 | args = argparse.parse_cmd(arguments) 59 | if args['no_plot']: 60 | plot = False 61 | server = args['server'] 62 | remote_dir = args['remote_dir'] 63 | out = run_bash(f'ssh {server} "cd {remote_dir} && cat */trial.json"') 64 | jsons = split_json(out) 65 | csv_content = 'experiment_id,factor,framework,depth,score\n' 66 | for trial in jsons: 67 | trial = json.loads(trial) 68 | values = trial['hyperparameters']['values'] 69 | csv_values = [trial['trial_id'], values['factor'], values['framework'], values['depth'], trial['score']] 70 | csv_content += ','.join(list(map(parse_str, csv_values))) + '\n' 71 | 72 | with open(args['csv_path'], 'w') as f: 73 | f.write(csv_content) 74 | 75 | if not plot: 76 | return 77 | 78 | # plot the results 79 | data = pd.read_csv(args['csv_path']) 80 | print(data) 81 | f, ax = plt.subplots(figsize=(9, 6)) 82 | data = data.drop_duplicates(subset=["factor", "depth"]) 83 | data = data.pivot("factor", "depth", "score") 84 | print(data) 85 | sns.heatmap(data, annot=True, vmin=0, vmax=1, cmap='CMRmap') 86 | plt.show() 87 | 88 | 89 | if __name__ == "__main__": 90 | # test_split_json() 91 | main() 92 | 
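# Worked example for split_json above: it tracks brace depth and cuts each
# time the depth returns to zero, so concatenated JSON objects come apart:
#   split_json('{"a": 1}{"b": {"c": 2}}')  ->  ['{"a": 1}', '{"b": {"c": 2}}']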
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # UpStride Classification API 2 | 3 | [![TensorFlow 2.3](https://img.shields.io/badge/TensorFlow-2.3-FF6F00?logo=tensorflow)](https://github.com/tensorflow/tensorflow/releases/tag/v2.3.0) 4 | [![Python 3.6](https://img.shields.io/badge/Python-3.6-3776AB)](https://www.python.org/downloads/release/python-360/) 5 | 6 | 7 | ## What is this repository? 8 | 9 | Hi there 👋 10 | 11 | We are really excited today to open our GitHub to the world! After months of research and development we decided to start giving back to the community with our first open-source repository. 12 | 13 | We are sharing an image classification code that we use internally to benchmark our engine on several datasets. 14 | 15 | This training script has also been shared with our clients to smooth the first-time use of our product and simplify the creation of state-of-the-art neural networks. 16 | 17 | We hope it will bring value to you as well! 18 | 19 | Here you will find how to convert your image datasets to the TFRecord format, load them with an efficient data pipeline offering various data augmentation strategies, and train classic deep learning models. 20 | 21 | We are also working on an integration of Keras-Tuner for hyperparameter search and a differentiable architecture search method for AutoML experiments. 22 | 23 | We will explain here how to use it with and without the UpStride API. 24 | 25 | If you're interested in trying this script powered by UpStride technology 🚀 feel free to reach out to us at hello@upstride.io 26 | 27 | 28 | ## How do I get set up? 29 | 30 | Start off by cloning this repository; be careful, it uses git submodules, so please clone with `git clone --recurse-submodules`. 31 | 32 | If you forgot to do it, don't panic: you can still run `git submodule update --init`. 33 | 34 | The easiest way is to use Docker; we provide two dockerfiles, one to run this code using TensorFlow 2.3, and the other one to run it with UpStride 1.0. 35 | 36 | You can build them by using `make build` or `make build_tensorflow`. 37 | 38 | The TensorFlow docker image will soon be on Docker Hub. 39 | 40 | 41 | ## How do I start a training? 42 | 43 | ![training](ressources/training.gif) 44 | 45 | 46 | 47 | See the 🎓 [Documentation](documentation/doc.md) 48 | 49 | or 50 | 51 | Get started with the [cat vs dog classification example](documentation/cat_dog.ipynb) 52 | 53 | ## Unittesting and code coverage 54 | * To run the unittests, run `python test.py` 55 | * To get the coverage, run `coverage run test.py`. Then `coverage report` shows the coverage information and `coverage xml` creates a file usable by VSCode 56 | 57 | ## Would you like to contribute? 58 | 59 | If you discover a bug or have an idea, please raise an issue. If you wish to contribute, pull requests are also welcome. 60 | 61 | We will try to review them as fast as possible to integrate your work in a timely manner. 62 | 63 | Unit-tests with pull requests are also welcome to smooth this process. 64 | 65 | Thank you!
66 | ✌️ 67 | -------------------------------------------------------------------------------- /src/models/vgg.py: -------------------------------------------------------------------------------- 1 | """ code came from https://github.com/keras-team/keras-applications/blob/master/keras_applications/vgg16.py 2 | """ 3 | import tensorflow as tf 4 | from .generic_model import GenericModelBuilder 5 | 6 | 7 | class VGG16(GenericModelBuilder): 8 | def model(self, x): 9 | # Block 1 10 | x = self.layers.Conv2D(64//self.factor, (3, 3), padding='same', name='block1_conv1')(x) 11 | x = self.layers.Activation('relu')(x) 12 | x = self.layers.Conv2D(64//self.factor, (3, 3), padding='same', name='block1_conv2')(x) 13 | x = self.layers.Activation('relu')(x) 14 | x = self.layers.MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x) 15 | 16 | # Block 2 17 | x = self.layers.Conv2D(128//self.factor, (3, 3), padding='same', name='block2_conv1')(x) 18 | x = self.layers.Activation('relu')(x) 19 | x = self.layers.Conv2D(128//self.factor, (3, 3), padding='same', name='block2_conv2')(x) 20 | x = self.layers.Activation('relu')(x) 21 | x = self.layers.MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x) 22 | 23 | # Block 3 24 | x = self.layers.Conv2D(256//self.factor, (3, 3), padding='same', name='block3_conv1')(x) 25 | x = self.layers.Activation('relu')(x) 26 | x = self.layers.Conv2D(256//self.factor, (3, 3), padding='same', name='block3_conv2')(x) 27 | x = self.layers.Activation('relu')(x) 28 | x = self.layers.Conv2D(256//self.factor, (3, 3), padding='same', name='block3_conv3')(x) 29 | x = self.layers.Activation('relu')(x) 30 | x = self.layers.MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x) 31 | 32 | # Block 4 33 | x = self.layers.Conv2D(512//self.factor, (3, 3), padding='same', name='block4_conv1')(x) 34 | x = self.layers.Activation('relu')(x) 35 | x = self.layers.Conv2D(512//self.factor, (3, 3), padding='same', name='block4_conv2')(x) 36 | x = self.layers.Activation('relu')(x) 37 | x = self.layers.Conv2D(512//self.factor, (3, 3), padding='same', name='block4_conv3')(x) 38 | x = self.layers.Activation('relu')(x) 39 | x = self.layers.MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x) 40 | 41 | # Block 5 42 | x = self.layers.Conv2D(512//self.factor, (3, 3), padding='same', name='block5_conv1')(x) 43 | x = self.layers.Activation('relu')(x) 44 | x = self.layers.Conv2D(512//self.factor, (3, 3), padding='same', name='block5_conv2')(x) 45 | x = self.layers.Activation('relu')(x) 46 | x = self.layers.Conv2D(512//self.factor, (3, 3), padding='same', name='block5_conv3')(x) 47 | x = self.layers.Activation('relu')(x) 48 | x = self.layers.MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x) 49 | 50 | # Classification block 51 | x = self.layers.Flatten(name='flatten')(x) 52 | x = self.layers.Dense(4096//self.factor, name='fc1')(x) 53 | x = self.layers.Activation('relu')(x) 54 | x = self.layers.Dense(4096//self.factor, name='fc2')(x) 55 | x = self.layers.Activation('relu')(x) 56 | return x 57 | -------------------------------------------------------------------------------- /src/models/squeezenet.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from .generic_model import GenericModelBuilder 3 | 4 | 5 | class SqueezeNet(GenericModelBuilder): 6 | def model(self, x): 7 | x = self.layers.Conv2D(filters=64 // self.factor, kernel_size=3, strides=2, padding='valid', name='conv1')(x) 8 | 9 | x = self.layers.Activation('relu', 
name='relu_conv1')(x) 10 | x = self.layers.MaxPooling2D(pool_size=3, strides=2, name='pool1')(x) 11 | 12 | x = self.fire_module(x, fire_id=2, s1x1=16 // self.factor, e1x1=64 // self.factor, e3x3=64 // self.factor) 13 | x = self.fire_module(x, fire_id=3, s1x1=16 // self.factor, e1x1=64 // self.factor, e3x3=64 // self.factor) 14 | x = self.layers.MaxPooling2D(pool_size=3, strides=2, name='pool3')(x) 15 | 16 | x = self.fire_module(x, fire_id=4, s1x1=32 // self.factor, e1x1=128 // self.factor, e3x3=128 // self.factor) 17 | x = self.fire_module(x, fire_id=5, s1x1=32 // self.factor, e1x1=128 // self.factor, e3x3=128 // self.factor) 18 | x = self.layers.MaxPooling2D(pool_size=3, strides=2, name='pool5')(x) 19 | 20 | x = self.fire_module(x, fire_id=6, s1x1=48 // self.factor, e1x1=192 // self.factor, e3x3=192 // self.factor) 21 | x = self.fire_module(x, fire_id=7, s1x1=48 // self.factor, e1x1=192 // self.factor, e3x3=192 // self.factor) 22 | x = self.fire_module(x, fire_id=8, s1x1=64 // self.factor, e1x1=256 // self.factor, e3x3=256 // self.factor) 23 | x = self.fire_module(x, fire_id=9, s1x1=64 // self.factor, e1x1=256 // self.factor, e3x3=256 // self.factor) 24 | 25 | x = self.layers.Dropout(0.5, name='drop9')(x) 26 | x = self.layers.Conv2D(filters=self.num_classes, kernel_size=1, padding='valid', name='conv10')(x) 27 | x = self.layers.Activation('relu', name='relu_conv10')(x) 28 | x = self.layers.GlobalAveragePooling2D()(x) 29 | return x 30 | 31 | def fire_module(self, x, fire_id, s1x1=16, e1x1=64, e3x3=64): 32 | """tf.keras 33 | 34 | Args: 35 | x: input from the previous layer 36 | fire_id: id of fire module 37 | s1x1: filter size of squeeze layer 38 | e1x1: filter size of 1x1 expand layer 39 | e3x3: filter size of 3x3 expand layer 40 | Returns: 41 | a keras tensor 42 | """ 43 | 44 | s_id = 'fire' + str(fire_id) + '/' 45 | 46 | x = self.layers.Conv2D(filters=s1x1, kernel_size=1, padding='valid', name=s_id + 'squeeze1x1_conv')(x) 47 | x = self.layers.Activation('relu', name=s_id + 'squeeze1x1_relu')(x) 48 | 49 | expand1x1 = self.layers.Conv2D(filters=e1x1, kernel_size=1, padding='valid', name=s_id + 'expand1x1_conv')(x) 50 | expand1x1 = self.layers.Activation('relu', name=s_id + 'expand1x1_relu')(expand1x1) 51 | 52 | expand3x3 = self.layers.Conv2D(filters=e3x3, kernel_size=3, padding='same', name=s_id + 'expand3x3_conv')(x) 53 | expand3x3 = self.layers.Activation('relu', name=s_id + 'expand3x3_relu')(expand3x3) 54 | 55 | x = self.layers.Concatenate(axis=self.channel_axis, name=s_id + 'concat')([expand1x1, expand3x3]) 56 | 57 | return x 58 | -------------------------------------------------------------------------------- /inference_client.py: -------------------------------------------------------------------------------- 1 | import zmq 2 | import numpy as np 3 | import tensorflow as tf 4 | import upstride_argparse as argparse 5 | from src.data import dataloader, augmentations 6 | 7 | args_spec = [ 8 | # dataloader specification to run inference on a public dataset 9 | [int, "num_classes", 0, 'Number of classes', lambda x: x > 0], 10 | ['namespace', 'dataloader', [ 11 | ['list[str]', 'list', ['Resize', 'CentralCrop', 'Normalize'], 'Comma-separated list of data augmentation operations'], 12 | [str, "data_dir", '', "directory to read/write data. 
Defaults to \"~/tensorflow_datasets\""], 13 | [str, 'name', None, 'Choose the dataset to be used', lambda x: not (x is None)], 14 | [str, 'split_id', 'validation', 'Split id in the dataset to use'], 15 | [int, 'batch_size', 1, 'The size of batch per gpu', lambda x: x > 0], 16 | ] + augmentations.arguments], 17 | 18 | # networking parameters 19 | [int, 'zmq_port', 5555, 'Specify the port to connect the ZMQ socket', lambda x: x > 0], 20 | ] 21 | 22 | 23 | def get_dataset(args): 24 | args['dataloader']['train_split_id'] = None 25 | dataset = dataloader.get_dataset(args['dataloader'], transformation_list=args['dataloader']['list'], 26 | num_classes=args["num_classes"], split=args['dataloader']['split_id']) 27 | return dataset 28 | 29 | 30 | def create_zmq_socket(port): 31 | context = zmq.Context() 32 | socket = context.socket(zmq.REQ) 33 | socket.connect("tcp://localhost:" + str(port)) 34 | return socket 35 | 36 | 37 | def send_and_evaluate_record(record, socket): 38 | img = record[0].numpy().astype('float16') 39 | val = record[1].numpy() 40 | socket.send(img) 41 | reply = socket.recv() 42 | res = np.frombuffer(reply, dtype='float32').reshape(val.shape) 43 | total = val.shape[0] 44 | correct = [val[j][np.argmax(res[j])] == 1 for j in range(total)].count(True) 45 | return total, correct 46 | 47 | 48 | def send_and_evaluate_dataset(dataset, socket): 49 | sent_records_count = 0 50 | logging_frequency = 10 51 | correct_count = 0 52 | images_count = 0 53 | for record in dataset: 54 | if sent_records_count % logging_frequency == 0: 55 | accuracy = 100.0 * correct_count / images_count if images_count > 0 else float("nan") 56 | print("Records sent: %d, accuracy: %0.2f%%" % (sent_records_count, accuracy)) 57 | total, correct = send_and_evaluate_record(record, socket) 58 | images_count = images_count + total 59 | correct_count = correct_count + correct 60 | sent_records_count += 1 61 | 62 | print("Total records sent:", sent_records_count) 63 | return images_count, correct_count 64 | 65 | 66 | def main(): 67 | args = argparse.parse_cmd(args_spec) 68 | dataset = get_dataset(args) 69 | socket = create_zmq_socket(args['zmq_port']) 70 | images_count, correct_count = send_and_evaluate_dataset(dataset, socket) 71 | accuracy = correct_count / images_count 72 | print("Accuracy of the remote model:", accuracy) 73 | 74 | if __name__ == '__main__': 75 | main() -------------------------------------------------------------------------------- /scripts/alpha_viz.py: -------------------------------------------------------------------------------- 1 | import math 2 | import json 3 | import matplotlib 4 | import matplotlib.pyplot as plt 5 | import matplotlib.cm as cm 6 | import numpy as np 7 | 8 | import upstride_argparse as argparse 9 | 10 | arguments = [ 11 | [str, "alpha_path", '', 'path of the alpha file to parse'], 12 | [int, "epoch", 0, 'if different than 0 then visualize a single epoch'], 13 | [int, "min", 0, 'if provided, then define the minimum epoch to visualize'], 14 | [int, "max", 0, 'if provided, then define the maximum epoch to visualize'], 15 | [int, "step", 100, 'number of steps between 2 epochs to visualize'], 16 | ['list[str]', "params", [], 'if specified, list of parameters to visualize'] 17 | ] 18 | 19 | 20 | def prepare_data(args): 21 | with open(args['alpha_path'], 'r') as f: 22 | alphas = json.load(f) 23 | # find min and max epochs 24 | epochs = list(map(int, alphas.keys())) 25 | epochs.sort() 26 | 27 | min_epoch = max(epochs[0], args['min']) if args['min'] else epochs[0] 28 | max_epoch = 
min(epochs[-1], args['max']) if args['max'] else epochs[-1] 29 | 30 | if args['epoch']: 31 | min_epoch = args['epoch'] 32 | max_epoch = args['epoch'] 33 | 34 | # find parameters to visualize, and remove final '_savable' 35 | params = alphas[str(min_epoch)].keys() 36 | params = list(map(lambda x: x[:-8], alphas[str(min_epoch)].keys())) 37 | if args['params']: 38 | params = args['params'] 39 | print("display:", params) 40 | print('epochs:', min_epoch, max_epoch) 41 | return alphas, min_epoch, max_epoch, params 42 | 43 | 44 | def main(): 45 | matplotlib.use("GTK3Agg") 46 | args = argparse.parse_cmd(arguments) 47 | alphas, min_epoch, max_epoch, params = prepare_data(args) 48 | number_bars = (max_epoch - min_epoch) // args["step"] + 1 49 | colors = cm.OrRd_r(np.linspace(.2, .6, number_bars)) 50 | 51 | # grid has a fixed number of columns of 5 52 | n_params = len(params) 53 | if n_params == 1: 54 | fig, axs = plt.subplots(1, 1, figsize=(9, 3)) 55 | axs = [[axs]] 56 | elif n_params <= 5: 57 | fig, axs = plt.subplots(math.ceil(n_params), 1, figsize=(9, 3)) 58 | axs = [axs] 59 | else: 60 | fig, axs = plt.subplots(math.ceil(n_params/5), 5, figsize=(9, 3)) 61 | fig.suptitle(f'Alpha parameter between {min_epoch} and {max_epoch} epochs (step: {args["step"]})') 62 | 63 | total_width = 0.7 64 | width = total_width / number_bars 65 | for i in range(number_bars): 66 | epoch = min_epoch + i * args["step"] 67 | for k, param in enumerate(params): 68 | p = alphas[str(epoch)][param + '_savable'] 69 | 70 | # TODO should be removed as soon as data is better formatted 71 | data = {} 72 | for j in range(len(p)): 73 | data[str(j)] = p[j] 74 | 75 | names = list(data.keys()) 76 | values = list(data.values()) 77 | 78 | x = np.arange(len(p)) 79 | axs[k//5][k % 5].bar(x - total_width/2 + width * i, values, width, label=str(i)) #, color=colors) 80 | axs[k//5][k%5].set_title(param) 81 | 82 | # fig.tight_layout() 83 | plt.show() 84 | 85 | 86 | if __name__ == "__main__": 87 | main() 88 | -------------------------------------------------------------------------------- /scripts/hyperband_results_parser.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import sys 4 | import seaborn as sns 5 | import pandas as pd 6 | import matplotlib 7 | import matplotlib.pyplot as plt 8 | import numpy as np 9 | 10 | plot = True 11 | try: 12 | matplotlib.use("GTK3Agg") 13 | except ImportError: 14 | print("can't load matplotlib") 15 | plot = False 16 | 17 | 18 | 19 | def main(csv_path): 20 | if not os.path.exists(csv_path): 21 | all_data = [] 22 | all_keys = ['score'] 23 | for d in os.listdir('.'): 24 | if not os.path.isdir(d): 25 | continue 26 | with open(os.path.join(d, 'trial.json'), 'r') as f: 27 | json_data = json.load(f) 28 | data = json_data['hyperparameters']['values'] 29 | data['score'] = json_data['score'] 30 | data['name'] = d 31 | all_data.append(data) 32 | for key in data: 33 | if key not in all_keys: 34 | all_keys.append(key) 35 | # write csv 36 | with open(csv_path, 'w') as f: 37 | f.write(",".join(all_keys)) 38 | f.write("\n") 39 | for data in all_data: 40 | to_write = [] 41 | for key in all_keys: 42 | if key in data: 43 | if data[key] is None: 44 | data[key] = 0 45 | to_write.append(str(data[key])) 46 | else: 47 | to_write.append('') 48 | f.write(','.join(to_write)) 49 | f.write('\n') 50 | print(f"file {csv_path} written") 51 | # also start a visualisation with seaborn 52 | 53 | if not plot: 54 | return 55 | sns.set(style="darkgrid") 56 | data = pd.read_csv(csv_path) 57
| 58 | # data['depth'] = data['conv3_depth'] + data['conv4_depth'] 59 | data['depth'] *= 2 60 | depths = data['depth'].unique() 61 | depths.sort() 62 | factors = data['factor'].unique() 63 | factors.sort() 64 | if 'tuner/trial_id' in data: 65 | data = data.drop(columns=['tuner/trial_id']) 66 | 67 | g = sns.FacetGrid(data, col="tuner/epochs", legend_out=True) 68 | g.map_dataframe(draw_heatmap, 'factors', 'depths', factors=factors, depths=depths) 69 | g.add_legend() 70 | plt.show() 71 | 72 | def draw_heatmap(*args, **kwargs): 73 | '''from https://stackoverflow.com/questions/41471238/how-to-make-heatmap-square-in-seaborn-facetgrid 74 | ''' 75 | data = kwargs.pop('data') 76 | data['score'] = data['score'] * 100 77 | data = data.pivot(index='depth', columns='factor', values='score') 78 | # add missing factor 79 | for f in kwargs['factors']: 80 | if f not in data: 81 | data[f] = np.nan 82 | # add missing indexes 83 | index_to_add = [] 84 | for d in kwargs['depths']: 85 | if d not in data.index: 86 | index_to_add.append(d) 87 | new_indexes = data.index.values.tolist() + index_to_add 88 | new_indexes.sort() 89 | data = data.reindex(new_indexes) 90 | data = data.reindex(sorted(data.columns), axis=1) 91 | 92 | sns.heatmap(data, annot=True, vmin=0, vmax=100, cmap='CMRmap') 93 | 94 | # sns.heatmap(matrix, annot=True, linewidth=0.5, xticklabels=factors, yticklabels=depths, vmin=0, vmax=100, cmap='CMRmap', mask=mask) 95 | 96 | 97 | if __name__ == "__main__": 98 | csv_path = sys.argv[1] 99 | main(csv_path) 100 | -------------------------------------------------------------------------------- /src/models/__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import tensorflow as tf 3 | import tensorflow.keras.layers as tf_layers 4 | 5 | from .alexnet import AlexNet, AlexNetQ, AlexNetToy 6 | from .mobilenet import MobileNetV2, MobileNetV2Cifar10, MobileNetV2Cifar10_2, MobileNetV2Cifar10Hyper 7 | from .mobilenet_v3 import MobileNetV3Large, MobileNetV3Small, MobileNetV3LargeCIFAR, MobileNetV3SmallCIFAR 8 | 9 | from .resnet import (ResNet18, ResNet34, ResNet50, ResNet101, ResNet152, 10 | ResNet20CIFAR, ResNet32CIFAR, ResNet44CIFAR, ResNet56CIFAR, ResNetHyper) 11 | from .wide_resnet import WideResNet28_10, WideResNet40_2 12 | from .squeezenet import SqueezeNet 13 | from .tiny_darknet import TinyDarknet 14 | from .vgg import VGG16 15 | from .nasnet import NASNetLarge, NASNetMobile, NASNetCIFAR 16 | from .efficientnet import EfficientNetB0, EfficientNetB1, EfficientNetB2, EfficientNetB3, EfficientNetB4, EfficientNetB5, EfficientNetB6, EfficientNetB7 17 | from .hypermodels import SimpleHyper 18 | from .fbnet_mobilenet import FBNet_MobileNetV2Imagenet, FBNet_MobileNetV2CIFAR, FBNet_MobileNetV2CIFARUP 19 | from .pdart import PdartsCIFAR, PdartsImageNet 20 | from .complexnet import ShallowComplexNet, DeepComplexNet, WSComplexNetTF, WSComplexNetUpStride, DNComplexNetTF, DNComplexNetUpStride, IBComplexNetTF, IBComplexNetUpStride 21 | 22 | 23 | # raise the recursion limit to prevent Keras from failing on very big models.
24 | # for instance ResNet152 with type2 does not work without this 25 | sys.setrecursionlimit(10000) 26 | 27 | model_name_to_class = { 28 | "AlexNet": AlexNet, 29 | "AlexNetQ": AlexNetQ, 30 | "AlexNetToy": AlexNetToy, 31 | "EfficientNetB0": EfficientNetB0, 32 | "EfficientNetB1": EfficientNetB1, 33 | "EfficientNetB2": EfficientNetB2, 34 | "EfficientNetB3": EfficientNetB3, 35 | "EfficientNetB4": EfficientNetB4, 36 | "EfficientNetB5": EfficientNetB5, 37 | "EfficientNetB6": EfficientNetB6, 38 | "EfficientNetB7": EfficientNetB7, 39 | "MobileNetV2": MobileNetV2, 40 | "MobileNetV2Cifar10": MobileNetV2Cifar10, 41 | "MobileNetV2Cifar10_2": MobileNetV2Cifar10_2, 42 | "NASNetCIFAR": NASNetCIFAR, 43 | "NASNetLarge": NASNetLarge, 44 | "NASNetMobile": NASNetMobile, 45 | "ResNet18": ResNet18, 46 | "ResNet34": ResNet34, 47 | "ResNet50": ResNet50, 48 | "ResNet101": ResNet101, 49 | "ResNet152": ResNet152, 50 | "ResNet20CIFAR": ResNet20CIFAR, 51 | "ResNet32CIFAR": ResNet32CIFAR, 52 | "ResNet44CIFAR": ResNet44CIFAR, 53 | "ResNet56CIFAR": ResNet56CIFAR, 54 | "WideResNet28_10": WideResNet28_10, 55 | "WideResNet40_2": WideResNet40_2, 56 | "SqueezeNet": SqueezeNet, 57 | "TinyDarknet": TinyDarknet, 58 | "VGG16": VGG16, 59 | "MobileNetV3Large": MobileNetV3Large, 60 | "MobileNetV3Small": MobileNetV3Small, 61 | "MobileNetV3LargeCIFAR": MobileNetV3LargeCIFAR, 62 | "MobileNetV3SmallCIFAR": MobileNetV3SmallCIFAR, 63 | # Pdart model 64 | "PdartsCIFAR": PdartsCIFAR, 65 | "PdartsImageNet": PdartsImageNet, 66 | # FIXME The commented models below have stale code and need refactoring when prioritized. 67 | # # Hyper Model 68 | # "SimpleHyper": SimpleHyper, 69 | # "ResNetHyper": ResNetHyper, 70 | # "MobileNetV2Cifar10Hyper": MobileNetV2Cifar10Hyper, 71 | # # Architecture Search models 72 | # "FBNet_MobileNetV2Imagenet": FBNet_MobileNetV2Imagenet, 73 | # "FBNet_MobileNetV2CIFAR": FBNet_MobileNetV2CIFAR, 74 | # "FBNet_MobileNetV2CIFARUP": FBNet_MobileNetV2CIFARUP, 75 | # complexnet 76 | "ShallowComplexNet": ShallowComplexNet, 77 | "DeepComplexNet": DeepComplexNet, 78 | "WSComplexNetTF": WSComplexNetTF, 79 | "WSComplexNetUpStride": WSComplexNetUpStride, 80 | "DNComplexNetTF": DNComplexNetTF, 81 | "DNComplexNetUpStride": DNComplexNetUpStride, 82 | "IBComplexNetTF": IBComplexNetTF, 83 | "IBComplexNetUpStride": IBComplexNetUpStride, 84 | } 85 | -------------------------------------------------------------------------------- /tests/unit_tests/test_channels_first_last.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import unittest 3 | 4 | from tqdm import tqdm 5 | 6 | import tensorflow as tf 7 | from src.models import model_name_to_class 8 | 9 | class TestCompareChannelsFirstLast(unittest.TestCase): 10 | @classmethod 11 | def setUpClass(cls): 12 | cls.model_kwargs = { 13 | 'input_size': [224, 224, 3], 14 | 'changing_ids': [], 15 | 'num_classes': 10, 16 | } 17 | 18 | cls.list_of_models = model_name_to_class.values() 19 | 20 | # the models below do not work for upstride types 21 | remove_models = [ 22 | # These models do not match the EfficientNet definition. There is an open pull request yet to be merged. 23 | # This should be included once the pull request is merged.
24 | "EfficientNetB0", 25 | "EfficientNetB1", 26 | "EfficientNetB2", 27 | "EfficientNetB3", 28 | "EfficientNetB4", 29 | "EfficientNetB5", 30 | "EfficientNetB6", 31 | "EfficientNetB7", 32 | # SeparableConv2D is not supported for upstride types 33 | "NASNetCIFAR", 34 | "NASNetLarge", 35 | "NASNetMobile", 36 | ] 37 | 38 | tmp_list_models = model_name_to_class 39 | # remove models that are not supported 40 | [tmp_list_models.pop(model) for model in remove_models] 41 | cls.list_models_upstride = tmp_list_models.values() 42 | 43 | def test_compare_model_params_tensorflow(self): 44 | self.model_kwargs.update({"upstride_type": -1, "factor": 1}) 45 | print("Building models for Channels_first and Channels_last for Tensorflow and Compare") 46 | for model in tqdm(self.list_of_models): 47 | # switch to channels first 48 | tf.keras.backend.set_image_data_format('channels_first') 49 | model_NCHW = model(**self.model_kwargs).build() 50 | model_NCHW_params = model_NCHW.count_params() 51 | del model_NCHW 52 | tf.keras.backend.clear_session() 53 | # switch back to channels last 54 | tf.keras.backend.set_image_data_format('channels_last') 55 | model_NHWC = model(**self.model_kwargs).build() 56 | model_NHWC_params = model_NHWC.count_params() 57 | del model_NHWC 58 | tf.keras.backend.clear_session() 59 | # compare 60 | # print(f"Model Name : {model.__name__} TensorFlow") 61 | # print(f"Channels_last : {model_NHWC_params:,}") 62 | # print(f"Channels_first: {model_NCHW_params:,}") 63 | self.assertEqual(model_NHWC_params, model_NCHW_params) 64 | 65 | def test_compare_model_params_upstride(self): 66 | # try to import upstride module 67 | try: 68 | import upstride 69 | for up_type in [1, 2]: 70 | self.model_kwargs.update({"upstride_type": up_type, "factor": 2**up_type}) 71 | print(f"Building models for Channels_first and Channels_last for Upstride type{up_type} and Compare") 72 | for model in tqdm(self.list_models_upstride): 73 | # switch to channels first 74 | tf.keras.backend.set_image_data_format('channels_first') 75 | model_NCHW = model(**self.model_kwargs).build() 76 | model_NCHW_params = model_NCHW.count_params() 77 | del model_NCHW 78 | tf.keras.backend.clear_session() 79 | # switch back to channels last 80 | tf.keras.backend.set_image_data_format('channels_last') 81 | model_NHWC = model(**self.model_kwargs).build() 82 | model_NHWC_params = model_NHWC.count_params() 83 | del model_NHWC 84 | tf.keras.backend.clear_session() 85 | # compare 86 | # print(f"Model Name : {model.__name__} UpStride type{up_type}") 87 | # print(f"Channels_last : {model_NHWC_params:,}") 88 | # print(f"Channels_first: {model_NCHW_params:,}") 89 | self.assertEqual(model_NHWC_params, model_NCHW_params) 90 | except ModuleNotFoundError: 91 | print("Unit test test_compare_model_params_upstride skipped as upstride is required for this test") 92 | -------------------------------------------------------------------------------- /inference_server.py: -------------------------------------------------------------------------------- 1 | import os 2 | import zmq 3 | import numpy as np 4 | import tensorflow as tf 5 | import upstride_argparse as argparse 6 | from src.data import dataloader, augmentations 7 | from submodules.global_dl import global_conf 8 | 9 | args_spec = [ 10 | # framework specification 11 | [str, 'model_dir', None, 'Path to a folder containing saved model', lambda x: os.path.exists(x)], 12 | 13 | # dataloader specification to run inference on a dataset 14 | [int, "num_classes", 0, 'Number of classes'], 15 | ['namespace', 
'dataloader', [ 16 | ['list[str]', 'list', ['Resize', 'CentralCrop', 'Normalize'], 'List of data augmentation operations to apply'], 17 | [str, "data_dir", '', "directory to read/write data. Defaults to \"~/tensorflow_datasets\""], 18 | [str, 'name', None, 'Choose the dataset to be used'], 19 | [str, 'split_id', 'validation', 'Split id in the dataset to use'], 20 | [int, 'batch_size', 1, 'The size of batch per gpu', lambda x: x > 0], 21 | ] + augmentations.arguments], 22 | 23 | # networking parameters 24 | [int, 'zmq_port', 5555, 'Specify the port to connect the ZMQ socket', lambda x: x > 0], 25 | ] + global_conf.arguments 26 | 27 | 28 | 29 | def load_model(args): 30 | from train import get_experiment_name 31 | # import upstride to enable model deserialization 32 | import upstride.type0.tf.keras.layers 33 | import upstride.type2.tf.keras.layers 34 | print("Loading model from", args['model_dir']) 35 | model = tf.keras.models.load_model(args['model_dir'], compile=False) # compile=True fails on nano, maybe due to TF 2.2/2.3 difference 36 | model.compile(loss='categorical_crossentropy') 37 | return model 38 | 39 | 40 | def evaluate_dataset(args, model): 41 | print(f"Evaluating on {args['dataloader']['name']}") 42 | args['dataloader']['train_split_id'] = None 43 | dataset = dataloader.get_dataset(args['dataloader'], transformation_list=args['dataloader']['list'], 44 | num_classes=args["num_classes"], split=args['dataloader']['split_id']) 45 | model.evaluate(dataset) 46 | 47 | 48 | def create_zmq_socket(zmq_port): 49 | context = zmq.Context() 50 | socket = context.socket(zmq.REP) 51 | socket.bind("tcp://*:" + str(zmq_port)) 52 | return socket 53 | 54 | 55 | def process_incoming_image_batches(model, shape, socket): 56 | received_messages_count = 0 57 | logging_frequency = 1000 58 | 59 | # set batch dimension to -1 for reshaping 60 | if shape[0] is None: 61 | shape[0] = -1 62 | 63 | # loop forever processing incoming messages 64 | print("Listening to network...") 65 | while True: 66 | # wait for a message 67 | message = socket.recv() 68 | if received_messages_count % logging_frequency == 0: 69 | print(f"Processed {received_messages_count} messages") 70 | # convert the message to an image batch 71 | img = np.frombuffer(message, dtype='float16').reshape(shape) 72 | res = model.predict(img) 73 | socket.send(res) 74 | received_messages_count += 1 75 | 76 | 77 | def main(): 78 | """ CLI entry point 79 | """ 80 | # parse arguments 81 | args = argparse.parse_cmd(args_spec) 82 | 83 | # perform global configuration (XLA and memory growth) 84 | global_conf.config_tf2(args) 85 | 86 | # load model 87 | model = load_model(args) 88 | model.summary() 89 | 90 | # if dataloader.name is set, evaluate on that dataset 91 | if args['dataloader']['name'] is not None: 92 | evaluate_dataset(args, model) 93 | 94 | # otherwise listen for image batches on a zmq socket 95 | else: 96 | socket = create_zmq_socket(args['zmq_port']) 97 | assert len(model.inputs) == 1, "Cannot find model input to send images on" 98 | process_incoming_image_batches(model, list(model.inputs[0].shape), socket) 99 | 100 | 101 | if __name__ == '__main__': 102 | main() 103 | -------------------------------------------------------------------------------- /src/test_argument_parser.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | import os 3 | import tempfile 4 | import json 5 | import yaml 6 | import unittest 7 | import argparse 8 | from unittest import mock 9 | from .argument_parser 
import read_yaml_config, parse_cmd 10 | 11 | 12 | class TestArgumentParser(unittest.TestCase): 13 | def test_read_yaml_config(self): 14 | config_dir = create_yaml_file() 15 | parameters = init_parameters() 16 | read_yaml_config(os.path.join(config_dir, "config.yml"), parameters) 17 | self.assertEqual(parameters['parameter_int'], 2) 18 | self.assertEqual(parameters['parameter_str'], "plip") 19 | self.assertEqual(parameters['parameter_list'], [1, 2, 3]) 20 | self.assertEqual(parameters['parameter_bool'], True) 21 | self.assertEqual(parameters['parameter_dict']['parameter_int'], 3) 22 | shutil.rmtree(config_dir) 23 | 24 | @mock.patch('argparse.ArgumentParser.parse_args', 25 | return_value=argparse.Namespace()) 26 | def test_parse_empty_config(self, mock_args): 27 | arguments = get_arguments() 28 | parameters = parse_cmd(arguments) 29 | out_parameters = { 30 | "parameter_int": 0, 31 | "parameter_str": '', 32 | "parameter_list": [1, 5, 6], 33 | "parameter_bool": False, 34 | "parameter_dict": { 35 | "parameter_int": 5 36 | } 37 | } 38 | self.assertEqual(parameters, out_parameters) 39 | 40 | @mock.patch('argparse.ArgumentParser.parse_args', 41 | return_value=argparse.Namespace(**{"yaml_config": ['ressources/testing/config.yml']})) 42 | def test_parse_json_config(self, mock_args): 43 | arguments = get_arguments() 44 | arguments.append([str, "json_config", "", "config file overriden by these argparser parameters"]) 45 | parameters = parse_cmd(arguments) 46 | self.assertEqual(parameters['parameter_int'], 1) 47 | self.assertEqual(parameters['parameter_str'], "plop") 48 | self.assertEqual(parameters['parameter_list'], [1, 2, 3]) 49 | self.assertEqual(parameters['parameter_bool'], True) 50 | self.assertEqual(parameters['parameter_dict']['parameter_int'], 3) 51 | 52 | @mock.patch('argparse.ArgumentParser.parse_args', 53 | return_value=argparse.Namespace(**{"yaml_config": ['ressources/testing/config.yml'], "parameter_int": -1})) 54 | def test_parse_json_mix_config(self, mock_args): 55 | arguments = get_arguments() 56 | arguments.append([str, "json_config", "", "config file overriden by these argparser parameters"]) 57 | arguments.append([str, "other_param", "test", "plop"]) 58 | parameters = parse_cmd(arguments) 59 | self.assertEqual(parameters['parameter_int'], -1) 60 | self.assertEqual(parameters['parameter_str'], "plop") 61 | self.assertEqual(parameters['parameter_list'], [1, 2, 3]) 62 | self.assertEqual(parameters['parameter_bool'], True) 63 | self.assertEqual(parameters['other_param'], "test") 64 | 65 | 66 | def get_arguments(): 67 | return [ 68 | [int, "parameter_int", 0, "", lambda x: x < 2], 69 | [str, "parameter_str", "", ""], 70 | ['list[int]', "parameter_list", [1, 5, 6], ""], 71 | [bool, "parameter_bool", False, ""], 72 | ['namespace', 'parameter_dict', [ 73 | [int, 'parameter_int', 5, ''] 74 | ]] 75 | ] 76 | 77 | 78 | def create_yaml_file(): 79 | config_dir = tempfile.mkdtemp() 80 | yaml_content = { 81 | "parameter_int": 2, 82 | "parameter_str": "plip", 83 | "parameter_list": [1, 2, 3], 84 | "parameter_bool": True, 85 | "parameter_dict": { 86 | "parameter_int": 3 87 | } 88 | } 89 | with open(os.path.join(config_dir, 'config.yml'), 'w') as outfile: 90 | yaml.dump(yaml_content, outfile) 91 | return config_dir 92 | 93 | 94 | def init_parameters(): 95 | parameters = { 96 | "parameter_int": None, 97 | "parameter_str": None, 98 | "parameter_list": None, 99 | "parameter_bool": None, 100 | "parameter_dict": { 101 | "parameter_int": None 102 | } 103 | } 104 | return parameters 105 | 
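The tests above pin down the parser's precedence rule: spec defaults are overridden by values from --yaml_config files, which are in turn overridden by explicit CLI flags. A minimal usage sketch of that behaviour (illustrative; it reuses the spec format exercised above and the upstride_argparse entry point the other scripts in this repo import):

import upstride_argparse as argparse

arguments = [
    [int, "parameter_int", 0, "", lambda x: x < 2],
    [str, "parameter_str", "", ""],
    ['list[int]', "parameter_list", [1, 5, 6], ""],
    [bool, "parameter_bool", False, ""],
    ['namespace', 'parameter_dict', [
        [int, 'parameter_int', 5, '']
    ]]
]

# e.g. python my_script.py --yaml_config ressources/testing/config.yml --parameter_int -1
# yields parameter_int == -1 (CLI wins), parameter_str == "plop" and
# parameter_dict['parameter_int'] == 3 (from the yaml), as test_parse_json_mix_config asserts.
config = argparse.parse_cmd(arguments)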
-------------------------------------------------------------------------------- /src/test_tfrecord_extractor.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import shutil 4 | import tempfile 5 | import unittest 6 | import cv2 7 | import numpy as np 8 | from src.data.dataloader import TFRecordExtractor 9 | 10 | sys.path.append(os.path.join(os.path.dirname(os.path.dirname(os.path.realpath(__file__))), 'scripts')) 11 | from tfrecord_by_separate_dir_or_annotation_file import build_tfrecord_dataset 12 | 13 | 14 | class TestTfrecordExtractor(unittest.TestCase): 15 | def test_process(self): 16 | tfrecord_dir_path, dataset_name = create_dataset() 17 | 18 | # Retrieve the train tf records 19 | train_dataset_extractor = TFRecordExtractor(dataset_name, tfrecord_dir_path, "train") 20 | for image, label in train_dataset_extractor.get_tf_dataset().take(1): 21 | # Check train image shape 22 | self.assertEqual(image.shape, (640, 480, 3)) 23 | 24 | val_dataset_extractor = TFRecordExtractor(dataset_name, tfrecord_dir_path, "validation") 25 | for image, label in val_dataset_extractor.get_tf_dataset().take(1): 26 | # Check validation image shape 27 | self.assertEqual(image.shape, (520, 380, 3)) 28 | 29 | test_dataset_extractor = TFRecordExtractor(dataset_name, tfrecord_dir_path, "test") 30 | for image, label in test_dataset_extractor.get_tf_dataset().take(1): 31 | # Check test image shape 32 | self.assertEqual(image.shape, (520, 380, 3)) 33 | 34 | shutil.rmtree(tfrecord_dir_path) 35 | 36 | 37 | def create_dataset(): 38 | TRAIN_EXAMPLE_PER_CLASS = 10 39 | VAL_EXAMPLE_PER_CLASS = 5 40 | TEST_EXAMPLE_PER_CLASS = 4 41 | train_dir = create_fake_dataset_from_directory(TRAIN_EXAMPLE_PER_CLASS) 42 | val_dir, val_annotation_file = create_fake_dataset_with_annotation_file(VAL_EXAMPLE_PER_CLASS) 43 | test_dir, test_annotation_file = create_fake_dataset_with_annotation_file(TEST_EXAMPLE_PER_CLASS) 44 | name = 'Test-dataset' 45 | description = 'A small test datset' 46 | tfrecord_dir_path = tempfile.mkdtemp() 47 | 48 | args = {'name': name, 'description': description, 'tfrecord_dir_path': tfrecord_dir_path, 49 | 'tfrecord_size': 2, 'preprocessing': 'NO', 'image_size': (224, 224), 50 | 'train': {'images_dir_path': train_dir, 51 | 'annotation_file_path': None, 52 | 'delimiter': ',', 53 | 'header_exists': False, 54 | }, 55 | 'validation': {'images_dir_path': val_dir, 56 | 'annotation_file_path': val_annotation_file, 57 | 'delimiter': ',', 58 | 'header_exists': False, 59 | }, 60 | 'test': {'images_dir_path': test_dir, 61 | 'annotation_file_path': test_annotation_file, 62 | 'delimiter': ',', 63 | 'header_exists': False, 64 | } 65 | } 66 | build_tfrecord_dataset(args) 67 | 68 | shutil.rmtree(train_dir) 69 | shutil.rmtree(val_dir) 70 | shutil.rmtree(test_dir) 71 | 72 | return tfrecord_dir_path, name 73 | 74 | 75 | def create_fake_dataset_from_directory(n_images_per_class=2): 76 | dataset_dir = tempfile.mkdtemp() 77 | os.makedirs(os.path.join(dataset_dir, 'cat'), exist_ok=True) 78 | os.makedirs(os.path.join(dataset_dir, 'dog'), exist_ok=True) 79 | for i in range(n_images_per_class): 80 | cv2.imwrite(os.path.join(dataset_dir, 'dog', '{}.jpg'.format(i)), np.ones((640, 480, 3), dtype=np.uint8) * 255) 81 | cv2.imwrite(os.path.join(dataset_dir, 'cat', '{}.jpg'.format(i)), np.ones((640, 480, 3), dtype=np.uint8) * 255) 82 | return dataset_dir 83 | 84 | 85 | def create_fake_dataset_with_annotation_file(n_images_per_class=2): 86 | dataset_dir = tempfile.mkdtemp() 87 | 
os.makedirs(dataset_dir, exist_ok=True) 88 | 89 | annotation_file = os.path.join(dataset_dir, 'annotations.txt') 90 | 91 | labels = ['cat', 'dog'] 92 | 93 | with open(annotation_file, 'w', encoding='utf-8') as f: 94 | for i in range(n_images_per_class*2): 95 | cv2.imwrite(os.path.join(dataset_dir, '{}.jpg'.format(i)), np.ones((520, 380, 3), dtype=np.uint8) * 255) 96 | line = '{}.jpg'.format(i) + "," + labels[i % 2] + "\n" 97 | f.write(line) 98 | 99 | return dataset_dir, annotation_file 100 | -------------------------------------------------------------------------------- /train_keras_tuner.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tensorflow as tf 3 | import upstride_argparse as argparse 4 | from kerastuner.tuners import Hyperband, BayesianOptimization 5 | from src.data import dataloader 6 | from src.models import model_name_to_class 7 | from src.models.generic_model import framework_list 8 | from src.utils import check_folder, get_imagenet_data, model_dir 9 | from submodules.global_dl import global_conf 10 | from submodules.global_dl.training.training import create_env_directories, setup_mp, define_model_in_strategy, get_callbacks, init_custom_checkpoint_callbacks 11 | from submodules.global_dl.training import training 12 | from submodules.global_dl.training import alchemy_api 13 | from submodules.global_dl.training import export 14 | from submodules.global_dl.training.optimizers import get_lr_scheduler, get_optimizer, arguments 15 | from submodules.global_dl.training import optimizers 16 | 17 | 18 | arguments = [ 19 | ['namespace', 'dataloader', dataloader.arguments], 20 | ['namespace', 'server', alchemy_api.arguments], 21 | ['namespace', 'optimizer', optimizers.arguments], 22 | ['namespace', 'export', export.arguments], 23 | ['list[str]', 'frameworks', ['tensorflow'], 'List of framework to use to define the model', lambda x: not any(y not in framework_list for y in x)], 24 | ['namespace', 'factor', [[str, 'scale', 'log', 'linear or log'], [float, 'min', 1, ''], [float, 'max', 1, ''], [float, 'step', 0, ''], ]], 25 | [str, "model_name", '', 'Specify the name of the model', lambda x: x in model_name_to_class], 26 | 27 | ] + global_conf.arguments + training.arguments 28 | 29 | 30 | def main(): 31 | """ function called when starting the code via command-line 32 | """ 33 | args = argparse.parse_cmd(arguments) 34 | args['server'] = alchemy_api.start_training(args['server']) 35 | train(args) 36 | 37 | 38 | def get_values_from_args(args): 39 | if args['scale'] == 'linear': 40 | values = list(range(args['min'], args['max'], args['step'])) 41 | elif args['scale'] == 'log': 42 | values = [] 43 | previous = args['min'] 44 | while previous <= args['max']: 45 | values.append(previous) 46 | previous *= args['step'] 47 | else: 48 | raise ValueError(f"unknown scale '{args['scale']}'") 49 | return values 50 | 51 | 52 | def get_model(args): 53 | def build_model(hp): 54 | factor = hp.Choice('factor', get_values_from_args(args['factor']), ordered=True) 55 | framework = hp.Choice('framework', args['frameworks']) 56 | model = model_name_to_class[args['model_name']](framework, 57 | factor, 58 | args['input_size'], 59 | args['num_classes'], 60 | hp=hp).model 61 | model.compile( 62 | optimizer=get_optimizer(args['optimizer']), 63 | loss='categorical_crossentropy', 64 | metrics=['accuracy']) 65 | return model 66 | return build_model 67 | 68 | 69 | def get_experiment_name(args): 70 | experiment_dir = f"keras_tuner_{args['model_name']}" 71 | if 
args['configuration']['with_mixed_precision']: 72 | experiment_dir += "_mp" 73 | return experiment_dir 74 | 75 | 76 | def train(args): 77 | print(args) 78 | global_conf.config_tf2(args) 79 | checkpoint_dir, log_dir, export_dir = create_env_directories(args, get_experiment_name(args)) 80 | 81 | train_dataset = dataloader.get_dataset(args['dataloader'], transformation_list=args['dataloader']['train_list'], 82 | num_classes=args["num_classes"], split=args['dataloader']['train_split_id']) 83 | val_dataset = dataloader.get_dataset(args['dataloader'], transformation_list=args['dataloader']['val_list'], 84 | num_classes=args["num_classes"], split=args['dataloader']['val_split_id']) 85 | 86 | setup_mp(args) 87 | build_model_fn = get_model(args) 88 | callbacks = get_callbacks(args, log_dir) 89 | 90 | # tuner = Hyperband(build_model_fn, 91 | # objective='val_accuracy', 92 | # max_epochs=args['num_epochs'], 93 | # hyperband_iterations=10e100, 94 | # directory=checkpoint_dir) 95 | 96 | tuner = BayesianOptimization(build_model_fn, 97 | objective='val_accuracy', 98 | max_trials=100000, 99 | num_initial_points=10, 100 | directory=checkpoint_dir) 101 | 102 | tuner.search_space_summary() 103 | tuner.search(x=train_dataset, 104 | validation_data=val_dataset, 105 | callbacks=callbacks, 106 | epochs=args['num_epochs']) 107 | tuner.results_summary() 108 | 109 | 110 | if __name__ == '__main__': 111 | main() 112 | -------------------------------------------------------------------------------- /src/utils.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import glob 3 | import os 4 | import random 5 | import cv2 6 | import numpy as np 7 | import tensorflow as tf 8 | from shutil import copyfile 9 | 10 | 11 | def copy_and_resize(source, dest, img_size): 12 | os.makedirs(dest, exist_ok=True) 13 | 14 | images_extensions = [".jpg", ".png", ".JPEG"] 15 | 16 | if source[-1] == "/": 17 | source = source[:-1] 18 | sources_len = len(source) 19 | for root, dirs, files in os.walk(source): 20 | for d in dirs: 21 | os.makedirs(os.path.join(dest, root[sources_len+1:], d), exist_ok=True) 22 | for f in files: 23 | if os.path.splitext(f)[1] in images_extensions: 24 | # then load, resize and save 25 | image = cv2.imread(os.path.join(root, f)) 26 | image = cv2.resize(image, (img_size, img_size)) 27 | r = cv2.imwrite(os.path.join(dest, root[sources_len+1:], f), image) 28 | if r == False: 29 | raise Exception("issue writing image {}".format(os.path.join(dest, root[sources_len+1:], f))) 30 | else: 31 | # simple copy 32 | copyfile(os.path.join(root, f), os.path.join(dest, root[sources_len+1:], f)) 33 | 34 | 35 | def model_dir(args): 36 | if args.model_name == 'resnet': 37 | return "{}{}".format(args.model_name, args.res_n) 38 | else: 39 | return "{}".format(args.model_name) 40 | 41 | 42 | def get_synset(path: str): 43 | """Parse the LOC_synset_mapping.txt file given in imagenet dataset 44 | 45 | Args: 46 | path (str): path of the LOC_synset_mapping.txt file 47 | 48 | Returns: 49 | dict: dictionary mapping the label to the class id 50 | """ 51 | with open(path) as csv_file: 52 | csv_reader = csv.reader(csv_file, delimiter=' ') 53 | synset_dict = {} 54 | for i, row in enumerate(csv_reader): 55 | synset_dict[row[0]] = i 56 | return synset_dict 57 | 58 | 59 | def get_paths(dir: str): 60 | pattern = os.path.join(dir, '**', '*.JPEG') 61 | return glob.glob(pattern, recursive=True) 62 | 63 | 64 | def get_partial_paths(dir, percentage): 65 | """ 66 | 67 | :param dir: train data directory 68 | 
:param percentage: based on the percentage select the partial images for each class 69 | :return: 70 | """ 71 | random.seed(1) 72 | percentage /= 100 73 | class_dirs = os.listdir(dir) 74 | paths = [] 75 | for d in class_dirs: 76 | class_paths = glob.glob(os.path.join(dir, d, '*.JPEG')) 77 | random.shuffle(class_paths) 78 | end = int(len(class_paths) * percentage) 79 | paths += class_paths[0:end] 80 | return paths 81 | 82 | 83 | def get_val_label_dict(val_gt_path: str): 84 | with open(val_gt_path) as csv_file: 85 | csv_reader = csv.reader(csv_file, delimiter=',') 86 | next(csv_reader, None) # skip the header 87 | val_dict = {} 88 | for row in csv_reader: 89 | val_dict[row[0]] = row[1].split(" ")[0] 90 | return val_dict 91 | 92 | 93 | def get_imagenet_data(imagenet_data_args): 94 | """parse a imagenet dataset files and return usefull data for training and validation 95 | 96 | Args: 97 | synset_path (str): for instance "/home/user/upstride-tests/ILSVRC/LOC_synset_mapping.txt" 98 | train_dir (str): for instance "/home/user/upstride-tests/ILSVRC/Data/CLS-LOC/train/" 99 | training_percentage (int): 100 for training on the whole dataset 100 | val_dir (str): for instance "/home/user/upstride-tests/ILSVRC/Data/CLS-LOC/val/" 101 | val_gt_path (str): for instance "/home/user/upstride-tests/ILSVRC/LOC_val_solution.csv" 102 | 103 | Returns: 104 | tuple of 4 elements : train_paths, train_labels, val_paths, val_labels 105 | paths are lists of strings, labels are lists of integers 106 | """ 107 | synset_path = imagenet_data_args['synset_path'] 108 | train_dir = imagenet_data_args['train_dir'] 109 | training_percentage = imagenet_data_args['train_data_percentage'] 110 | val_dir = imagenet_data_args['val_dir'] 111 | val_gt_path = imagenet_data_args['val_gt_path'] 112 | 113 | synset = get_synset(synset_path) 114 | train_paths = get_paths(train_dir) if training_percentage == 100 else get_partial_paths(train_dir, training_percentage) 115 | train_labels = [synset[path.split("/")[-2]] for path in train_paths] 116 | 117 | # train data are shuffled 118 | random.seed(0) 119 | combined = list(zip(train_paths, train_labels)) 120 | random.shuffle(combined) 121 | train_paths, train_labels = zip(*combined) 122 | 123 | val_label_dict = get_val_label_dict(val_gt_path) 124 | val_paths = get_paths(val_dir) 125 | val_labels = [synset[val_label_dict[path.split("/")[-1].split(".")[0]]] for path in val_paths] 126 | 127 | return train_paths, train_labels, val_paths, val_labels 128 | 129 | 130 | def check_folder(log_dir): 131 | # TODO os.makedirs(..., exists_ok=True) does the job 132 | if not os.path.exists(log_dir): 133 | os.makedirs(log_dir) 134 | return log_dir 135 | -------------------------------------------------------------------------------- /src/models/wide_resnet.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | from .generic_model import GenericModelBuilder 4 | 5 | 6 | weight_init = tf.keras.initializers.VarianceScaling() 7 | 8 | 9 | class WideResNet(GenericModelBuilder): 10 | def __init__(self, *args, **kwargs): 11 | super(WideResNet, self).__init__(*args, **kwargs) 12 | 13 | def model(self, x): 14 | layers = self.layers 15 | weight_regularizer = self.weight_regularizer 16 | num_blocks_per_resnet = self.blocks_per_group 17 | filters = [int(16/self.factor), 18 | int(16*self.channel_multiplier/self.factor), 19 | int(32*self.channel_multiplier/self.factor), 20 | int(64*self.channel_multiplier/self.factor)] 21 | strides = [1, 2, 2] # 
stride for each resblock 22 | final_stride_val = np.prod(strides) 23 | 24 | ch = filters[0] 25 | x = layers.Conv2D(ch, 3, kernel_initializer=weight_init, kernel_regularizer=weight_regularizer, padding="same", name='conv')(x) 26 | 27 | first_x = x # Res from the beginning 28 | 29 | for block_num in range(1, 4): 30 | orig_x = x # Res from previous block 31 | activate_before_residual = True if block_num == 1 else False 32 | block_name = f'resblock_{block_num}' 33 | x = self.resblock(x, filters[block_num - 1], filters[block_num], stride=strides[block_num-1], 34 | activate_before_residual=activate_before_residual, block_name=block_name+'_0') 35 | for i in range(1, num_blocks_per_resnet): 36 | x = self.resblock(x, filters[block_num], filters[block_num], stride=1, 37 | activate_before_residual=False, block_name=block_name+f'_{i}') 38 | orig_x = self._conform_size(filters[block_num - 1], filters[block_num], 39 | strides[block_num - 1], orig_x, block_name=block_name+f'_{i}') 40 | x = layers.Add()([x, orig_x]) 41 | 42 | orig_x = self._conform_size(filters[0], filters[-1],final_stride_val, first_x, 'last_block') 43 | x = layers.Add()([x, orig_x]) 44 | 45 | x = layers.BatchNormalization(axis=self.channel_axis, name='batch_norm_last')(x) 46 | x = layers.Activation('relu', name='relu_last')(x) 47 | x = layers.GlobalAveragePooling2D()(x) 48 | return x 49 | 50 | 51 | def resblock(self, x, in_filter, out_filter, stride=1, use_bias=False, activate_before_residual=False, block_name='resblock'): 52 | layers = self.layers 53 | weight_regularizer = self.weight_regularizer 54 | if activate_before_residual: 55 | x = layers.BatchNormalization(axis=self.channel_axis, name=block_name + '/batch_norm_0')(x) 56 | x = layers.Activation('relu', name=block_name + '/relu_0')(x) 57 | x_init = x 58 | else: 59 | x_init = x 60 | x = layers.BatchNormalization(axis=self.channel_axis, name=block_name + '/batch_norm_0')(x) 61 | x = layers.Activation('relu', name=block_name + '/relu_0')(x) 62 | 63 | x = layers.Conv2D(out_filter, 3, stride, kernel_initializer=weight_init, kernel_regularizer=weight_regularizer, 64 | use_bias=use_bias, padding='same', name=block_name + '/conv_0')(x) 65 | x = layers.BatchNormalization(axis=self.channel_axis, name=block_name + '/batch_norm_1')(x) 66 | x = layers.Activation('relu', name=block_name + '/relu_1')(x) 67 | x = layers.Conv2D(out_filter, 3, 1, kernel_initializer=weight_init, kernel_regularizer=weight_regularizer, 68 | use_bias=use_bias, padding='same', name=block_name + '/conv_1')(x) 69 | 70 | x_init = self._conform_size(in_filter, out_filter, stride, x_init, block_name) 71 | x = layers.Add()([x, x_init]) 72 | return x 73 | 74 | def _conform_size(self, in_filter, out_filter, stride, x_init, block_name): 75 | layers = self.layers 76 | if in_filter != out_filter: 77 | x_init = layers.AveragePooling2D(pool_size=(stride, stride), name=block_name + '/avg_pool_0')(x_init) 78 | # hack to pad the channels 79 | if self.is_channels_first: 80 | x_init = tf.transpose(x_init, [0, 2, 3, 1]) # put the channels at index 3 81 | x_init = layers.ZeroPadding2D(padding=(0,(out_filter-in_filter)//2), name=block_name + '/zero_pad_0')(x_init) 82 | x_init = tf.transpose(x_init, [0, 3, 1, 2]) # put the channels back at index 1 83 | else: 84 | x_init = tf.transpose(x_init, [0, 3, 1, 2]) # put the channels at index 1 85 | x_init = layers.ZeroPadding2D(padding=((out_filter-in_filter)//2,0), name=block_name + '/zero_pad_0')(x_init) 86 | x_init = tf.transpose(x_init, [0, 2, 3, 1]) # put the channels back at index 3 87 | 
return x_init 88 | 89 | class WideResNet28_10(WideResNet): 90 | def __init__(self, *args, **kwargs): 91 | self.channel_multiplier = 10 92 | self.blocks_per_group = 4 93 | super().__init__(*args, **kwargs) 94 | 95 | class WideResNet40_2(WideResNet): 96 | def __init__(self, *args, **kwargs): 97 | self.channel_multiplier = 2 98 | self.blocks_per_group = 6 99 | super().__init__(*args, **kwargs) 100 | 101 | 102 | 103 | -------------------------------------------------------------------------------- /src/models/complexnet.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from .generic_model import GenericModelBuilder 3 | 4 | 5 | class ComplexNet(GenericModelBuilder): 6 | def __init__(self, *args, **kwargs): 7 | super().__init__(*args, **kwargs) 8 | self.bn_args = { 9 | "axis": self.channel_axis, 10 | "momentum": 0.9, 11 | "epsilon": 1e-04 12 | } 13 | self.conv_args = { 14 | "padding": "same", 15 | "use_bias": False, 16 | "kernel_regularizer": self.weight_regularizer, 17 | "kernel_initializer": "he_ind" 18 | } 19 | 20 | def residual_block(self, x, channels: int, downsample=False): 21 | layers = self.layers 22 | x_init = x 23 | strides = (2, 2) if downsample else (1, 1) 24 | x = layers.BatchNormalizationC(**self.bn_args)(x) 25 | x = layers.Activation('relu')(x) 26 | x = layers.Conv2D(channels, 3, strides, **self.conv_args)(x) 27 | x = layers.BatchNormalizationC(**self.bn_args)(x) 28 | x = layers.Activation('relu')(x) 29 | x = layers.Conv2D(channels, 3, **self.conv_args)(x) 30 | if not downsample: 31 | x = layers.Add()([x, x_init]) 32 | else: 33 | x_init = layers.Conv2D(channels, 1, 2, **self.conv_args)(x_init) 34 | x = layers.Concatenate(axis=self.channel_axis)([x_init, x]) 35 | return x 36 | 37 | def learnVectorBlock(self, x): 38 | x = tf.keras.layers.BatchNormalization(**self.bn_args)(x) 39 | x = tf.keras.layers.Activation('relu')(x) 40 | x = tf.keras.layers.Convolution2D(3, self.lvb_kernel_size, kernel_initializer='he_normal', **self.conv_args)(x) 41 | x = tf.keras.layers.BatchNormalization(**self.bn_args)(x) 42 | x = tf.keras.layers.Activation('relu')(x) 43 | x = tf.keras.layers.Convolution2D(3, self.lvb_kernel_size, kernel_initializer='he_normal', **self.conv_args)(x) 44 | return x 45 | 46 | def model(self, x): 47 | n_channels = self.n_channels_type_0 // self.factor 48 | layers = self.layers 49 | if layers == tf.keras.layers: 50 | print("real definition") 51 | r = x 52 | x = self.learnVectorBlock(x) 53 | x = tf.keras.layers.Concatenate(axis=self.channel_axis)([r, x]) 54 | self.conv_args['kernel_initializer'] = 'he_normal' 55 | 56 | x = self.layers.Conv2D(n_channels, 3, **self.conv_args)(x) 57 | x = self.layers.BatchNormalizationC(**self.bn_args)(x) 58 | x = self.layers.Activation('relu')(x) 59 | 60 | # First stage 61 | for i in range(self.n_blocks): # -1 because the last one is a downsample 62 | x = self.residual_block(x, n_channels) 63 | x = self.residual_block(x, n_channels, True) 64 | 65 | # stage 2 66 | for i in range(self.n_blocks - 1): # -1 because the last one is a downsample and one is removed (see paper) 67 | x = self.residual_block(x, n_channels * 2) 68 | x = self.residual_block(x, n_channels * 2, True) 69 | 70 | # stage 3 71 | for i in range(self.n_blocks - 1): # -1 because the last one is a downsample and one is removed (see paper) 72 | x = self.residual_block(x, n_channels * 4) 73 | 74 | x = self.layers.GlobalAveragePooling2D()(x) 75 | 76 | return x 77 | 78 | 79 | # Definition from the Quaternion Paper 80 | class 
ShallowComplexNet(ComplexNet): 81 | def __init__(self, *args, **kwargs): 82 | self.conv_init = None 83 | self.n_blocks = 2 84 | self.n_channels_type_0 = 32 85 | self.lvb_kernel_size = 3 86 | super().__init__(*args, **kwargs) 87 | 88 | 89 | class DeepComplexNet(ComplexNet): 90 | def __init__(self, *args, **kwargs): 91 | self.n_blocks = 11 92 | self.n_channels_type_0 = 32 93 | self.lvb_kernel_size = 3 94 | super().__init__(*args, **kwargs) 95 | 96 | 97 | # definitions from the complex paper 98 | 99 | # Wide and shallow definition 100 | class WSComplexNetTF(ComplexNet): 101 | def __init__(self, *args, **kwargs): 102 | self.n_blocks = 14 103 | self.n_channels_type_0 = 18 104 | self.lvb_kernel_size = 1 105 | super().__init__(*args, **kwargs) 106 | 107 | 108 | class WSComplexNetUpStride(ComplexNet): 109 | def __init__(self, *args, **kwargs): 110 | self.n_blocks = 16 111 | self.n_channels_type_0 = 12 * 2 # because 12 is the number of complex filters and we use factor 2 112 | self.lvb_kernel_size = 1 113 | super().__init__(*args, **kwargs) 114 | 115 | # Deep and Narrow 116 | 117 | 118 | class DNComplexNetTF(ComplexNet): 119 | def __init__(self, *args, **kwargs): 120 | self.n_blocks = 23 121 | self.n_channels_type_0 = 14 122 | self.lvb_kernel_size = 1 123 | super().__init__(*args, **kwargs) 124 | 125 | 126 | class DNComplexNetUpStride(ComplexNet): 127 | def __init__(self, *args, **kwargs): 128 | self.n_blocks = 23 129 | self.n_channels_type_0 = 10 * 2 # because 10 is the number of complex filters and we use factor 2 130 | self.lvb_kernel_size = 1 131 | super().__init__(*args, **kwargs) 132 | 133 | 134 | # In Between 135 | class IBComplexNetTF(ComplexNet): 136 | def __init__(self, *args, **kwargs): 137 | self.n_blocks = 18 138 | self.n_channels_type_0 = 16 139 | self.lvb_kernel_size = 1 140 | super().__init__(*args, **kwargs) 141 | 142 | 143 | class IBComplexNetUpStride(ComplexNet): 144 | def __init__(self, *args, **kwargs): 145 | self.n_blocks = 19 146 | self.n_channels_type_0 = 11 * 2 # because 11 is the number of complex filters and we use factor 2 147 | self.lvb_kernel_size = 1 148 | super().__init__(*args, **kwargs) 149 | -------------------------------------------------------------------------------- /src/models/generic_model.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | import tensorflow as tf 3 | 4 | """ 5 | Question: 6 | - Weight decay ? 7 | - kwargs for specific stuff ? 
8 | """ 9 | 10 | 11 | def load_upstride(upstride_type: int): 12 | """This function load one of upstride types 13 | """ 14 | if upstride_type == -1: 15 | return None 16 | if upstride_type == 0: 17 | import upstride.type0.tf.keras.layers as up_layers 18 | return up_layers 19 | if upstride_type == 1: 20 | import upstride.type1.tf.keras.layers as up_layers 21 | return up_layers 22 | if upstride_type == 2: 23 | import upstride.type2.tf.keras.layers as up_layers 24 | return up_layers 25 | if upstride_type == 3: 26 | import upstride.type3.tf.keras.layers as up_layers 27 | return up_layers 28 | 29 | 30 | class GenericModelBuilder: 31 | def __init__(self, input_size, changing_ids: List[str], num_classes, factor=1, upstride_type=-1, tf2upstride_strategy="", upstride2tf_strategy="", weight_decay=0, **kwargs): 32 | self.input_size = input_size 33 | self.num_classes = num_classes 34 | self.factor = factor 35 | self.upstride_type = upstride_type 36 | self.tf2upstride_strategy = tf2upstride_strategy 37 | self.upstride2tf_strategy = upstride2tf_strategy 38 | self.is_channels_first = True if tf.keras.backend.image_data_format() == 'channels_first' else False 39 | self.channel_axis = 1 if self.is_channels_first else -1 40 | 41 | # Configure list of ids to change framework 42 | if upstride_type == -1: 43 | # then no switch between tf and upstride 44 | self.changing_ids = [] 45 | elif changing_ids == []: 46 | # then set default parameters 47 | self.changing_ids = ['beginning', 'end_after_dense'] 48 | else: 49 | self.changing_ids = changing_ids 50 | 51 | # kwargs contains special parameter that can be specific for one model. For instance 52 | # - load_searched_arch for architecture search method 53 | # - drop_path_prob for fb-net 54 | # - conversion_params if tf2upstride or upstride2tf need specific parameters 55 | # - hp : the keras-tuner hyperparameters 56 | self.kwargs = kwargs 57 | 58 | # self.layers is the layers package to use when building the neural network 59 | self.layers = tf.keras.layers 60 | self.upstride_layers = load_upstride(upstride_type) 61 | self._is_using_tf_layers = True 62 | 63 | # weight_regularizer can be call in the model definition in any subclass of GenericModel 64 | self.weight_regularizer = tf.keras.regularizers.l2(l=weight_decay) 65 | 66 | # if the model use custom keras Model then overide this 67 | # This is usefull for SAM method 68 | self.model_class = tf.keras.Model 69 | 70 | # if the model use other inputs than the image then it need to add these tensors in this list 71 | # This is usefull for P-Darts, FB-NET and SAM methods 72 | self.inputs = [] 73 | 74 | 75 | def change_framework_if_necessary(self, id, inputs): 76 | """ When defining a custom model, this function should be called every time it can make sense to switch 77 | between tensorflow and upstride 78 | 79 | Args: 80 | x: can be a tensor or a list of tensors. 
81 | 82 | Return: a tensor if x is a tensor, a list of tensors if x is a list of tensors 83 | """ 84 | 85 | inputs_is_single_tensor = False 86 | if type(inputs) is not list: 87 | inputs_is_single_tensor = True 88 | inputs = [inputs] 89 | 90 | if id in self.changing_ids: 91 | if self._is_using_tf_layers: 92 | # Then converting from Tensorflow to Upstride 93 | self._is_using_tf_layers = False 94 | self.layers = self.upstride_layers 95 | 96 | out_tensors = [] 97 | for x in inputs: 98 | out_tensors.append(self.upstride_layers.TF2Upstride(self.tf2upstride_strategy)(x)) 99 | else: 100 | # Then converting from Upstride to Tensorflow 101 | self._is_using_tf_layers = True 102 | self.layers = tf.keras.layers 103 | 104 | out_tensors = [] 105 | for x in inputs: 106 | out_tensors.append(self.upstride_layers.Upstride2TF(self.upstride2tf_strategy)(x)) 107 | else: 108 | # Don't change the input 109 | out_tensors = inputs 110 | 111 | if inputs_is_single_tensor: 112 | out_tensors = out_tensors[0] 113 | 114 | return out_tensors 115 | 116 | def model(self, x): 117 | raise NotImplementedError("you need to overide method model") 118 | 119 | def build(self): 120 | inputs = tf.keras.layers.Input(shape=self.input_size) 121 | self.inputs.append(inputs) 122 | if self.is_channels_first: 123 | inputs = tf.keras.layers.Lambda(lambda x: tf.transpose(x, [0, 3, 1, 2]),name='channels_first')(inputs) 124 | x = self.change_framework_if_necessary("beginning", inputs) 125 | # output_tensors is the list of the vectors to use to compute classification losses (main output + auxilary losses) 126 | output_tensors = self.model(x) 127 | if type(output_tensors) != list: 128 | output_tensors = [output_tensors] 129 | 130 | output_tensors = self.change_framework_if_necessary("end_before_dense", output_tensors) 131 | for i, x in enumerate(output_tensors): 132 | output_tensors[i] = self.layers.Dense(self.num_classes, use_bias=True, name=f'Logits_{i}', kernel_regularizer=self.weight_regularizer)(x) 133 | output_tensors = self.change_framework_if_necessary("end_after_dense", output_tensors) 134 | 135 | for i, x in enumerate(output_tensors): 136 | output_tensors[i] = tf.keras.layers.Activation("softmax", dtype=tf.float32)(x) # dtype float32 is important because of mixed precision 137 | 138 | model = self.model_class(self.inputs, output_tensors) 139 | 140 | return model 141 | -------------------------------------------------------------------------------- /src/models/hypermodels.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | # from kerastuner.applications import HyperResNet 3 | from .generic_model import GenericModelBuilder 4 | 5 | 6 | class SimpleHyper(GenericModelBuilder): 7 | def model(self, x): 8 | x = self.layers.Conv2D(self.hp.Int('conv1_filter', 9 | min_value=32//self.factor, 10 | max_value=512//self.factor, 11 | step=32//self.factor), (5, 5), 2, padding='same', 12 | use_bias=False, 13 | name='conv_1')(x) 14 | x = self.layers.BatchNormalization()(x) 15 | x = self.layers.Activation('relu')(x) 16 | x = self.layers.MaxPooling2D((3, 3), strides=(2, 2))(x) 17 | for i in range(self.hp.Int('repeat_conv', 18 | min_value=1, 19 | max_value=3, 20 | step=1)): 21 | x = self.layers.Conv2D(self.hp.Int('conv_filter', 22 | min_value=32//self.factor, 23 | max_value=512//self.factor, 24 | step=32//self.factor), (3, 3), padding='same', 25 | use_bias=False)(x) 26 | x = self.layers.BatchNormalization()(x) 27 | x = self.layers.Activation('relu')(x) 28 | x = self.layers.MaxPooling2D((3, 3), 
strides=(2, 2))(x) 29 | x = self.layers.Flatten()(x) 30 | x = self.layers.Dense(self.label_dim, 31 | use_bias=True, 32 | name='dense_1')(x) 33 | return x 34 | 35 | 36 | class ResNetV2Hyper(GenericModelBuilder): 37 | """code from https://github.com/keras-team/keras-tuner/blob/master/kerastuner/applications/resnet.py 38 | """ 39 | 40 | def model(self, x): # x: input tensor provided by GenericModelBuilder.build 41 | conv3_depth = self.hp.Choice('conv3_depth', [4, 8]) 42 | conv4_depth = self.hp.Choice('conv4_depth', [6, 23, 36]) 43 | factor = self.hp.Int('factor', min_value=1, max_value=8, step=1) 44 | preact = True 45 | use_bias = True 46 | 47 | # Model definition. 48 | bn_axis = 3 if tf.keras.backend.image_data_format() == 'channels_last' else 1 49 | 50 | # Initial conv2d block. 51 | x = self.layers.ZeroPadding2D(padding=((3, 3), (3, 3)), name='conv1_pad')(x) 52 | x = self.layers.Conv2D(64 // factor, 7, strides=2, use_bias=use_bias, name='conv1_conv')(x) 53 | x = self.layers.ZeroPadding2D(padding=((1, 1), (1, 1)), name='pool1_pad')(x) 54 | x = self.layers.MaxPooling2D(3, strides=2, name='pool1_pool')(x) 55 | 56 | # Middle hypertunable stack. 57 | x = stack2(self.layers, x, 64 // factor, 3, name='conv2') 58 | x = stack2(self.layers, x, 128 // factor, conv3_depth, name='conv3') 59 | x = stack2(self.layers, x, 256 // factor, conv4_depth, name='conv4') 60 | x = stack2(self.layers, x, 512 // factor, 3, stride1=1, name='conv5') 61 | 62 | # Top of the model. 63 | x = self.layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5, name='post_bn')(x) 64 | x = self.layers.Activation('relu', name='post_relu')(x) 65 | 66 | pooling = self.hp.Choice('pooling', ['avg', 'max'], default='avg') 67 | if pooling == 'avg': 68 | x = self.layers.GlobalAveragePooling2D(name='avg_pool')(x) 69 | elif pooling == 'max': 70 | x = self.layers.GlobalMaxPooling2D(name='max_pool')(x) 71 | 72 | x = self.layers.Dense(self.label_dim, activation='softmax', name='probs')(x) 73 | return x 74 | 75 | 76 | def block2(layers, x, filters, kernel_size=3, stride=1, conv_shortcut=False, name=None): 77 | """A residual block. 78 | # Arguments 79 | x: input tensor. 80 | filters: integer, filters of the bottleneck layer. 81 | kernel_size: default 3, kernel size of the bottleneck layer. 82 | stride: default 1, stride of the first layer. 83 | conv_shortcut: default False, use convolution shortcut if True, 84 | otherwise identity shortcut. 85 | name: string, block label. 86 | # Returns 87 | Output tensor for the residual block. 
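    # Example
        (illustrative) x = block2(tf.keras.layers, x, 64, stride=2, conv_shortcut=True, name='conv2_block1')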
88 | """ 89 | bn_axis = 3 if tf.keras.backend.image_data_format() == 'channels_last' else 1 90 | 91 | preact = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5, name=name + '_preact_bn')(x) 92 | preact = layers.Activation('relu', name=name + '_preact_relu')(preact) 93 | 94 | if conv_shortcut is True: 95 | shortcut = layers.Conv2D(4 * filters, 1, strides=stride, name=name + '_0_conv')(preact) 96 | else: 97 | shortcut = layers.MaxPooling2D(1, strides=stride)(x) if stride > 1 else x 98 | 99 | x = layers.Conv2D(filters, 1, strides=1, use_bias=False, name=name + '_1_conv')(preact) 100 | x = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5, name=name + '_1_bn')(x) 101 | x = layers.Activation('relu', name=name + '_1_relu')(x) 102 | 103 | x = layers.ZeroPadding2D(padding=((1, 1), (1, 1)), name=name + '_2_pad')(x) 104 | x = layers.Conv2D(filters, kernel_size, strides=stride, use_bias=False, name=name + '_2_conv')(x) 105 | x = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5, name=name + '_2_bn')(x) 106 | x = layers.Activation('relu', name=name + '_2_relu')(x) 107 | 108 | x = layers.Conv2D(4 * filters, 1, name=name + '_3_conv')(x) 109 | x = layers.Add(name=name + '_out')([shortcut, x]) 110 | return x 111 | 112 | 113 | def stack2(layers, x, filters, blocks, stride1=2, name=None): 114 | """A set of stacked residual blocks. 115 | # Arguments 116 | x: input tensor. 117 | filters: integer, filters of the bottleneck layer in a block. 118 | blocks: integer, blocks in the stacked blocks. 119 | stride1: default 2, stride of the first layer in the first block. 120 | name: string, stack label. 121 | # Returns 122 | Output tensor for the stacked blocks. 123 | """ 124 | x = block2(layers, x, filters, conv_shortcut=True, name=name + '_block1') 125 | for i in range(2, blocks): 126 | x = block2(layers, x, filters, name=name + '_block' + str(i)) 127 | x = block2(layers, x, filters, stride=stride1, name=name + '_block' + str(blocks)) 128 | return x 129 | -------------------------------------------------------------------------------- /inference_benchmark.py: -------------------------------------------------------------------------------- 1 | """Script to benchmark several versions of Tensorflow and Upstride Tech on different hardware and docker platforms 2 | 3 | to start a new benchmark, you can run 4 | python inference_benchmark.py --yaml_config conf1.yml conf2.yml --comments "small test" 5 | """ 6 | import os 7 | from typing import List 8 | import requests 9 | import json 10 | import yaml 11 | import upstride_argparse as argparse 12 | 13 | ENGINES = ['upstride_0', 'upstride_1', 'upstride_2', 'upstride_3', 'tensorflow'] 14 | 15 | inference_arguments = [ 16 | [int, "batch_size", 1, 'The size of batch per gpu', lambda x: x > 0], 17 | [str, "comments", "", 'some comment about this benchmark run. 
Will be displayed on the model zoo'], 18 | [bool, 'cpu', False, 'if True then force cpu use'], 19 | [int, 'cuda_visible_device', 0, 'the gpu to run the benchmark on'], 20 | ['list[str]', "docker_images", [], "list of docker images to test"], 21 | ['list[str]', "engines", [], "list of engines to test", lambda x: all(engine in ENGINES for engine in x)], 22 | [float, 'factor', 1, 'division factor for the number of channels per layer'], 23 | [str, "model_path", "", 'Specify the model path, to work on a real model instead of a fake one with random weights'], 24 | ['list[str]', "models", [], "list of models to test"], 25 | [str, "output", "results.md", "file with results"], 26 | [str, "profiling_dir", "/tmp", "dir where profiling files will be written"], 27 | [int, 'n_steps', 10, "number of steps to run the inference. The higher the better"], 28 | [bool, "tensorrt", False, "if true then models will be converted to tensorrt"], 29 | [str, "tensorrt_precision", 'FP32', 'Provide precision FP32 or FP16 for optimizing tensorrt'], 30 | ['list[str]', "yaml_config", [], "config files setting these options; several can be provided"], 31 | [bool, "xla", False, "if true then use xla"], 32 | ] 33 | 34 | 35 | def create_all_environment_configs(conf): 36 | """Create a list of dicts with the docker, model and engine to benchmark. 37 | Will create all possible triplets 38 | 39 | Returns: 40 | List of Dict with docker, model and engine 41 | """ 42 | env_configs = [] 43 | for docker in conf["docker_images"]: 44 | for model in conf["models"]: 45 | for engine in conf["engines"]: 46 | env_configs.append({"docker": docker, 47 | "model": model, 48 | "engine": engine}) 49 | return env_configs 50 | 51 | 52 | def prepare_docker(docker_images: List[str]): 53 | """download all docker images to prepare the benchmark 54 | 55 | there is one exception: if docker_images is "local", the benchmark will run with the host python 56 | without using docker 57 | 58 | Args: 59 | docker_images (List[str]): should be formatted as ["docker_tag:docker_label", ...] 
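    Example (illustrative):
        prepare_docker(["tensorflow/tensorflow:2.4.1-gpu", "local"])
        # pulls the first image; "local" is skipped, so that configuration runs with the host python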
60 | """ 61 | for docker_image in docker_images: 62 | if docker_image == "local": 63 | continue 64 | print(f"Pulling {docker_image}") 65 | stream = os.popen(f"docker pull {docker_image}") 66 | out = stream.read() 67 | print(out) 68 | 69 | 70 | def docker_run_cmd(docker, engine, model, config): 71 | # dev note: all option need to have a space at the end 72 | python_cmd = f"python3 src/inference_benchmark.py "\ 73 | f"--batch_size {config['batch_size']} "\ 74 | f"--engine {engine} "\ 75 | f"--factor {config['factor']} "\ 76 | f"--model_name {model} "\ 77 | f"--n_steps {config['n_steps']} "\ 78 | f"--profiler_path {config['profiling_dir']} " 79 | if config['tensorrt']: 80 | python_cmd += f"--export_tensorrt " 81 | python_cmd += f"--tensorrt_precision {config['tensorrt_precision']} " 82 | if config['model_path']: 83 | python_cmd += f"--model_path {config['model_path']} " 84 | if config['xla']: 85 | python_cmd += f"--xla " 86 | 87 | if docker == "local": 88 | # then run without docker 89 | return python_cmd 90 | 91 | runtime = f"--gpus all -e CUDA_VISIBLE_DEVICES={config['cuda_visible_device']}" if "gpu" in docker and not config['cpu'] else "" 92 | volumes = " -v $(pwd)/src:/src -v /tmp/docker:/tmp" 93 | # Add a volume to save the profiling 94 | # docker need also to be run with the privileged parameter to access gpu information 95 | volumes += " -v $(pwd)/profiling:/profiling --privileged=true" 96 | return f"docker run -it --rm {runtime} {volumes} {docker} {python_cmd}" 97 | 98 | 99 | def format_results(env_configs, results, output_file): 100 | with open(output_file, "w") as f: 101 | f.write(f"| docker |engine |model |n_iteration|total time|time per iteration|FPS |\n") 102 | f.write(f"|:------------------------------------------------------------:|:----------:|:-------:|:---------:|:--------:|:----------------:|:-----:|\n") 103 | for i in range(len(results)): 104 | result = results[i] 105 | env_config = env_configs[i] 106 | time_per_iteration = result['total_time']/result['n_iterations'] 107 | fps = 1/time_per_iteration 108 | line = f"| {env_config['docker']} | {env_config['engine']: <10} | {env_config['model']} | {result['n_iterations']} | {result['total_time']:.2f} | {time_per_iteration:.3f} | {fps:.1f} |\n" 109 | f.write(line) 110 | 111 | 112 | def benchmark(config): 113 | print(config) 114 | # currently first gpu is being picked if there are multiple GPUs. 
115 | # conf['hardware']['gpu'] = get_gpu_info().get('name')[0] 116 | prepare_docker(config["docker_images"]) 117 | # benchmark all docker images against all models against all engines 118 | env_configs = create_all_environment_configs(config) 119 | results = [] 120 | for env_config in env_configs: 121 | print(f"Benchmark {env_config['model']} using {env_config['engine']} on {env_config['docker']}") 122 | cmd = docker_run_cmd(env_config['docker'], env_config['engine'], env_config['model'], config) 123 | print(cmd) 124 | stream = os.popen(cmd) 125 | out = stream.read() 126 | print(out) 127 | # scan the output from the end for the last line that is a JSON result 128 | i = 2 129 | while out.split('\n')[-i][0] != '{': 130 | i += 1 131 | # print(i) 132 | r = json.loads(out.split('\n')[-i]) 133 | results.append(r) 134 | format_results(env_configs, results, config["output"]) 135 | 136 | 137 | if __name__ == "__main__": 138 | config = argparse.parse_cmd(inference_arguments) 139 | benchmark(config) 140 | -------------------------------------------------------------------------------- /src/models/fbnetv2.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | from typing import List 4 | import yaml 5 | import tensorflow as tf 6 | import numpy as np 7 | 8 | # global variables needed for softmax gumbel computation in ChannelMasking 9 | temperature = 5.0 # should be multiplied by 0.956 at the end of every epoch, see section 4.1 in the paper 10 | 11 | 12 | def define_temperature(new_temperature): 13 | global temperature 14 | temperature = new_temperature 15 | 16 | 17 | def create_binary_vector(channel_sizes: List[int], dtype) -> List[tf.Tensor]: 18 | """this function returns a list of vectors with ones at the beginning and zeros at the end 19 | it uses numpy because there is no reason for these operations to be inside the tensorflow graph. 20 | 21 | Args: 22 | channel_sizes (List[int]): number of channels in the convolution 23 | 24 | Returns: 25 | List[tf.Tensor]: list of vectors like [1., 1., 1., 0., 0., 0.] 26 | """ 27 | binary_vectors = [] 28 | max_size = channel_sizes[-1] 29 | for i in range(len(channel_sizes)): 30 | ones = np.ones(channel_sizes[i]) 31 | zeros = np.zeros(max_size - channel_sizes[i]) 32 | binary_vectors.append(tf.convert_to_tensor(np.concatenate([ones, zeros], 0), dtype=dtype)) 33 | return binary_vectors 34 | 35 | 36 | def gumbel_softmax(logits, gumble_noise=False): 37 | """please have a look at https://arxiv.org/pdf/1611.01144.pdf for the Gumbel-Softmax definition 38 | """ 39 | global temperature 40 | 41 | if gumble_noise: 42 | # Gumbel distribution -log(-log(u)), where u ~ (0,1) is a uniform distribution and 43 | # must be sampled from the open interval (0, 1), but tf.random.uniform generates samples 44 | # where the lower bound minval is included, i.e. [0, 1). To force the range 45 | # to be (0, 1), np.finfo(float).tiny is used as minval, which gives a tiny positive floating point number 46 | u = tf.random.uniform(minval=np.finfo(float).tiny, maxval=1.0, shape=tf.shape(logits)) 47 | noise = -tf.math.log(-tf.math.log(u)) # noise from the Gumbel distribution 48 | else: 49 | noise = 0.0001 50 | # During mixed precision training, the weight variable's data type is inferred from "inputs" in the call method. 51 | # This causes alpha to be converted to float16. 
52 | # Since we are computing softmax at the end, we need to convert logits(alpha) to float32 53 | logits = tf.cast(logits, tf.float32) 54 | noisy_logits = (noise + logits) / temperature 55 | 56 | return tf.math.softmax(noisy_logits) 57 | 58 | 59 | def get_mask(binary_vectors: List[tf.Tensor], g: List[float]): 60 | vectors = [g[i] * binary_vectors[i] for i in range(g.shape[0])] 61 | vectors = tf.stack(vectors, axis=0) 62 | vector = tf.reduce_sum(vectors, axis=0) 63 | return vector 64 | 65 | 66 | class ChannelMasking(tf.keras.layers.Layer): 67 | def __init__(self, min: int, max: int, step: int, name: str, gumble_noise=True, regularizer=None): 68 | super().__init__(name=name) 69 | self.min = min 70 | self.max = max 71 | self.step = step 72 | self.channel_sizes = [] 73 | self.gumble_noise = gumble_noise 74 | self.regularizer = regularizer 75 | for i in range(self.min, self.max+1, self.step): 76 | self.channel_sizes.append(i) 77 | 78 | def build(self, input_shape): 79 | self.alpha = self.add_weight(name="alpha", 80 | shape=(len(self.channel_sizes),), 81 | initializer=tf.keras.initializers.Constant(value=1.), regularizer=self.regularizer) 82 | self.binary_vectors = create_binary_vector(self.channel_sizes, dtype=self.alpha.dtype) 83 | 84 | def call(self, inputs): 85 | self.g = gumbel_softmax(self.alpha, self.gumble_noise) 86 | mask = get_mask(self.binary_vectors, self.g) 87 | # Convert the mask from float32 to float16 during mixed precision. 88 | mask = tf.cast(mask, dtype=inputs.dtype) 89 | 90 | # broadcasting works for channels_last but not channels_first, so reshape the mask explicitly 91 | if tf.keras.backend.image_data_format() == 'channels_first': 92 | mask = tf.reshape(mask, [1, self.channel_sizes[-1], 1, 1]) 93 | if type(inputs) == list: 94 | return [mask * inputs[i] for i in range(len(inputs))] 95 | else: 96 | return mask * inputs 97 | 98 | 99 | def exponential_decay(initial_value, decay_steps, decay_rate): 100 | """ 101 | Applies exponential decay to the initial value 102 | Args: 103 | initial_value: The initial learning value 104 | decay_steps: Number of steps to decay over 105 | decay_rate: decay rate 106 | """ 107 | return lambda step: initial_value * decay_rate ** (step / decay_steps) 108 | 109 | 110 | def split_trainable_weights(model, arch_params_name='alpha'): 111 | """ 112 | split the model parameters into weights and architectural params 113 | """ 114 | weights = [] 115 | arch_params = [] 116 | for trainable_weight in model.trainable_variables: 117 | if arch_params_name in trainable_weight.name: 118 | arch_params.append(trainable_weight) 119 | else: 120 | weights.append(trainable_weight) 121 | if not arch_params: 122 | raise ValueError(f"No architecture parameters found by the name {arch_params_name}") 123 | return weights, arch_params 124 | 125 | 126 | def post_training_analysis(model, saved_file_path): 127 | layer_name = '' 128 | saved_file_content = {} 129 | for layer in model.layers: 130 | # if type(layer) == tf.keras.Conv2D: 131 | # layer_name = layer.name 132 | if type(layer) == ChannelMasking and layer.name[-8:] == '_savable': 133 | layer_name = layer.name[:-8] 134 | max_alpha_id = int(tf.math.argmax(layer.alpha).numpy()) 135 | value = layer.min + max_alpha_id * layer.step 136 | saved_file_content[layer_name] = value 137 | print(saved_file_content) 138 | with open(saved_file_path, 'w') as f: 139 | yaml.dump(saved_file_content, f) 140 | 141 | 142 | def save_arch_params(model, epoch, log_dir): 143 | json_file_path = os.path.join(log_dir, 'alpha.json') 144 | content = {} 145 | if os.path.exists(json_file_path): 146 | with 
open(json_file_path) as f: 147 | content = json.load(f) 148 | for layer in model.layers: 149 | if type(layer) == ChannelMasking: 150 | # need to convert from numpy.float32 to pure python float32 to prepare the dumps 151 | if str(epoch) not in content: 152 | content[str(epoch)] = {} 153 | content[str(epoch)][layer.name] = list(map(float, layer.alpha.numpy())) 154 | with open(json_file_path, 'w') as f: 155 | f.write(json.dumps(content)) 156 | -------------------------------------------------------------------------------- /scripts/test_tfrecord_writer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import tempfile 4 | import unittest 5 | import cv2 6 | import numpy as np 7 | import yaml 8 | from tfrecord_writer import build_tfrecord_dataset 9 | 10 | 11 | class TestTfrecordWriter(unittest.TestCase): 12 | def test_process_images_in_class_directory(self): 13 | num_examples_each_class = 10 14 | data_dir = create_fake_dataset(num_examples_each_class) 15 | name = 'Test-dataset' 16 | description = 'A small test datset' 17 | tfrecord_dir_path = tempfile.mkdtemp() 18 | 19 | args = {'name': name, 'description': description, 'tfrecord_dir_path': tfrecord_dir_path, 20 | 'tfrecord_size': 2, 'preprocessing': 'NO', 'image_size': (224, 224), "n_tfrecords":0, 21 | 'data': {'images_dir_path': data_dir, 22 | 'annotation_file_path': None, 23 | 'delimiter': ',', 24 | 'header_exists': False, 25 | 'split_names': ['train', 'validation', 'test'], 26 | 'split_percentages': [0.8, 0.1, 0.1], 27 | } 28 | } 29 | build_tfrecord_dataset(args) 30 | 31 | dataset_info = load_yaml(data_dir=tfrecord_dir_path, dataset_name=name) 32 | 33 | # check newly created datset name and description 34 | self.assertEqual(name, dataset_info['name']) 35 | self.assertEqual(description, dataset_info['description']) 36 | 37 | i = 0 38 | # check split percentage 39 | for split_name, split_items in dataset_info['splits'].items(): 40 | num_exmaples = split_items['num_examples'] 41 | self.assertAlmostEqual(args['data']['split_percentages'][i], num_exmaples / (2.0 * num_examples_each_class)) 42 | i += 1 43 | 44 | shutil.rmtree(data_dir) 45 | shutil.rmtree(tfrecord_dir_path) 46 | 47 | def test_process_images_in_class_directory_fixed_number_of_tfrecord(self): 48 | num_examples_each_class = 10 49 | data_dir = create_fake_dataset(num_examples_each_class) 50 | name = 'Test-dataset' 51 | description = 'A small test datset' 52 | tfrecord_dir_path = tempfile.mkdtemp() 53 | 54 | args = {'name': name, 'description': description, 'tfrecord_dir_path': tfrecord_dir_path, 55 | 'tfrecord_size': 20, 'preprocessing': 'NO', 'image_size': (224, 224), "n_tfrecords":5, 56 | 'data': {'images_dir_path': data_dir, 57 | 'annotation_file_path': None, 58 | 'delimiter': ',', 59 | 'header_exists': False, 60 | 'split_names': ['train', 'validation', 'test'], 61 | 'split_percentages': [0.8, 0.1, 0.1], 62 | } 63 | } 64 | build_tfrecord_dataset(args) 65 | print(tfrecord_dir_path) 66 | dataset_info = load_yaml(data_dir=tfrecord_dir_path, dataset_name=name) 67 | 68 | # check newly created datset name and description 69 | self.assertEqual(name, dataset_info['name']) 70 | self.assertEqual(description, dataset_info['description']) 71 | 72 | i = 0 73 | # check split percentage 74 | for split_name, split_items in dataset_info['splits'].items(): 75 | num_exmaples = split_items['num_examples'] 76 | self.assertAlmostEqual(args['data']['split_percentages'][i], num_exmaples / (2.0 * num_examples_each_class)) 77 | i += 1 78 
| 79 | # check number of tfrecords 80 | self.assertEqual(5, len(dataset_info['splits']['train']['tfrecord_files'])) 81 | 82 | shutil.rmtree(data_dir) 83 | shutil.rmtree(tfrecord_dir_path) 84 | 85 | 86 | 87 | def test_process_with_annotation_file(self): 88 | num_examples_each_class = 10 89 | data_dir, annotation_file = create_fake_dataset_with_annotation_file(num_examples_each_class) 90 | name = 'Test-dataset' 91 | description = 'A small test dataset' 92 | tfrecord_dir_path = tempfile.mkdtemp() 93 | 94 | args = {'name': name, 'description': description, 'tfrecord_dir_path': tfrecord_dir_path, 95 | 'tfrecord_size': 2, 'preprocessing': 'NO', 'image_size': (224, 224), "n_tfrecords":0, 96 | 'data': {'images_dir_path': data_dir, 97 | 'annotation_file_path': annotation_file, 98 | 'delimiter': ',', 99 | 'header_exists': False, 100 | 'split_names': ['train', 'validation', 'test'], 101 | 'split_percentages': [0.8, 0.1, 0.1], 102 | } 103 | } 104 | build_tfrecord_dataset(args) 105 | 106 | dataset_info = load_yaml(data_dir=tfrecord_dir_path, dataset_name=name) 107 | 108 | # check newly created dataset name and description 109 | self.assertEqual(name, dataset_info['name']) 110 | self.assertEqual(description, dataset_info['description']) 111 | 112 | i = 0 113 | # check split percentage 114 | for split_name, split_items in dataset_info['splits'].items(): 115 | num_examples = split_items['num_examples'] 116 | self.assertAlmostEqual(args['data']['split_percentages'][i], num_examples / (2.0 * num_examples_each_class)) 117 | i += 1 118 | 119 | shutil.rmtree(data_dir) 120 | shutil.rmtree(tfrecord_dir_path) 121 | 122 | 123 | def load_yaml(data_dir, dataset_name): 124 | yaml_file = os.path.join(data_dir, dataset_name, 'dataset_info.yaml') 125 | with open(yaml_file, 'r') as stream: 126 | try: 127 | dataset_info = yaml.safe_load(stream) 128 | except yaml.YAMLError as e: 129 | print('Error parsing file', yaml_file) 130 | raise e 131 | return dataset_info 132 | 133 | 134 | def create_fake_dataset(n_images_per_class=2): 135 | dataset_dir = tempfile.mkdtemp() 136 | os.makedirs(os.path.join(dataset_dir, 'cat'), exist_ok=True) 137 | os.makedirs(os.path.join(dataset_dir, 'dog'), exist_ok=True) 138 | for i in range(n_images_per_class): 139 | cv2.imwrite(os.path.join(dataset_dir, 'dog', '{}.jpg'.format(i)), np.ones((640, 480, 3), dtype=np.uint8) * 255) 140 | cv2.imwrite(os.path.join(dataset_dir, 'cat', '{}.jpg'.format(i)), np.ones((640, 480, 3), dtype=np.uint8) * 255) 141 | return dataset_dir 142 | 143 | 144 | def create_fake_dataset_with_annotation_file(n_images_per_class=2): 145 | dataset_dir = tempfile.mkdtemp() 146 | os.makedirs(dataset_dir, exist_ok=True) 147 | 148 | annotation_file = os.path.join(dataset_dir, 'annotations.txt') 149 | 150 | labels = ['cat', 'dog'] 151 | 152 | with open(annotation_file, 'w', encoding='utf-8') as f: 153 | for i in range(n_images_per_class*2): 154 | cv2.imwrite(os.path.join(dataset_dir, '{}.jpg'.format(i)), np.ones((640, 480, 3), dtype=np.uint8) * 255) 155 | line = '{}.jpg'.format(i) + "," + labels[i % 2] + "\n" 156 | f.write(line) 157 | 158 | return dataset_dir, annotation_file 159 | -------------------------------------------------------------------------------- /tests/system_tests/models_training.sh: -------------------------------------------------------------------------------- 1 | # train a mobilenet channel first with tensorflow 2 | # needs at least 6 GB of VRAM 3 | python3 train.py \ 4 | --model.name MobileNetV2NCHW \ 5 | --model.factor 1 \ 6 | --model.num_classes 10 \ 7 |
--model.input_size 224 224 3 \ 8 | --num_epochs 2 \ 9 | --checkpoint_dir /tmp/checkpoint \ 10 | --log_dir /tmp/results \ 11 | --dataloader.batch_size 128 \ 12 | --dataloader.name imagenette/full-size-v2 \ 13 | --early_stopping 100 \ 14 | --dataloader.train_list RandomCropThenResize Normalize RandomHorizontalFlip Cutout ColorJitter Translate \ 15 | --dataloader.val_list Normalize CentralCrop \ 16 | --dataloader.val_split_id validation \ 17 | --dataloader.train_split_id train \ 18 | --dataloader.Translate.width_shift_range 0.2 \ 19 | --dataloader.Translate.height_shift_range 0.2 \ 20 | --dataloader.RandomCrop.size 224 224 3 \ 21 | --dataloader.CentralCrop.size 224 224 \ 22 | --dataloader.Cutout.length 16 \ 23 | --optimizer.name sgd_nesterov \ 24 | --optimizer.lr 0.1 \ 25 | --optimizer.lr_decay_strategy.lr_params.patience 20 \ 26 | --optimizer.lr_decay_strategy.lr_params.strategy cosine_decay \ 27 | --optimizer.lr_decay_strategy.lr_params.decay_rate 0.3 \ 28 | --config.mixed_precision 29 | 30 | # Total params: 2,270,794 31 | # Trainable params: 2,236,682 32 | # Non-trainable params: 34,112 33 | # Epoch 2 takes 40 seconds using a GTX 1080 and should reach >30% validation accuracy 34 | 35 | rm -r /tmp/results 36 | rm -r /tmp/checkpoint 37 | 38 | # train a mobilenet channel last with tensorflow 39 | python3 train.py \ 40 | --model.name MobileNetV2 \ 41 | --model.factor 1 \ 42 | --model.num_classes 10 \ 43 | --model.input_size 224 224 3 \ 44 | --num_epochs 2 \ 45 | --checkpoint_dir /tmp/checkpoint \ 46 | --log_dir /tmp/results \ 47 | --dataloader.batch_size 128 \ 48 | --dataloader.name imagenette/full-size-v2 \ 49 | --early_stopping 100 \ 50 | --dataloader.train_list RandomCropThenResize Normalize RandomHorizontalFlip Cutout ColorJitter Translate \ 51 | --dataloader.val_list Normalize CentralCrop \ 52 | --dataloader.val_split_id validation \ 53 | --dataloader.train_split_id train \ 54 | --dataloader.Translate.width_shift_range 0.2 \ 55 | --dataloader.Translate.height_shift_range 0.2 \ 56 | --dataloader.RandomCrop.size 224 224 3 \ 57 | --dataloader.CentralCrop.size 224 224 \ 58 | --dataloader.Cutout.length 16 \ 59 | --optimizer.name sgd_nesterov \ 60 | --optimizer.lr 0.1 \ 61 | --optimizer.lr_decay_strategy.lr_params.patience 20 \ 62 | --optimizer.lr_decay_strategy.lr_params.strategy cosine_decay \ 63 | --optimizer.lr_decay_strategy.lr_params.decay_rate 0.3 \ 64 | --config.mixed_precision 65 | 66 | # Total params: 2,270,794 67 | # Trainable params: 2,236,682 68 | # Non-trainable params: 34,112 69 | # Epoch 2 takes 40 seconds using a GTX 1080 and should reach >30% validation accuracy 70 | 71 | rm -r /tmp/results 72 | rm -r /tmp/checkpoint 73 | 74 | # train a resnet channel last with tensorflow 75 | python3 train.py \ 76 | --model.name ResNet18 \ 77 | --model.factor 1 \ 78 | --model.num_classes 10 \ 79 | --model.input_size 224 224 3 \ 80 | --num_epochs 2 \ 81 | --checkpoint_dir /tmp/checkpoint \ 82 | --log_dir /tmp/results \ 83 | --dataloader.batch_size 128 \ 84 | --dataloader.name imagenette/full-size-v2 \ 85 | --early_stopping 100 \ 86 | --dataloader.train_list RandomCropThenResize Normalize RandomHorizontalFlip Cutout ColorJitter Translate \ 87 | --dataloader.val_list Normalize CentralCrop \ 88 | --dataloader.val_split_id validation \ 89 | --dataloader.train_split_id train \ 90 | --dataloader.Translate.width_shift_range 0.2 \ 91 | --dataloader.Translate.height_shift_range 0.2 \ 92 | --dataloader.RandomCrop.size 224 224 3 \ 93 | --dataloader.CentralCrop.size 224 224 \ 94 | --dataloader.Cutout.length
16 \ 95 | --optimizer.name sgd_nesterov \ 96 | --optimizer.lr 0.1 \ 97 | --optimizer.lr_decay_strategy.lr_params.patience 20 \ 98 | --optimizer.lr_decay_strategy.lr_params.strategy cosine_decay \ 99 | --optimizer.lr_decay_strategy.lr_params.decay_rate 0.3 \ 100 | --config.mixed_precision 101 | 102 | # Total params: 11,188,728 103 | # Trainable params: 11,180,920 104 | # Non-trainable params: 7,808 105 | 106 | # train a resnet channel first with tensorflow 107 | python3 train.py \ 108 | --model.name ResNet18NCHW \ 109 | --model.factor 1 \ 110 | --model.num_classes 10 \ 111 | --model.input_size 224 224 3 \ 112 | --num_epochs 2 \ 113 | --checkpoint_dir /tmp/checkpoint \ 114 | --log_dir /tmp/results \ 115 | --dataloader.batch_size 128 \ 116 | --dataloader.name imagenette/full-size-v2 \ 117 | --early_stopping 100 \ 118 | --dataloader.train_list RandomCropThenResize Normalize RandomHorizontalFlip Cutout ColorJitter Translate \ 119 | --dataloader.val_list Normalize CentralCrop \ 120 | --dataloader.val_split_id validation \ 121 | --dataloader.train_split_id train \ 122 | --dataloader.Translate.width_shift_range 0.2 \ 123 | --dataloader.Translate.height_shift_range 0.2 \ 124 | --dataloader.RandomCrop.size 224 224 3 \ 125 | --dataloader.CentralCrop.size 224 224 \ 126 | --dataloader.Cutout.length 16 \ 127 | --optimizer.name sgd_nesterov \ 128 | --optimizer.lr 0.1 \ 129 | --optimizer.lr_decay_strategy.lr_params.patience 20 \ 130 | --optimizer.lr_decay_strategy.lr_params.strategy cosine_decay \ 131 | --optimizer.lr_decay_strategy.lr_params.decay_rate 0.3 \ 132 | --config.mixed_precision 133 | 134 | # Total params: 11,188,728 135 | # Trainable params: 11,180,920 136 | # Non-trainable params: 7,808 137 | 138 | # train a resnet cifar channel last with tensorflow 139 | python3 train.py \ 140 | --model.name ResNet20CIFAR \ 141 | --model.factor 1 \ 142 | --model.num_classes 10 \ 143 | --model.input_size 224 224 3 \ 144 | --num_epochs 2 \ 145 | --checkpoint_dir /tmp/checkpoint \ 146 | --log_dir /tmp/results \ 147 | --dataloader.batch_size 128 \ 148 | --dataloader.name imagenette/full-size-v2 \ 149 | --early_stopping 100 \ 150 | --dataloader.train_list RandomCropThenResize Normalize RandomHorizontalFlip Cutout ColorJitter Translate \ 151 | --dataloader.val_list Normalize CentralCrop \ 152 | --dataloader.val_split_id validation \ 153 | --dataloader.train_split_id train \ 154 | --dataloader.Translate.width_shift_range 0.2 \ 155 | --dataloader.Translate.height_shift_range 0.2 \ 156 | --dataloader.RandomCrop.size 224 224 3 \ 157 | --dataloader.CentralCrop.size 224 224 \ 158 | --dataloader.Cutout.length 16 \ 159 | --optimizer.name sgd_nesterov \ 160 | --optimizer.lr 0.1 \ 161 | --optimizer.lr_decay_strategy.lr_params.patience 20 \ 162 | --optimizer.lr_decay_strategy.lr_params.strategy cosine_decay \ 163 | --optimizer.lr_decay_strategy.lr_params.decay_rate 0.3 \ 164 | --config.mixed_precision 165 | 166 | # Total params: 294,250 167 | # Trainable params: 292,874 168 | # Non-trainable params: 1,376 169 | -------------------------------------------------------------------------------- /src/data/dataloader.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import List, Dict 3 | import tensorflow as tf 4 | import tensorflow_datasets as tfds 5 | from . import augmentations 6 | import yaml 7 | 8 | 9 | # Some of the common image classification datasets from tfds are added here. 
An exhaustive list can be found here 10 | # (https://www.tensorflow.org/datasets/catalog/overview) 11 | 12 | TENSORFLOW_DATASET_NAMES = [ 13 | "caltech101", 14 | "caltech_birds2010", 15 | "caltech_birds2011", 16 | "cars196", 17 | "cats_vs_dogs", 18 | "cifar10", 19 | "cifar10_1", # Test set for Cifar 10 20 | "cifar10_corrupted", # Generated by adding 15 common corruptions plus 4 extra corruptions to the test images in Cifar10 21 | "cifar100", 22 | "fashion_mnist", 23 | "food101", 24 | "imagenette/full-size-v2", # Imagenette is a subset of the ImageNet dataset with 10 classes (9,469 train images and 3,925 validation images) 25 | "i_naturalist2017", 26 | "mnist", 27 | "mnist_corrupted", # Generated by adding 15 common corruptions plus 4 extra corruptions to the test images in mnist 28 | "omniglot", 29 | "oxford_flowers102", 30 | "oxford_iiit_pet" 31 | ] 32 | 33 | arguments = [ 34 | ['list[str]', 'train_list', ['RandomCropThenResize', 'RandomHorizontalFlip', 'Normalize'], 'List all the data augmentations separated by comma for training data'], 35 | ['list[str]', 'val_list', ['Resize', 'CentralCrop', 'Normalize'], 'List all the data augmentations separated by comma for validation data'], 36 | [str, "data_dir", '', "directory to read/write data. Defaults to \"~/tensorflow_datasets\""], 37 | [str, 'name', 'imagenet', 'Choose the dataset to be used for training'], 38 | [str, 'train_split_id', 'train', ''], 39 | [str, 'val_split_id', 'validation', ''], 40 | [int, 'batch_size', 0, 'The batch size per GPU', lambda x: x > 0], 41 | ] + augmentations.arguments 42 | 43 | 44 | def get_map_fn(transformation_list: List[str], param_dict: Dict, n_classes: int): 45 | """Return the map function applying data augmentation to the image and transforming the label to a one-hot vector 46 | 47 | Args: 48 | param_dict: dict containing transformation name as key and list of corresponding parameters as value 49 | """ 50 | def map_fn(image, label): 51 | label = tf.one_hot(label, n_classes) 52 | image = augmentations.apply_list_of_transformations(image, transformation_list, param_dict) 53 | return image, label 54 | return map_fn 55 | 56 | 57 | class TFRecordExtractor: 58 | """ 59 | This class extracts the stored tfrecords from the specified directory based on their split (train/val/test) 60 | and creates a tensorflow dataset object.
It assumes that all the tfrecord files are stored in the same directory as the dataset 61 | meta-data file named `dataset_info.yaml` 62 | """ 63 | 64 | def __init__(self, dataset_name, data_dir, split): 65 | if not os.path.exists(data_dir): 66 | raise ValueError(f"there is no directory named {data_dir}") 67 | if not os.path.exists(os.path.join(data_dir, dataset_name)): 68 | raise ValueError(f"there is no dataset by the name of {dataset_name} in directory {data_dir}") 69 | self.data_dir = os.path.join(data_dir, dataset_name) 70 | self.split = split 71 | 72 | def __get_tfrecord_files_from_dataset_info_file(self): 73 | """ 74 | get the names of all tfrecord files from the dataset meta-data file named `dataset_info.yaml` 75 | """ 76 | yaml_file = os.path.join(self.data_dir, 'dataset_info.yaml') 77 | with open(yaml_file, 'r') as stream: 78 | try: 79 | dataset_info = yaml.safe_load(stream) 80 | except yaml.YAMLError as e: 81 | print('Error parsing file', yaml_file) 82 | raise e 83 | tfrecord_files = [os.path.join(self.data_dir, path) for path in 84 | dataset_info["splits"][self.split]["tfrecord_files"]] 85 | return tfrecord_files 86 | 87 | def __extract_fn(self, tfrecord): 88 | """Extract a tfrecord and decode it to an image and a label 89 | """ 90 | feature_description = { 91 | 'image': tf.io.FixedLenFeature([], tf.string), 92 | 'label': tf.io.FixedLenFeature([], tf.int64), 93 | 'size': tf.io.FixedLenFeature([2], tf.int64) 94 | } 95 | # Extract the data record 96 | sample = tf.io.parse_single_example(tfrecord, feature_description) 97 | image = tf.io.decode_image(sample['image'], channels=3) 98 | image = tf.reshape(image, [sample['size'][0], sample['size'][1], 3]) # TODO this line should be useless ? 99 | label = sample['label'] 100 | return (image, label) 101 | 102 | def get_tf_dataset(self): 103 | """Creates a tensorflow dataset object from `tfrecord_files` 104 | """ 105 | tfrecord_files = self.__get_tfrecord_files_from_dataset_info_file() 106 | return tf.data.TFRecordDataset(tfrecord_files).map(self.__extract_fn) 107 | 108 | 109 | def is_training(config, split): 110 | return split == config['train_split_id'] 111 | 112 | 113 | def get_dataset_from_custom_tfrecord(config, transformation_list: List[str], num_classes: int, split, 114 | num_parallel_calls=tf.data.experimental.AUTOTUNE, buffer_multiplier=15): 115 | """load a custom dataset from tfrecord files 116 | """ 117 | map_fn = get_map_fn(transformation_list, config, num_classes) 118 | # list_files shuffles the file names 119 | dataset = TFRecordExtractor(config['name'], config['data_dir'], split).get_tf_dataset() 120 | 121 | if is_training(config, split): 122 | dataset = dataset.shuffle(config['batch_size'] * buffer_multiplier) 123 | dataset = dataset.map(map_fn, num_parallel_calls=num_parallel_calls).\ 124 | batch(config['batch_size']).\ 125 | prefetch(tf.data.experimental.AUTOTUNE) 126 | return dataset 127 | 128 | 129 | def get_dataset_from_tfds(config, transformation_list: List[str], num_classes: int, split, 130 | num_parallel_calls=tf.data.experimental.AUTOTUNE, buffer_multiplier=15): 131 | """load a dataset managed by tfds 132 | 133 | Args: 134 | config : Dict containing 'name', 'data_dir', 'batch_size' 135 | split: 'train' or 'test' 136 | """ 137 | dataset = tfds.load(name=config['name'], split=split, data_dir=config['data_dir'], 138 | shuffle_files=is_training(config, split), as_supervised=True) 139 | map_fn = get_map_fn(transformation_list, config, num_classes) 140 | if is_training(config, split): 141 | dataset = dataset.shuffle(config['batch_size'] *
buffer_multiplier) 142 | 143 | dataset = dataset.map(map_fn, num_parallel_calls=num_parallel_calls).\ 144 | batch(config['batch_size']).\ 145 | prefetch(tf.data.experimental.AUTOTUNE) 146 | return dataset 147 | 148 | 149 | # TODO test with autotune, else replace with a value in conf 150 | def get_dataset(config, transformation_list: List[str], num_classes: int, split, 151 | num_parallel_calls=tf.data.experimental.AUTOTUNE, buffer_multiplier=15): 152 | get_function = get_dataset_from_tfds if config["name"] in TENSORFLOW_DATASET_NAMES else get_dataset_from_custom_tfrecord 153 | return get_function(config, transformation_list, num_classes, split, num_parallel_calls, buffer_multiplier) 154 | -------------------------------------------------------------------------------- /src/inference_benchmark.py: -------------------------------------------------------------------------------- 1 | """This script should be called by ../inference_benchmark.py inside a docker container. 2 | It has very few dependencies, so it should run in any environment with TensorFlow installed. 3 | Please only import official Python packages or TensorFlow dependencies. 4 | """ 5 | import argparse 6 | import json 7 | import os 8 | import shutil 9 | import time 10 | from collections import defaultdict 11 | 12 | import numpy as np 13 | import tensorflow as tf 14 | from tensorflow.python.framework import convert_to_constants 15 | from tensorflow.python.saved_model import tag_constants 16 | 17 | from models import model_name_to_class 18 | 19 | import sys 20 | sys.path.append(os.path.join(os.path.abspath(os.path.dirname(sys.argv[0])), '../submodules/global_dl/training')) 21 | from trt_convert import convert_to_tensorrt 22 | 23 | 24 | # This method was working with previous versions of Tensorflow, but doesn't seem to work with TF 2.4. 25 | # I didn't find anything in the doc to reimplement it with newer methods of tensorflow. But maybe it will be 26 | # possible with a future version of TF 27 | # def get_gpu_info() -> defaultdict: 28 | # """ 29 | # Returns: e.g.
30 | # defaultdict(<class 'list'>, 31 | # { 32 | # 'device': ['0', '1'], 33 | # 'name': ['TITAN V', 'Quadro RTX 8000'], 34 | # 'pci bus id': ['0000:41:00.0', '0000:07:00.0'], 35 | # 'compute capability': ['7.0', '7.5'] 36 | # }) 37 | # """ 38 | # get_info = defaultdict(list) 39 | # for i in tf.config.list_physical_devices(): 40 | # if "compute capability: " in i.physical_device_desc: 41 | # for j in i.physical_device_desc.split(", "): 42 | # get_info[j.split(": ")[0]].append(j.split(": ")[1]) 43 | # # TODO handle the else part 44 | # return get_info 45 | 46 | 47 | def str2bool(v): 48 | """custom argparse type to be able to parse booleans 49 | see: 50 | https://stackoverflow.com/questions/15008758/parsing-boolean-values-with-argparse 51 | """ 52 | if isinstance(v, bool): 53 | return v 54 | if v.lower() in ('yes', 'true', 't', 'y', '1'): 55 | return True 56 | elif v.lower() in ('no', 'false', 'f', 'n', '0'): 57 | return False 58 | else: 59 | raise argparse.ArgumentTypeError('Boolean value expected.') 60 | 61 | 62 | def parse_args(): 63 | """Parse the command line and return the configuration to run the benchmark 64 | 65 | Returns: 66 | dict: the parsed benchmark configuration 67 | """ 68 | desc = "Inference benchmark" 69 | parser = argparse.ArgumentParser(description=desc) 70 | # Set of parameters needed for every benchmark 71 | parser.add_argument('--batch_size', type=int, default=1, help='batch_size') 72 | parser.add_argument('--engine', type=str, default="tensorflow", help='Specify the engine: tensorflow or upstride_X') 73 | parser.add_argument('--factor', type=float, default=1., help='factor to divide the number of channels') 74 | parser.add_argument('--model_name', type=str, default='VGG16', help='Specify the name of the model') 75 | parser.add_argument('--n_steps', type=int, default=20, help='number of iterations to benchmark speed') 76 | parser.add_argument('--profiler_path', type=str, default="/tmp/prof", help='path where the tensorboard profiler will be saved') 77 | parser.add_argument('--xla', type=str2bool, nargs='?', const=True, default=False, help='if specified then run XLA compilation') 78 | 79 | # set of parameters specific to TensorRT 80 | parser.add_argument("--export_tensorrt", type=str2bool, nargs='?', const=True, default=False, help="specify if model requires tensorrt conversion") 81 | parser.add_argument('--tensorrt_precision', type=str, default='FP32', help='Provide precision FP32 or FP16 for optimizing tensorrt') 82 | 83 | # If we need to load a trained model 84 | parser.add_argument('--model_path', type=str, default=None, help='Specify the model path') 85 | 86 | args = parser.parse_args() 87 | return vars(args) 88 | 89 | 90 | def random_data_iterator(shape, n, min, max): 91 | """Simulate a dataset by generating random normalized images 92 | 93 | Args: 94 | shape (List): shape of the image.
In most cases, (224, 224, 3) 95 | n (int): number of images to generate 96 | min (float): min of the random distribution 97 | max (float): max of the random distribution 98 | 99 | Yields: 100 | np.ndarray: generated images 101 | """ 102 | for _ in range(n): 103 | data = np.random.random(shape) # between [0, 1) 104 | data = data * (max-min) + min 105 | yield tf.constant(data.astype(np.float32)) 106 | 107 | 108 | def model_load_serve(path): 109 | saved_model = tf.saved_model.load(path, tags=[tag_constants.SERVING]) 110 | model = saved_model.signatures['serving_default'] 111 | return convert_to_constants.convert_variables_to_constants_v2(model) 112 | 113 | 114 | def benchmark(config): 115 | # 1 Tensorflow configuration 116 | # GPU should be configured to have progressive memory growth, else some specific configurations may crash (TF 2.0 on RTX2000 for instance) 117 | physical_devices = tf.config.list_physical_devices('GPU') 118 | for physical_device in physical_devices: 119 | tf.config.experimental.set_memory_growth(physical_device, True) 120 | if config['xla']: 121 | tf.config.optimizer.set_jit(True) 122 | 123 | # 2 Model preparation 124 | if config['engine'] == 'tensorflow': 125 | upstride_type = -1 126 | else: 127 | upstride_type = int(config['engine'][-1]) 128 | 129 | kwargs = { 130 | 'input_size': [224, 224, 3], 131 | 'num_classes': 10, 132 | 'factor': config['factor'], 133 | 'upstride_type': upstride_type, 134 | 'changing_ids': [] 135 | } 136 | 137 | model = model_name_to_class[config['model_name']](**kwargs).build() 138 | n_params = model.count_params() 139 | 140 | # 3 Maybe convert to TensorRT. 141 | # To do this, we save the model, remove it from memory then reload it using TensorRT 142 | tmp_dir = "/tmp/temp_dir" 143 | if config['export_tensorrt']: 144 | # save the model 145 | tf.saved_model.save(model, tmp_dir) 146 | 147 | # Remove it 148 | del model 149 | 150 | # Reload using tensorRT 151 | trt_path = convert_to_tensorrt( 152 | tmp_dir, 153 | image_size=[224, 224, 3], # for now it's hard-coded.
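# note: ideally this would mirror the input_size used to build the model above (also hard-coded to [224, 224, 3] in this script)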
154 | batch_size=config['batch_size'], 155 | precision=config['tensorrt_precision'] 156 | ) 157 | print(f'loading TensorRT model from path {trt_path}') 158 | model = model_load_serve(trt_path) 159 | 160 | # 4 prepare the environment 161 | os.makedirs(config['profiler_path'], exist_ok=True) 162 | 163 | # A few iterations to init the model 164 | print("first iteration") 165 | for data in random_data_iterator((config['batch_size'], 224, 224, 3), 1, -1, 1): 166 | model(data) 167 | print("first iteration done") 168 | 169 | os.makedirs(config['profiler_path'], exist_ok=True) 170 | tf.profiler.experimental.start(os.path.join(config['profiler_path'], 'logs_{}'.format(config['engine']))) 171 | for data in random_data_iterator((config['batch_size'], 224, 224, 3), 5, -1, 1): 172 | model(data) 173 | tf.profiler.experimental.stop() 174 | 175 | # Benchmark 176 | start_time = time.time() 177 | for data in random_data_iterator((config['batch_size'], 224, 224, 3), config['n_steps'], -1, 1): 178 | model(data) 179 | end_time = time.time() 180 | 181 | # TODO reactivate this part as soon as we know how to do it with a modern version of TF 182 | # try: 183 | # gpu = get_gpu_info().get('name')[0] 184 | # except TypeError: 185 | # gpu = 'cpu only' 186 | 187 | output = { 188 | 'total_time': end_time - start_time, 189 | 'n_iterations': config['n_steps'], 190 | 'n_params': n_params, 191 | 'tensorrt': config['export_tensorrt'], 192 | # 'gpu': gpu 193 | } 194 | print(json.dumps(output)) 195 | 196 | # clean up 197 | if os.path.exists(tmp_dir): 198 | shutil.rmtree(tmp_dir) 199 | 200 | 201 | if __name__ == "__main__": 202 | config = parse_args() 203 | benchmark(config) 204 | -------------------------------------------------------------------------------- /src/test_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import tempfile 4 | import unittest 5 | 6 | import cv2 7 | import numpy as np 8 | import tensorflow as tf 9 | 10 | from .data import dataloader 11 | from .data.test_dataloader import create_fake_dataset 12 | from .models.generic_model import GenericModel 13 | from .utils import (init_custom_checkpoint_callbacks, copy_and_resize, get_imagenet_data, get_partial_paths, 14 | get_paths, get_synset, get_val_label_dict) 15 | 16 | 17 | class Model1(GenericModel): 18 | def model(self): 19 | self.x = self.layers().Flatten()(self.x) 20 | self.x = self.layers().Dense(10)(self.x) 21 | 22 | 23 | class TestUtils(unittest.TestCase): 24 | def test_copy_and_resize(self): 25 | source = create_fake_dataset() 26 | dest = tempfile.mkdtemp() 27 | copy_and_resize(source, dest, 256) 28 | self.assertEqual(sorted(os.listdir(dest)), ['cat', 'dog']) 29 | self.assertEqual(sorted(os.listdir(os.path.join(dest, 'cat'))), ['0.jpg', '1.jpg']) 30 | self.assertEqual(sorted(os.listdir(os.path.join(dest, 'dog'))), ['0.jpg', '1.jpg']) 31 | self.assertEqual(cv2.imread(os.path.join(dest, 'dog', '1.jpg')).shape[0], 256) 32 | shutil.rmtree(source) 33 | shutil.rmtree(dest) 34 | 35 | def test_copy_and_resize_final_slash(self): 36 | source = create_fake_dataset() 37 | source += '/' 38 | dest = tempfile.mkdtemp() 39 | copy_and_resize(source, dest, 256) 40 | self.assertEqual(sorted(os.listdir(dest)), ['cat', 'dog']) 41 | self.assertEqual(sorted(os.listdir(os.path.join(dest, 'cat'))), ['0.jpg', '1.jpg']) 42 | self.assertEqual(sorted(os.listdir(os.path.join(dest, 'dog'))), ['0.jpg', '1.jpg']) 43 | self.assertEqual(cv2.imread(os.path.join(dest, 'dog', '1.jpg')).shape[0], 256) 44 |
shutil.rmtree(source) 45 | shutil.rmtree(dest) 46 | 47 | def test_get_synset(self): 48 | synset = get_synset("ressources/testing/fake_LOC_synset_mapping.txt") 49 | self.assertEqual(synset['n01496331'], 5) 50 | 51 | def test_get_paths(self): 52 | training_dataset = create_fake_training_data() 53 | val_dataset = create_fake_val_data() 54 | training_images = get_paths(training_dataset) 55 | val_images = get_paths(val_dataset) 56 | self.assertEqual(len(training_images), 20) 57 | self.assertEqual(len(val_images), 2) 58 | 59 | def test_get_val_label_dict(self): 60 | val_dict = get_val_label_dict("ressources/testing/fake_LOC_val_solution.csv") 61 | self.assertEqual(val_dict['ILSVRC2012_val_0'], 'n01484850') 62 | 63 | def test_get_partial_paths(self): 64 | training = create_fake_training_data() 65 | paths = get_partial_paths(training, 50) 66 | self.assertEqual(len(paths), 10) 67 | paths = get_partial_paths(training, 100) 68 | self.assertEqual(len(paths), 20) 69 | paths = get_partial_paths(training, 40) 70 | self.assertEqual(len(paths), 0) 71 | 72 | def test_get_imagenet_data(self): 73 | train_dir = create_fake_training_data() 74 | val_dir = create_fake_val_data() 75 | synset_path = "ressources/testing/fake_LOC_synset_mapping.txt" 76 | training_percentage = 100 77 | val_gt_path = "ressources/testing/fake_LOC_val_solution.csv" 78 | imagenet_data = get_imagenet_data({'synset_path': synset_path, 79 | 'train_dir': train_dir, 80 | 'train_data_percentage': training_percentage, 81 | 'val_dir': val_dir, 82 | 'val_gt_path': val_gt_path}) 83 | train_paths, train_labels, val_paths, val_labels = imagenet_data 84 | self.assertEqual(type(train_paths[0]), str) 85 | self.assertEqual(type(val_paths[0]), str) 86 | self.assertEqual(type(train_labels[0]), int) 87 | self.assertEqual(type(val_labels[0]), int) 88 | self.assertEqual(len(train_paths), 20) 89 | self.assertEqual(len(train_labels), 20) 90 | self.assertEqual(len(val_paths), 2) 91 | self.assertEqual(len(val_labels), 2) 92 | 93 | def test_data_pipeline_imagenet_data(self): 94 | train_dir = create_fake_training_data() 95 | val_dir = create_fake_val_data() 96 | synset_path = "ressources/testing/fake_LOC_synset_mapping.txt" 97 | training_percentage = 100 98 | val_gt_path = "ressources/testing/fake_LOC_val_solution.csv" 99 | imagenet_data = get_imagenet_data({'synset_path': synset_path, 100 | 'train_dir': train_dir, 101 | 'train_data_percentage': training_percentage, 102 | 'val_dir': val_dir, 103 | 'val_gt_path': val_gt_path}) 104 | train_paths, train_labels, val_paths, val_labels = imagenet_data 105 | dataset = dataloader.get_dataset(train_paths, train_labels, n_classes=10, batch_size=2) 106 | i = 0 107 | for image, label in dataset: 108 | self.assertEqual(label.numpy().shape, (2, 10)) 109 | self.assertTrue(label.numpy()[0, 0] in [0, 1]) 110 | self.assertTrue(label.numpy()[1, 1] in [0, 1]) 111 | self.assertEqual(image.numpy().shape, (2, 224, 224, 3)) 112 | i += 1 113 | 114 | self.assertEqual(i, 10) 115 | 116 | shutil.rmtree(train_dir) 117 | shutil.rmtree(val_dir) 118 | 119 | def test_init_custom_checkpoint_callbacks(self): 120 | model = Model1('tensorflow', factor=1).model 121 | optimizer = tf.keras.optimizers.Adam(lr=0.001) 122 | model.compile(optimizer=optimizer, loss='categorical_crossentropy') 123 | ckpt_dir = tempfile.mkdtemp() 124 | callback, latest_epoch = init_custom_checkpoint_callbacks(model, optimizer, ckpt_dir, max_ckpt=5, save_frequency=1) 125 | self.assertEqual(os.listdir(ckpt_dir), []) 126 | self.assertEqual(latest_epoch, 0) 127 | 128 | # train for 
one epoch 129 | train_dir = create_fake_training_data() 130 | val_dir = create_fake_val_data() 131 | synset_path = "ressources/testing/fake_LOC_synset_mapping.txt" 132 | training_percentage = 100 133 | val_gt_path = "ressources/testing/fake_LOC_val_solution.csv" 134 | imagenet_data = get_imagenet_data({'synset_path': synset_path, 135 | 'train_dir': train_dir, 136 | 'train_data_percentage': training_percentage, 137 | 'val_dir': val_dir, 138 | 'val_gt_path': val_gt_path}) 139 | train_paths, train_labels, val_paths, val_labels = imagenet_data 140 | train_dataset = dataloader.get_dataset(train_paths, train_labels, n_classes=10, batch_size=2) 141 | 142 | model.fit(x=train_dataset, 143 | epochs=1, 144 | callbacks=[callback], 145 | max_queue_size=16, 146 | workers=8, 147 | ) 148 | 149 | # check that ckpts were written 150 | files = os.listdir(ckpt_dir) 151 | self.assertTrue('checkpoint' in files) 152 | self.assertTrue('ckpt-1.index' in files) 153 | self.assertTrue('ckpt-1.data-00000-of-00002' in files) 154 | self.assertTrue('ckpt-1.data-00001-of-00002' in files) 155 | 156 | # train for 5 more epochs 157 | model.fit(x=train_dataset, 158 | epochs=5, 159 | callbacks=[callback], 160 | max_queue_size=16, 161 | workers=8, 162 | ) 163 | 164 | # check that ckpt-1 was removed and ckpts 2,3,4,5,6 were added 165 | files = os.listdir(ckpt_dir) 166 | self.assertTrue('checkpoint' in files) 167 | self.assertFalse('ckpt-1.index' in files) 168 | self.assertTrue('ckpt-2.index' in files) 169 | self.assertTrue('ckpt-3.index' in files) 170 | self.assertTrue('ckpt-4.index' in files) 171 | self.assertTrue('ckpt-5.index' in files) 172 | self.assertTrue('ckpt-6.index' in files) 173 | 174 | # check that we can load the last checkpoint 175 | del model 176 | del optimizer 177 | del callback 178 | model = Model1('tensorflow', factor=1).model 179 | optimizer = tf.keras.optimizers.Adam(lr=0.001) 180 | model.compile(optimizer=optimizer, loss='categorical_crossentropy') 181 | callback, latest_epoch = init_custom_checkpoint_callbacks(model, optimizer, ckpt_dir, max_ckpt=5, save_frequency=1) 182 | self.assertEqual(latest_epoch, 6) 183 | 184 | shutil.rmtree(train_dir) 185 | shutil.rmtree(val_dir) 186 | shutil.rmtree(ckpt_dir) 187 | 188 | 189 | def create_fake_training_data(): 190 | dataset_dir = tempfile.mkdtemp() 191 | dirs = ['n01440764', 'n01443537', 'n01484850', 'n01491361', 'n01494475', 'n01496331', 192 | 'n01498041', 'n01514668', 'n01514859', 'n01518878'] 193 | for d in dirs: 194 | os.makedirs(os.path.join(dataset_dir, d)) 195 | for i in range(2): 196 | cv2.imwrite(os.path.join(dataset_dir, d, '{}_{}.JPEG'.format(d, i)), np.ones((640, 480, 3), dtype=np.uint8) * 255) 197 | return dataset_dir 198 | 199 | 200 | def create_fake_val_data(): 201 | dataset_dir = tempfile.mkdtemp() 202 | for i in range(2): 203 | cv2.imwrite(os.path.join(dataset_dir, 'ILSVRC2012_val_{}.JPEG'.format(i)), np.ones((640, 480, 3), dtype=np.uint8) * 255) 204 | return dataset_dir 205 | -------------------------------------------------------------------------------- /src/models/mobilenet.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from .generic_model import GenericModelBuilder 3 | 4 | BATCHNORM_MOMENTUM = 0.9 5 | 6 | # This function is taken from the original tf repo.
7 | # It ensures that all layers have a channel number that is divisible by 8 8 | # It can be seen here: 9 | # https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py 10 | 11 | 12 | def _make_divisible(v, divisor, min_value=None): 13 | if min_value is None: 14 | min_value = divisor 15 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 16 | # Make sure that round down does not go down by more than 10%. 17 | if new_v < 0.9 * v: 18 | new_v += divisor 19 | return new_v 20 | 21 | 22 | def correct_pad(inputs, kernel_size, is_channels_first): 23 | """Returns a tuple for zero-padding for 2D convolution with downsampling. 24 | Args: 25 | inputs: input tensor (or list of tensors); the spatial size is read from its shape. 26 | kernel_size: An integer or tuple/list of 2 integers. 27 | Returns: 28 | A tuple. 29 | """ 30 | if type(inputs) == list: 31 | inputs = inputs[0] 32 | input_size = inputs.shape[2:4] if is_channels_first else inputs.shape[1:3] 33 | if isinstance(kernel_size, int): 34 | kernel_size = (kernel_size, kernel_size) 35 | adjust = (1, 1) if input_size[0] is None else (1 - input_size[0] % 2, 1 - input_size[1] % 2) 36 | correct = (kernel_size[0] // 2, kernel_size[1] // 2) 37 | return ((correct[0] - adjust[0], correct[0]), (correct[1] - adjust[1], correct[1])) 38 | 39 | 40 | class _MobileNetV2(GenericModelBuilder): 41 | def __init__(self, *args, **kwargs): 42 | super().__init__(*args, **kwargs) 43 | self.last_block_output_shape = 3 44 | 45 | def _inverted_res_block(self, x, expansion, stride, alpha, filters, block_id): 46 | layers = self.layers # we don't want to switch between tf and upstride in this block 47 | in_channels = self.last_block_output_shape 48 | 49 | pointwise_conv_filters = int(filters * alpha) 50 | pointwise_filters = _make_divisible(pointwise_conv_filters, 8) 51 | inputs = x 52 | prefix = 'block_{}_'.format(block_id) 53 | 54 | if block_id: 55 | # Expand 56 | x = layers.Conv2D((expansion * in_channels), kernel_size=1, padding='same', use_bias=False, name=prefix + 'expand', kernel_regularizer=self.weight_regularizer)(x) 57 | x = layers.BatchNormalization(axis=self.channel_axis, epsilon=1e-3, momentum=BATCHNORM_MOMENTUM, name=prefix + 'expand_BN')(x) 58 | x = layers.ReLU(6., name=prefix + 'expand_relu')(x) 59 | else: 60 | prefix = 'expanded_conv_' 61 | 62 | # Depthwise 63 | if stride == 2: 64 | x = layers.ZeroPadding2D(padding=correct_pad(x, 3, self.is_channels_first), name=prefix + 'pad')(x) 65 | x = layers.DepthwiseConv2D(kernel_size=3, strides=stride, activation=None, use_bias=False, padding='same' if stride == 1 else 'valid', 66 | name=prefix + 'depthwise', depthwise_regularizer=self.weight_regularizer)(x) 67 | x = layers.BatchNormalization(axis=self.channel_axis, epsilon=1e-3, momentum=BATCHNORM_MOMENTUM, name=prefix + 'depthwise_BN')(x) 68 | 69 | x = layers.ReLU(6., name=prefix + 'depthwise_relu')(x) 70 | 71 | # Project 72 | x = layers.Conv2D(pointwise_filters, kernel_size=1, padding='same', use_bias=False, activation=None, 73 | name=prefix + 'project', kernel_regularizer=self.weight_regularizer)(x) 74 | x = layers.BatchNormalization(axis=self.channel_axis, epsilon=1e-3, momentum=BATCHNORM_MOMENTUM, name=prefix + 'project_BN')(x) 75 | 76 | if in_channels == pointwise_filters and stride == 1: 77 | x = layers.Add(name=prefix + 'add')([inputs, x]) 78 | self.last_block_output_shape = pointwise_filters 79 | return x 80 | 81 | def model(self, x, alpha=1.0): 82 | """Instantiates the MobileNetV2 architecture.
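Reference: "MobileNetV2: Inverted Residuals and Linear Bottlenecks", https://arxiv.org/abs/1801.04381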
83 | Args: 84 | alpha: controls the width of the network. This is known as the 85 | width multiplier in the MobileNetV2 paper, but the name is kept for 86 | consistency with MobileNetV1 in Keras. 87 | - If `alpha` < 1.0, proportionally decreases the number 88 | of filters in each layer. 89 | - If `alpha` > 1.0, proportionally increases the number 90 | of filters in each layer. 91 | - If `alpha` = 1, default number of filters from the paper 92 | are used at each layer. 93 | """ 94 | weight_regularizer = self.weight_regularizer 95 | 96 | first_block_filters = _make_divisible(32 * alpha // self.factor, 8) 97 | x = self.layers.ZeroPadding2D(padding=correct_pad(x, 3, self.is_channels_first), name='Conv1_pad')(x) 98 | x = self.layers.Conv2D(first_block_filters, kernel_size=3, strides=self.first_conv_stride, padding='valid', 99 | use_bias=False, name='Conv1', kernel_regularizer=self.weight_regularizer)(x) 100 | x = self.layers.BatchNormalization(axis=self.channel_axis, epsilon=1e-3, momentum=BATCHNORM_MOMENTUM, name='bn_Conv1')(x) 101 | x = self.layers.ReLU(6., name='Conv1_relu')(x) 102 | 103 | self.last_block_output_shape = first_block_filters 104 | 105 | block_id = 0 106 | for configuration in self.configurations: 107 | for i in range(configuration[1]): 108 | stride = configuration[2] if i == 0 else 1 109 | x = self._inverted_res_block(x, filters=configuration[0]//self.factor, alpha=alpha, stride=stride, expansion=configuration[3], block_id=block_id) 110 | block_id += 1 111 | 112 | # no alpha applied to last conv as stated in the paper: 113 | # if the width multiplier is greater than 1 we 114 | # increase the number of output channels 115 | if alpha > 1.0: 116 | last_block_filters = _make_divisible(1280 * alpha, 8) 117 | else: 118 | last_block_filters = 1280 119 | last_block_filters = last_block_filters // self.factor 120 | 121 | x = self.layers.Conv2D(last_block_filters, kernel_size=1, use_bias=False, name='Conv_1', kernel_regularizer=self.weight_regularizer)(x) 122 | x = self.layers.BatchNormalization(axis=self.channel_axis, epsilon=1e-3, momentum=BATCHNORM_MOMENTUM, name='Conv_1_bn')(x) 123 | x = self.layers.ReLU(6., name='out_relu')(x) 124 | 125 | x = self.layers.GlobalAveragePooling2D()(x) 126 | return x 127 | 128 | 129 | class MobileNetV2(_MobileNetV2): 130 | def __init__(self, *args, **kwargs): 131 | # (channels, num_blocks, stride, expansion) 132 | self.first_conv_stride = 2 133 | self.configurations = [(16, 1, 1, 1), 134 | (24, 2, 2, 6), 135 | (32, 3, 2, 6), 136 | (64, 4, 2, 6), 137 | (96, 3, 1, 6), 138 | (160, 3, 2, 6), 139 | (320, 1, 1, 6)] 140 | super().__init__(*args, **kwargs) 141 | 142 | 143 | class MobileNetV2Cifar10(_MobileNetV2): 144 | def __init__(self, *args, **kwargs): 145 | # (channels, num_blocks, stride, expansion) 146 | self.first_conv_stride = 1 147 | self.configurations = [(16, 1, 1, 1), 148 | (24, 2, 1, 6), 149 | (32, 3, 2, 6), 150 | (64, 4, 2, 6), 151 | (96, 3, 1, 6), 152 | (160, 3, 2, 6), 153 | (320, 1, 1, 6)] 154 | super().__init__(*args, **kwargs) 155 | 156 | class MobileNetV2Cifar10_2(_MobileNetV2): 157 | def __init__(self, *args, **kwargs): 158 | # (channels, num_blocks, stride, expansion) 159 | self.first_conv_stride = 1 160 | self.configurations = [(16, 1, 1, 1), 161 | (24, 2, 1, 6), 162 | (32, 3, 1, 6), 163 | (64, 4, 2, 6), 164 | (96, 3, 1, 6), 165 | (160, 3, 2, 6), 166 | (320, 1, 1, 6)] 167 | super().__init__(*args, **kwargs) 168 | 169 | 170 | class MobileNetV2Cifar10Hyper(_MobileNetV2): 171 | def __init__(self, *args, **kwargs): 172 | 
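# hyper-model variant: the number of blocks per stage is sampled in model() below through keras-tuner's `self.hp`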
super().__init__(*args, **kwargs) 173 | 174 | def model(self, x): 175 | # (channels, num_blocks, stride, expansion) 176 | self.first_conv_stride = 1 177 | 178 | # define 11 MobileNetV2 versions 179 | blocks_family = [ 180 | [1, 1, 1, 1, 1, 1, 1], 181 | [1, 1, 1, 2, 1, 1, 1], 182 | [1, 1, 1, 2, 2, 1, 1], 183 | [1, 1, 2, 2, 2, 1, 1], 184 | [1, 1, 2, 2, 2, 2, 1], 185 | [1, 1, 2, 3, 2, 2, 1], 186 | [1, 2, 2, 3, 2, 2, 1], 187 | [1, 2, 3, 3, 2, 2, 1], 188 | [1, 2, 3, 3, 3, 2, 1], 189 | [1, 2, 3, 3, 3, 3, 1], 190 | [1, 2, 3, 4, 3, 3, 1] # Default config 191 | ] 192 | 193 | self.mobilenet_version = self.hp.Int('depth', min_value=0, max_value=10, step=1) 194 | block = blocks_family[self.mobilenet_version] 195 | self.configurations = [(16, block[0], 1, 1), 196 | (24, block[1], 1, 6), 197 | (32, block[2], 1, 6), 198 | (64, block[3], 2, 6), 199 | (96, block[4], 1, 6), 200 | (160, block[5], 2, 6), 201 | (320, block[6], 1, 6)] 202 | return super().model(x) 203 | -------------------------------------------------------------------------------- /src/models/alexnet.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from .generic_model import GenericModelBuilder 3 | 4 | 5 | class AlexNet(GenericModelBuilder): 6 | def model(self, x): 7 | # note regarding batch norm: in the official implementation, there are 2 batchnorms. 8 | # However, they seem to hurt training when used with upstride, so for now they are commented out. 9 | # Maybe this will change some day, which is why they are commented and not removed 10 | x = self.layers.Conv2D(96//self.factor, (11, 11), 4, padding='same', 11 | kernel_initializer=tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=42), 12 | bias_initializer=tf.keras.initializers.zeros(), 13 | use_bias=False, 14 | name='conv_1')(x) 15 | #x = tf.keras.layers.BatchNormalization()(x) 16 | x = self.layers.Activation('relu')(x) 17 | x = self.layers.MaxPooling2D((3, 3), strides=(2, 2))(x) 18 | # Layer 2 - Conv 19 | x = self.layers.Conv2D(256//self.factor, (5, 5), padding='same', 20 | kernel_initializer=tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=42), 21 | bias_initializer=tf.keras.initializers.ones(), 22 | use_bias=False, 23 | name='conv_2')(x) 24 | #x = tf.keras.layers.BatchNormalization()(x) 25 | x = self.layers.Activation('relu')(x) 26 | x = self.layers.MaxPooling2D((3, 3), strides=(2, 2))(x) 27 | # Layer 3 - Conv 28 | x = self.layers.Conv2D(384//self.factor, (3, 3), padding='same', 29 | kernel_initializer=tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=42), 30 | bias_initializer=tf.keras.initializers.zeros(), 31 | use_bias=False, 32 | name='conv_3')(x) 33 | x = self.layers.Activation('relu')(x) 34 | # Layer 4 - Conv 35 | x = self.layers.Conv2D(384//self.factor, (3, 3), padding='same', 36 | kernel_initializer=tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=42), 37 | bias_initializer=tf.keras.initializers.ones(), 38 | use_bias=False, 39 | name='conv_4')(x) 40 | x = self.layers.Activation('relu')(x) 41 | # Layer 5 - Conv 42 | x = self.layers.Conv2D(256//self.factor, (3, 3), padding='same', 43 | kernel_initializer=tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=42), 44 | bias_initializer=tf.keras.initializers.ones(), 45 | use_bias=False, 46 | name='conv_5')(x) 47 | x = self.layers.Activation('relu')(x) 48 | x = self.layers.MaxPooling2D((3, 3), strides=(2, 2))(x) 49 | # Layer 6 - Fully connected 50 | x = self.layers.Flatten()(x) 51 | x = self.layers.Dense(4096//self.factor, 52 |
kernel_initializer=tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=42), 53 | bias_initializer=tf.keras.initializers.ones(), 54 | use_bias=False, 55 | name='dense_1')(x) 56 | x = self.layers.Activation('relu')(x) 57 | x = self.layers.Dropout(0.5, seed=42)(x) 58 | # Layer 7 - Fully connected 59 | x = self.layers.Dense(4096//self.factor, 60 | kernel_initializer=tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=42), 61 | bias_initializer=tf.keras.initializers.ones(), 62 | use_bias=False, 63 | name='dense_2')(x) 64 | x = self.layers.Activation('relu')(x) 65 | x = self.layers.Dropout(0.5, seed=42)(x) 66 | return x 67 | 68 | 69 | class AlexNetQ(GenericModelBuilder): 70 | def model(self, x): 71 | x = self.layers.Conv2D(96//self.factor, (11, 11), 4, padding='same', 72 | bias_initializer=tf.keras.initializers.zeros(), 73 | use_bias=False, 74 | name='conv_1')(x) 75 | x = self.layers.BatchNormalization(axis=self.channel_axis)(x) 76 | x = self.layers.Activation('relu')(x) 77 | x = self.layers.MaxPooling2D((3, 3), strides=(2, 2))(x) 78 | # Layer 2 - Conv 79 | x = self.layers.Conv2D(256//self.factor, (5, 5), padding='same', 80 | bias_initializer=tf.keras.initializers.ones(), 81 | use_bias=False, 82 | name='conv_2')(x) 83 | x = self.layers.BatchNormalization(axis=self.channel_axis)(x) 84 | x = self.layers.Activation('relu')(x) 85 | x = self.layers.MaxPooling2D((3, 3), strides=(2, 2))(x) 86 | # Layer 3 - Conv 87 | x = self.layers.Conv2D(384//self.factor, (3, 3), padding='same', 88 | bias_initializer=tf.keras.initializers.zeros(), 89 | use_bias=False, 90 | name='conv_3')(x) 91 | x = self.layers.BatchNormalization(axis=self.channel_axis)(x) 92 | x = self.layers.Activation('relu')(x) 93 | # Layer 4 - Conv 94 | x = self.layers.Conv2D(384//self.factor, (3, 3), padding='same', 95 | bias_initializer=tf.keras.initializers.ones(), 96 | use_bias=False, 97 | name='conv_4')(x) 98 | x = self.layers.BatchNormalization(axis=self.channel_axis)(x) 99 | x = self.layers.Activation('relu')(x) 100 | # Layer 5 - Conv 101 | x = self.layers.Conv2D(256//self.factor, (3, 3), padding='same', 102 | bias_initializer=tf.keras.initializers.ones(), 103 | use_bias=False, 104 | name='conv_5')(x) 105 | x = self.layers.BatchNormalization(axis=self.channel_axis)(x) 106 | x = self.layers.Activation('relu')(x) 107 | x = self.layers.MaxPooling2D((3, 3), strides=(2, 2))(x) 108 | # Layer 6 - Fully connected 109 | x = self.layers.Flatten()(x) 110 | x = self.layers.Dense(4096//self.factor, 111 | bias_initializer=tf.keras.initializers.ones(), 112 | use_bias=False, 113 | name='dense_1')(x) 114 | x = self.layers.BatchNormalization(axis=self.channel_axis)(x) 115 | x = self.layers.Activation('relu')(x) 116 | x = self.layers.Dropout(0.5, seed=42)(x) 117 | # Layer 7 - Fully connected 118 | x = self.layers.Dense(4096//self.factor, 119 | bias_initializer=tf.keras.initializers.ones(), 120 | use_bias=False, 121 | name='dense_2')(x) 122 | x = self.layers.Activation('relu')(x) 123 | x = self.layers.Dropout(0.5, seed=42)(x) 124 | return x 125 | 126 | class AlexNetToy(GenericModelBuilder): 127 | def model(self, x): 128 | # This model is a mini version of the AlexNet 129 | x = self.layers.Conv2D(96//self.factor, (11, 11), 4, padding='valid', 130 | bias_initializer=tf.keras.initializers.zeros(), 131 | use_bias=False, 132 | name='conv_1')(x) 133 | x = self.layers.BatchNormalization(axis=self.channel_axis)(x) 134 | x = self.layers.Activation('relu')(x) 135 | x = self.layers.MaxPooling2D((3, 3), strides=(2, 2))(x) 136 | # Layer 2 - Conv 137 | x = 
self.layers.Conv2D(128//self.factor, (5, 5), padding='valid', 138 | bias_initializer=tf.keras.initializers.ones(), 139 | use_bias=False, 140 | name='conv_2')(x) 141 | x = self.layers.BatchNormalization(axis=self.channel_axis)(x) 142 | x = self.layers.Activation('relu')(x) 143 | x = self.layers.MaxPooling2D((3, 3), strides=(2, 2))(x) 144 | # Layer 3 - Conv 145 | x = self.layers.Conv2D(192//self.factor, (3, 3), padding='valid', 146 | bias_initializer=tf.keras.initializers.zeros(), 147 | use_bias=False, 148 | name='conv_3')(x) 149 | x = self.layers.BatchNormalization(axis=self.channel_axis)(x) 150 | x = self.layers.Activation('relu')(x) 151 | # Layer 4 - Conv 152 | x = self.layers.Conv2D(128//self.factor, (3, 3), padding='valid', 153 | bias_initializer=tf.keras.initializers.ones(), 154 | use_bias=False, 155 | name='conv_5')(x) 156 | x = self.layers.BatchNormalization(axis=self.channel_axis)(x) 157 | x = self.layers.Activation('relu')(x) 158 | x = self.layers.MaxPooling2D((3, 3), strides=(2, 2))(x) 159 | # Layer 5 - Fully connected 160 | x = self.layers.Flatten()(x) 161 | x = self.layers.Dense(2048//self.factor, 162 | bias_initializer=tf.keras.initializers.ones(), 163 | use_bias=False, 164 | name='dense_2')(x) 165 | x = self.layers.Activation('relu')(x) 166 | x = self.layers.Dropout(0.5, seed=42)(x) 167 | return x 168 | -------------------------------------------------------------------------------- /documentation/doc.md: -------------------------------------------------------------------------------- 1 | # Technical documentation 2 | 3 | Before training, you need a dataset. If you're working with a research dataset supported by [TFDS](https://www.tensorflow.org/datasets/catalog/overview#image_classification) then you can skip the first part, else you need to 4 | convert your dataset to TFRecord format. 5 | 6 | ### Converting the dataset to TFRecord format 7 | 8 | Go to the `scripts` directory and use the `tfrecord_writer.py` script. To get a full list of parameters for this script run `python tfrecord_writer.py --help`. 9 | 10 | The parameters you need to provide are: 11 | 12 | - `tfrecord_dir_path`: directory where to store tfrecords 13 | - `name`: name of the dataset 14 | - `data.images_dir_path`: directory path for the images 15 | 16 | Default behavior is to save the images in the tfrecord without any processing. 17 | If you want to add some processing you can: 18 | - crop the biggest possible square in the middle of the image and then scale it to a fixed size 19 | - add a margin to transform the image to a square and then scale it to a fixed size 20 | 21 | To do this, set the `preprocessing` parameter to "CENTER_CROP_THEN_SCALE" or "SQUARE_MARGIN_THEN_SCALE" and the `image_size` parameter to the desired size. 22 | 23 | The last thing to define is how to split the dataset. You may want to only create a training or validation set, but you also 24 | may need to split the images into different sets in a well balanced fashion. This can be achieved using the `data.split_names` and 25 | `data.split_percentages` parameters. By default `data.split_names` is a list of 3 elements, `['train', 'validation', 'test']`, and 26 | `data.split_percentages` a list of 3 floats, `[0.8, 0.1, 0.1]`, so 80% of the images will go into the train set, and 10% each into the validation and test sets (with 2,000 images, for instance, that is 1,600 for train and 200 each for validation and test). You can choose as many splits as you want.
27 | 28 | For example, this command prepares a dataset by center cropping then scaling to 256x256, with 70% in the training set and 30% in the validation set: 29 | 30 | ```bash 31 | python3 tfrecord_writer.py \ 32 | --tfrecord_dir_path /path/to/tfrecord \ 33 | --name example_dataset \ 34 | --preprocessing CENTER_CROP_THEN_SCALE \ 35 | --image_size 256 256 \ 36 | --data.images_dir_path /path/to/image \ 37 | --data.split_names train validation \ 38 | --data.split_percentages 0.7 0.3 39 | ``` 40 | 41 | ### Start the training 42 | 43 | At this step, you should have a tfrecord ready for training. Now, let's go into the details of the training configuration. 44 | 45 | #### Setup the Engine 46 | 47 | This code supports training with Tensorflow 2.3 and UpStride 1.0. To select the framework use the `framework` parameter. Possible values are: 48 | 49 | - tensorflow: well, you probably know this one 50 | - upstride_real: should be used when working with the same mathematics as Tensorflow 51 | - upstride_type1: should be used to deal with 2D data like points, lines, polygons or grayscale images. 52 | - upstride_type2: should be used to deal with simple 3D data like 3D point cloud, 3D lines, polyhedrons or colored images 53 | - upstride_type3: should be used to deal with more complex 3D data like 3D point cloud, 3D lines, polyhedrons or colored images 54 | 55 | Please note that when using the upstride engine, we highly recommend reducing the number of channels in the neural network. This can be done using the `factor` parameter. 56 | Factor is the division factor to scale the number of channels: factor=2 means the model will have half the number of channels compared to the default implementation. 57 | 58 | When using upstride_type2, we recommend using factor=2 or factor=4. 59 | 60 | Of course, if you're working with TensorFlow but with a small dataset, you may also want to change the factor. 61 | 62 | #### Setup the neural network 63 | 64 | Currently we support: 65 | 66 | - `AlexNet` 67 | - EfficientNet family, from `EfficientNetB0` to `EfficientNetB7` 68 | - Resnet family: `ResNet18`, `ResNet34`, `ResNet50`, `ResNet101`, `ResNet152`, `ResNet20CIFAR`, `ResNet32CIFAR`, `ResNet44CIFAR`, `ResNet56CIFAR`, `ResNet18NCHW`, `ResNet34NCHW`, `ResNet50NCHW`, `ResNet101NCHW`, `ResNet152NCHW` 69 | - MobileNet family: `MobileNetV2`, `MobileNetV2NCHW` 70 | - NasNet family: `NASNetCIFAR`, `NASNetLarge`, `NASNetMobile` 71 | - `SqueezeNet` 72 | - `TinyDarknet` 73 | - `VGG16` 74 | 75 | (of course this list will increase as time goes on) 76 | 77 | You can select the model you want to train using the `model_name` parameter. Then you may want to tune the size of the input of the network (parameter `input_size`, by default 224x224x3) 78 | and the size of the output of the neural network (parameter `num_classes`). 79 | 80 | 81 | #### Setup the dataloader and preprocessing 82 | 83 | First we need to give the training script the path to the tfrecord files. If you're working with a research dataset, the name of the dataset is enough: the code will download it automatically. For instance, to work with 84 | cifar10, `dataloader.name` can be set to `cifar10`. 85 | For custom datasets, you need to provide the name and the path of the tfrecord files (parameter `dataloader.data_dir`). 86 | 87 | Now we need to set up the data augmentation.
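As a quick illustration, here is a minimal sketch of the dataloader part of a training command for a custom tfrecord dataset (the dataset name and directory are hypothetical placeholders; the augmentation operations passed to the two lists are detailed just below):

```bash
python3 train.py \
    --dataloader.name my_custom_dataset \
    --dataloader.data_dir /path/to/tfrecords \
    --dataloader.batch_size 64 \
    --dataloader.train_list RandomCropThenResize RandomHorizontalFlip Normalize \
    --dataloader.val_list Resize CentralCrop Normalize
```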
The operations we support are: 88 | 89 | - CentralCrop 90 | - ColorJitter 91 | - Normalize 92 | - RandomHorizontalFlip 93 | - RandomRotate 94 | - RandomRotate90 95 | - RandomVerticalFlip 96 | - RandomCrop 97 | - RandomCropThenResize 98 | - Resize 99 | - ResizeThenRandomCrop 100 | - Translate 101 | 102 | Using the parameters `dataloader.train_list` and `dataloader.val_list` you can list the data augmentation operations to run in the data pipeline. Operations will be executed in the order of the list. 103 | Each of these operations has special parameters. Please check the output of `python3 train.py --help` for more details. 104 | 105 | #### Setup the optimizer 106 | 107 | The last thing to set up in the training pipeline is of course the optimizer. Currently we support: 108 | 109 | - adadelta 110 | - adagrad 111 | - adam 112 | - adam_amsgrad 113 | - sgd 114 | - sgd_momentum 115 | - sgd_nesterov 116 | - nadam 117 | - rmsprop 118 | 119 | You can select the one you want using the `optimizer.name` parameter. You may also want to tune the momentum of the optimizer using the `optimizer.momentum` parameter. 120 | 121 | The initial learning rate can be selected using `optimizer.lr`. We also support a list of learning rate decay strategies: 122 | 123 | - exponential_decay 124 | - step_decay 125 | - step_decay_schedule 126 | - polynomial_decay 127 | - inverse_time_decay 128 | - cosine_decay 129 | - lr_reduce_on_plateau 130 | 131 | You can choose to turn on the learning rate decay strategy using the option `optimizer.lr_decay_strategy.activate` and then select the one you prefer using `optimizer.lr_decay_strategy.lr_params.strategy` (default is lr_reduce_on_plateau). 132 | There is a list of parameters to set up for every one of these learning rate decays; please see `python3 train.py --help` or the file `submodules/global_dl/training/optimizers.py` for more details. 133 | 134 | #### Other parameters 135 | 136 | Now, most of the work is done. The only remaining options are: 137 | - `num_epochs`: the number of epochs to run 138 | - `checkpoint_dir`, `export_dir`, `log_dir`: paths to write checkpoints, exported model and training logs 139 | - `configuration.with_mixed_precision`, `configuration.mirrored`, `configuration.profiler`: configuration for mixed precision training, mirrored strategy and the tensorboard profiler 140 | 141 | 142 | #### training example 143 | 144 | To train a mobilenet for 20 epochs on cifar10, a training command can look like this: 145 | 146 | ```bash 147 | python train.py \ 148 | --model.name MobileNetV2 \ 149 | --model.num_classes 10 \ 150 | --model.input_size 32 32 3 \ 151 | --num_epochs 20 \ 152 | --checkpoint_dir /tmp/checkpoint \ 153 | --log_dir /tmp/log \ 154 | --export.dir /tmp \ 155 | --dataloader.name cifar10 \ 156 | --dataloader.train_list RandomHorizontalFlip Normalize \ 157 | --dataloader.val_list Normalize \ 158 | --dataloader.val_split_id validation \ 159 | --dataloader.train_split_id train \ 160 | --dataloader.batch_size 64 \ 161 | --optimizer.lr 0.0001 162 | ``` 163 | 164 | ## More regarding the parameters 165 | 166 | When training a neural network, you can use a bash command, but it is also possible to use a yaml file.
164 | ## More regarding the parameters
165 |
166 | When training a neural network, you can pass every parameter on the command line, but it is also possible to use a YAML file. For instance, the previous example can also be run with the command:
167 |
168 | ```bash
169 | python train.py --yaml_config conf.yml
170 | ```
171 |
172 | and the file `conf.yml`:
173 |
174 | ```yaml
175 | model:
176 |   name: MobileNetV2
177 |   num_classes: 10
178 |   input_size: [32, 32, 3]
179 | num_epochs: 20
180 | checkpoint_dir: /tmp/checkpoint
181 | log_dir: /tmp/log
182 | export:
183 |   dir: /tmp
184 | dataloader:
185 |   name: cifar10
186 |   train_list: [RandomHorizontalFlip, Normalize]
187 |   val_list: [Normalize]
188 |   val_split_id: validation
189 |   train_split_id: train
190 |   batch_size: 64
191 | optimizer:
192 |   lr: 0.0001
193 | ```
194 |
195 | You can also mix a configuration file and the command line. If a parameter is defined in both, the command line will prevail.
196 | For more information on the argument parser, please visit https://github.com/UpStride/betterargparse
197 |
198 |
199 | ## Keras-Tuner
200 |
201 | It is also possible to perform hyper-parameter tuning using Keras-Tuner and the Hyperband algorithm. The two default exploration axes are:
202 |
203 | - depth: the number of blocks in the network
204 | - factor: the division ratio for the number of channels
205 |
206 | Please note that Keras-Tuner only works with MobileNet and ResNet for now.
207 |
208 | The final results will look like this (figures are validation accuracy percentages):
209 |
210 | ![Keras-Tuner](../ressources/keras_tuner.png)
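These axes are expressed through Keras-Tuner's hyper-parameter API; `ResNetHyper` in `src/models/resnet.py`, for instance, draws its depth with `hp.Int('depth', min_value=5, max_value=17, step=1)`. Below is a minimal, self-contained sketch of a Hyperband search over a toy hypermodel; the model body and the `factor` value set are illustrative placeholders, not the repository's actual search space:

```python
import tensorflow as tf
import kerastuner as kt  # pip package keras-tuner (imported as keras_tuner in newer versions)


def build_model(hp):
  """Toy hypermodel exposing the two default exploration axes."""
  depth = 2 * hp.Int('depth', min_value=5, max_value=17, step=1)  # mirrors ResNetHyper
  factor = hp.Choice('factor', values=[1, 2, 4])  # hypothetical value set
  # in the real hypermodels, `depth` selects the number of residual blocks;
  # here it is only declared so that Hyperband explores it
  inputs = tf.keras.Input((32, 32, 3))
  x = tf.keras.layers.Conv2D(64 // factor, 3, activation='relu')(inputs)
  x = tf.keras.layers.GlobalAveragePooling2D()(x)
  outputs = tf.keras.layers.Dense(10, activation='softmax')(x)
  model = tf.keras.Model(inputs, outputs)
  model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
  return model


tuner = kt.Hyperband(build_model, objective='val_accuracy', max_epochs=20)
# tuner.search(train_dataset, validation_data=val_dataset)
```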
211 | -------------------------------------------------------------------------------- /src/models/test_fbnetv2.py: -------------------------------------------------------------------------------- 1 | import json 2 | import unittest 3 | import os 4 | import shutil 5 | import tempfile 6 | import yaml 7 | import tensorflow as tf 8 | import numpy as np 9 | from . import fbnetv2 10 | 11 | 12 | class TestBinaryVector(unittest.TestCase): 13 | def test_create_binary_vector(self): 14 | binary_vectors = fbnetv2.create_binary_vector(channel_sizes=[2, 7, 10], dtype=tf.float32) 15 | # check that we have 3 vectors 16 | self.assertEqual(len(binary_vectors), 3) 17 | # check the types of vectors 18 | for i in range(3): 19 | self.assertEqual(binary_vectors[i].dtype, tf.float32) 20 | # check the vector content 21 | self.assertTrue(np.array_equal(binary_vectors[0].numpy(), [1., 1., 0., 0., 0., 0., 0., 0., 0., 0.])) 22 | self.assertTrue(np.array_equal(binary_vectors[1].numpy(), [1., 1., 1., 1., 1., 1., 1., 0., 0., 0.])) 23 | self.assertTrue(np.array_equal(binary_vectors[2].numpy(), [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])) 24 | 25 | 26 | class TestGetMask(unittest.TestCase): 27 | def test_get_mask(self): 28 | binary_vectors = fbnetv2.create_binary_vector(channel_sizes=[1, 2, 4], dtype=tf.float32) 29 | g = tf.convert_to_tensor([2., 3., 5.]) 30 | mask = fbnetv2.get_mask(binary_vectors, g) 31 | self.assertEqual(mask.dtype, tf.float32) 32 | self.assertTrue(np.array_equal(mask, [10., 8., 5., 5.])) 33 | 34 | 35 | class TestChannelMasking(unittest.TestCase): 36 | def test_init(self): 37 | cm = fbnetv2.ChannelMasking(1, 5, 2, 'toto') 38 | self.assertEqual(cm.channel_sizes, [1, 3, 5]) 39 | 40 | def test_build_manual(self): 41 | cm = fbnetv2.ChannelMasking(1, 5, 2, 'toto') 42 | cm.build((15, 15, 3)) 43 | self.assertTrue(np.array_equal(cm.alpha.numpy(), [1., 1., 1.])) 44 | self.assertTrue(np.array_equal(cm.binary_vectors[0].numpy(), [1., 0., 0., 0., 0.])) 45 | self.assertTrue(np.array_equal(cm.binary_vectors[1].numpy(), [1., 1., 1., 0., 0.])) 46 | self.assertTrue(np.array_equal(cm.binary_vectors[2].numpy(), [1., 1., 1., 1., 1.])) 47 | 48 | def test_build_keras(self): 49 | cm = fbnetv2.ChannelMasking(1, 5, 2, 'toto', gumble_noise=False) 50 | model = tf.keras.Sequential([tf.keras.layers.Conv2D(5, (3, 3), padding='same', use_bias=False), cm]) 51 | model(tf.zeros((1, 24, 24, 3), dtype=tf.float32)) # build is called here 52 | self.assertTrue(np.array_equal(cm.alpha.numpy(), [1., 1., 1.])) 53 | self.assertTrue(np.array_equal(cm.binary_vectors[0].numpy(), [1., 0., 0., 0., 0.])) 54 | self.assertTrue(np.array_equal(cm.binary_vectors[1].numpy(), [1., 1., 1., 0., 0.])) 55 | self.assertTrue(np.array_equal(cm.binary_vectors[2].numpy(), [1., 1., 1., 1., 1.])) 56 | 57 | def test_call(self): 58 | cm = fbnetv2.ChannelMasking(1, 5, 2, 'toto', gumble_noise=False) 59 | model = tf.keras.Sequential([cm]) 60 | out = model(tf.ones((1, 3, 3, 5), dtype=tf.float32)) # build is called here 61 | # check g parameter 62 | for e in cm.g.numpy(): 63 | self.assertAlmostEqual(e, 1/3) 64 | # check output of the model 65 | self.assertEqual(out.shape, (1, 3, 3, 5)) 66 | components = out[0, 0, 0] 67 | self.assertAlmostEqual(components.numpy()[0], 1) 68 | self.assertAlmostEqual(components.numpy()[1], 2/3) 69 | self.assertAlmostEqual(components.numpy()[2], 2/3) 70 | self.assertAlmostEqual(components.numpy()[3], 1/3) 71 | self.assertAlmostEqual(components.numpy()[4], 1/3) 72 | 73 | 74 | class TestExponentialDecay(unittest.TestCase): 75 | def non_increasing(self, decay): 76 | """Checks that the provided decay function is non-increasing over a range 77 | 78 | Args: 79 | decay (instance): Instance of the decay to be tested 80 | 81 | Returns: 82 | bool: True if every element of value_list is >= the next one 83 | (i.e. the decay is non-increasing), else False 84 | """ 85 | value_list = [decay(i) for i in range(1, 100)] 86 | return
all([i >= j for i, j in zip(value_list, value_list[1:])]) 87 | 88 | def test_exponential_decay(self): 89 | decay = fbnetv2.exponential_decay(5, 1, 0.956) 90 | 91 | self.assertEqual(decay(0), 5) 92 | self.assertAlmostEqual(decay(10), 3.188, places=3) 93 | 94 | # test that the function is non-increasing over the number of epochs 95 | self.assertTrue(self.non_increasing(decay)) 96 | 97 | # negative test to ensure a decay rate greater than 1 is increasing 98 | decay = fbnetv2.exponential_decay(5, 1, 1.1) 99 | self.assertFalse(self.non_increasing(decay)) 100 | 101 | 102 | class TestPostTrainingAnalysis(unittest.TestCase): 103 | def test_post_training_analysis(self): 104 | cm1 = fbnetv2.ChannelMasking(1, 5, 2, 'toto_1_savable', gumble_noise=False) 105 | cm2 = fbnetv2.ChannelMasking(8, 16, 4, 'toto_2_savable', gumble_noise=False) 106 | model = tf.keras.Sequential( 107 | [tf.keras.layers.Conv2D(5, (3, 3), padding='same', use_bias=False), 108 | cm1, 109 | tf.keras.layers.Conv2D(16, (3, 3), padding='same', use_bias=False), 110 | cm2, 111 | ]) 112 | model(tf.zeros((1, 24, 24, 3), dtype=tf.float32)) # build is called here 113 | tmpdir = tempfile.mkdtemp() 114 | tmpfile = os.path.join(tmpdir, "test.yaml") 115 | fbnetv2.post_training_analysis(model, tmpfile) 116 | with open(tmpfile, 'r') as f: 117 | read = yaml.safe_load(f) 118 | self.assertDictEqual({"toto_1": 1, "toto_2": 8}, read) 119 | shutil.rmtree(tmpdir) 120 | 121 | 122 | class TestSplitTrainableWeights(unittest.TestCase): 123 | def test_split_trainable_weights(self): 124 | layer0 = tf.keras.layers.Input((32, 32, 3)) 125 | layer1 = tf.keras.layers.Conv2D(8, kernel_size=3, strides=1, padding='same') 126 | layer2 = fbnetv2.ChannelMasking(2, 8, 2, 'abc', gumble_noise=False) 127 | model = tf.keras.Sequential([layer0, layer1, layer2]) 128 | 129 | weights, arch_params = fbnetv2.split_trainable_weights(model) 130 | 131 | true_total_weight_param = 3*3*3*8+8 132 | true_total_arch_param = len(range(2, 8+1, 2)) 133 | 134 | # calculate number of weight params returned by the function 135 | total_weight_params = 0 136 | for w in weights: 137 | total_weight_params += np.prod(w.shape.as_list()) 138 | 139 | # calculate number of architecture params returned by the function 140 | total_arch_params = 0 141 | for p in arch_params: 142 | total_arch_params += np.prod(p.shape.as_list()) 143 | 144 | self.assertEqual(total_arch_params, true_total_arch_param) 145 | self.assertEqual(total_weight_params, true_total_weight_param) 146 | self.assertEqual(total_weight_params+total_arch_params, true_total_weight_param+true_total_arch_param) 147 | 148 | def test_not_arch_params(self): 149 | layer0 = tf.keras.layers.Input((32, 32, 3)) 150 | layer1 = tf.keras.layers.Conv2D(8, kernel_size=3, strides=1, padding='same') 151 | model = tf.keras.Sequential([layer0, layer1]) 152 | 153 | # check that it raises an error when there are no architectural parameters named 'alpha' 154 | self.assertRaises(ValueError, fbnetv2.split_trainable_weights, model, arch_params_name='alpha') 155 | 156 | 157 | class TestGumbelSoftmax(unittest.TestCase): 158 | def testSampling(self): 159 | fbnetv2.define_temperature(5.)
160 | noise = 0.0001 161 | logits = tf.constant([-1., 0.5, 1.]) 162 | 163 | g = fbnetv2.gumbel_softmax(logits, gumble_noise=False) 164 | 165 | self.assertEqual(logits.shape.as_list(), g.shape.as_list()) 166 | self.assertAlmostEqual(g.numpy().sum(), 1.0, 6) 167 | self.assertEqual(g.numpy().tolist(), tf.math.softmax((logits+noise)/5.).numpy().tolist()) 168 | 169 | def testUniformLikeDist(self): 170 | # set the temperature very high to see a uniform-like distribution 171 | fbnetv2.define_temperature(500000.0) 172 | logits = tf.constant([-2., 2., -2.5, -2.]) 173 | 174 | g = fbnetv2.gumbel_softmax(logits, gumble_noise=False) 175 | 176 | for i in range(4): 177 | self.assertAlmostEqual(float(g[i].numpy()), 0.25, 5) 178 | 179 | previous_g = g 180 | for t in range(100, 1, -1): 181 | fbnetv2.define_temperature(t) 182 | g = fbnetv2.gumbel_softmax(logits, gumble_noise=False) 183 | for i in [0, 2, 3]: 184 | self.assertLess(g[i], previous_g[i]) 185 | self.assertLess(previous_g[1], g[1]) 186 | previous_g = g 187 | fbnetv2.define_temperature(5.0) 188 | 189 | def testOnehotLikeDist(self): 190 | # set the temperature very low to see a one-hot-like distribution 191 | fbnetv2.define_temperature(0.00001) 192 | logits = tf.constant([-2., 2., -2.5, -2.]) 193 | 194 | g = fbnetv2.gumbel_softmax(logits, gumble_noise=False) 195 | 196 | self.assertAlmostEqual(g[0], 0.) 197 | self.assertAlmostEqual(g[1], 1.) 198 | self.assertAlmostEqual(g[2], 0.) 199 | self.assertAlmostEqual(g[3], 0.) 200 | fbnetv2.define_temperature(5.0) 201 | 202 | 203 | class TestSaveArchParams(unittest.TestCase): 204 | def test_save_arch_params(self): 205 | cm1 = fbnetv2.ChannelMasking(1, 5, 2, 'toto_1_savable', gumble_noise=False) 206 | cm2 = fbnetv2.ChannelMasking(8, 16, 4, 'toto_2_savable', gumble_noise=False) 207 | model = tf.keras.Sequential( 208 | [tf.keras.layers.Conv2D(5, (3, 3), padding='same', use_bias=False), 209 | cm1, 210 | tf.keras.layers.Conv2D(16, (3, 3), padding='same', use_bias=False), 211 | cm2, 212 | ]) 213 | model(tf.zeros((1, 24, 24, 3), dtype=tf.float32)) # build is called here 214 | 215 | tmpdir = tempfile.mkdtemp() 216 | fbnetv2.save_arch_params(model, epoch=0, log_dir=tmpdir) 217 | 218 | # check that the file exists and check its content 219 | self.assertTrue(os.path.exists(os.path.join(tmpdir, "alpha.json"))) 220 | with open(os.path.join(tmpdir, "alpha.json")) as f: 221 | a = json.load(f) 222 | self.assertTrue('0' in a) 223 | self.assertTrue('toto_1_savable' in a['0']) 224 | self.assertTrue('toto_2_savable' in a['0']) 225 | 226 | # simulate next epoch 227 | cm1.alpha = tf.convert_to_tensor([0.5, 1, 0.5], dtype=tf.float32) 228 | fbnetv2.save_arch_params(model, epoch=1, log_dir=tmpdir) 229 | with open(os.path.join(tmpdir, "alpha.json")) as f: 230 | a = json.load(f) 231 | for i in range(2): # check both saved epochs 232 | self.assertTrue(str(i) in a) 233 | self.assertTrue('toto_1_savable' in a[str(i)]) 234 | self.assertTrue('toto_2_savable' in a[str(i)]) 235 | 236 | shutil.rmtree(tmpdir) 237 | -------------------------------------------------------------------------------- /src/models/resnet.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from .generic_model import GenericModelBuilder 3 | 4 | 5 | weight_init = tf.keras.initializers.VarianceScaling() 6 | 7 | 8 | class ResNet(GenericModelBuilder): 9 | def __init__(self, res_n, *args, **kwargs): 10 | super(ResNet, self).__init__(*args, **kwargs) 11 | self.res_n = res_n 12 | 13 | def get_residual_layer(self): 14 | n_to_residual = { 15 | 10:
[1, 1, 1, 1], 16 | 12: [1, 1, 2, 1], 17 | 14: [1, 2, 2, 1], 18 | 16: [2, 2, 2, 1], 19 | 18: [2, 2, 2, 2], 20 | 20: [2, 2, 3, 2], 21 | 22: [2, 3, 3, 2], 22 | 24: [2, 3, 4, 2], 23 | 26: [2, 3, 5, 2], 24 | 28: [2, 3, 6, 2], 25 | 30: [2, 4, 6, 2], 26 | 32: [3, 4, 6, 2], 27 | 34: [3, 4, 6, 3], 28 | 50: [3, 4, 6, 3], 29 | 101: [3, 4, 23, 3], 30 | 152: [3, 8, 36, 3], 31 | } 32 | return n_to_residual[self.res_n] 33 | 34 | def model(self, x): 35 | if self.res_n < 50: 36 | residual_block = self.resblock 37 | else: 38 | residual_block = self.bottle_resblock 39 | residual_list = self.get_residual_layer() 40 | ch = 64 41 | weight_regularizer = self.weight_regularizer 42 | x = self.layers.Conv2D(int(ch/self.factor), 7, kernel_initializer=weight_init, kernel_regularizer=weight_regularizer, padding="same", name='conv')(x) 43 | x = self.layers.MaxPooling2D(pool_size=3, strides=2)(x) 44 | for i in range(residual_list[0]): 45 | x = residual_block(x, channels=int(ch/self.factor), downsample=False, block_name='resblock0_' + str(i)) 46 | # block 1 47 | x = residual_block(x, channels=int(ch/self.factor) * 2, downsample=True, block_name='resblock1_0') 48 | for i in range(1, residual_list[1]): 49 | x = residual_block(x, channels=int(ch/self.factor) * 2, downsample=False, block_name='resblock1_' + str(i)) 50 | # block 2 51 | x = residual_block(x, channels=int(ch/self.factor) * 4, downsample=True, block_name='resblock2_0') 52 | for i in range(1, residual_list[2]): 53 | x = residual_block(x, channels=int(ch/self.factor) * 4, downsample=False, block_name='resblock2_' + str(i)) 54 | # block 3 55 | x = residual_block(x, channels=int(ch/self.factor) * 8, downsample=True, block_name='resblock_3_0') 56 | for i in range(1, residual_list[3]): 57 | x = residual_block(x, channels=int(ch/self.factor) * 8, downsample=False, block_name='resblock_3_' + str(i)) 58 | # block 4 59 | x = self.layers.BatchNormalization(axis=self.channel_axis, name='batch_norm_last')(x) 60 | x = self.layers.Activation('relu', name='relu_last')(x) 61 | x = self.layers.GlobalAveragePooling2D()(x) 62 | return x 63 | 64 | def resblock(self, x, channels, use_bias=True, downsample=False, block_name='resblock'): 65 | layers = self.layers 66 | weight_regularizer = self.weight_regularizer 67 | x_init = x 68 | x = layers.BatchNormalization(axis=self.channel_axis, name=block_name + '/batch_norm_0')(x) 69 | x = layers.Activation('relu', name=block_name + '/relu_0')(x) 70 | if downsample: 71 | x = layers.Conv2D(channels, 3, 2, kernel_initializer=weight_init, kernel_regularizer=weight_regularizer, 72 | use_bias=use_bias, padding='same', name=block_name + '/conv_0')(x) 73 | x_init = layers.Conv2D(channels, 1, 2, kernel_initializer=weight_init, kernel_regularizer=weight_regularizer, 74 | use_bias=False, padding='same', name=block_name + '/conv_init')(x_init) 75 | else: 76 | x = layers.Conv2D(channels, 3, 1, kernel_initializer=weight_init, kernel_regularizer=weight_regularizer, 77 | use_bias=False, padding='same', name=block_name + '/conv_0')(x) 78 | x = layers.BatchNormalization(axis=self.channel_axis, name=block_name + '/batch_norm_1')(x) 79 | x = layers.Activation('relu', name=block_name + '/relu_1')(x) 80 | x = layers.Conv2D(channels, 3, 1, kernel_initializer=weight_init, kernel_regularizer=weight_regularizer, 81 | use_bias=False, padding='same', name=block_name + '/conv_1')(x) 82 | x = layers.Add()([x, x_init]) 83 | return x 84 | 85 | def bottle_resblock(self, x, channels, use_bias=True, downsample=False, block_name='bottle_resblock'): 86 | layers = self.layers 87 
| weight_regularizer = self.weight_regularizer 88 | x = layers.BatchNormalization(axis=self.channel_axis, name=block_name + '/batch_norm_1x1_front')(x) 89 | shortcut = layers.Activation('relu', name=block_name + '/relu_1x1_front')(x) 90 | x = layers.Conv2D(channels, 1, 1, 'same', kernel_initializer=weight_init, kernel_regularizer=weight_regularizer, 91 | use_bias=False, name=block_name + '/conv_1x1_front')(shortcut) 92 | x = layers.BatchNormalization(axis=self.channel_axis, name=block_name + '/batch_norm_3x3')(x) 93 | x = layers.Activation('relu', name=block_name + '/relu_3x3')(x) 94 | if downsample: 95 | x = layers.Conv2D(channels, 3, 2, 'same', kernel_initializer=weight_init, 96 | kernel_regularizer=weight_regularizer, use_bias=use_bias, name=block_name + '/conv_0')(x) 97 | shortcut = layers.Conv2D(channels * 4, 1, 2, 'same', kernel_initializer=weight_init, kernel_regularizer=weight_regularizer, 98 | use_bias=False, name=block_name + '/conv_init')(shortcut) 99 | else: 100 | x = layers.Conv2D(channels, 3, 1, 'same', kernel_initializer=weight_init, 101 | kernel_regularizer=weight_regularizer, use_bias=False, name=block_name + '/conv_0')(x) 102 | shortcut = layers.Conv2D(channels * 4, 1, 1, 'same', kernel_initializer=weight_init, kernel_regularizer=weight_regularizer, 103 | use_bias=False, name=block_name + '/conv_init')(shortcut) 104 | x = layers.BatchNormalization(axis=self.channel_axis, name=block_name + '/batch_norm_1x1_back')(x) 105 | x = layers.Activation('relu', name=block_name + '/relu_1x1_back')(x) 106 | x = layers.Conv2D(channels * 4, 1, 1, 'same', kernel_initializer=weight_init, kernel_regularizer=weight_regularizer, 107 | use_bias=False, name=block_name + '/conv_1x1_back')(x) 108 | x = layers.Add()([x, shortcut]) 109 | return x 110 | 111 | 112 | class ResNetHyper(ResNet): 113 | def __init__(self, *args, **kwargs): 114 | super().__init__(2 * self.hp.Int('depth', min_value=5, max_value=17, step=1), *args, **kwargs) 115 | 116 | 117 | class ResNet50(ResNet): 118 | def __init__(self, *args, **kwargs): 119 | super().__init__(50, *args, **kwargs) 120 | 121 | 122 | class ResNet101(ResNet): 123 | def __init__(self, *args, **kwargs): 124 | super().__init__(101, *args, **kwargs) 125 | 126 | 127 | class ResNet152(ResNet): 128 | def __init__(self, *args, **kwargs): 129 | super().__init__(152, *args, **kwargs) 130 | 131 | 132 | class ResNet34(ResNet): 133 | def __init__(self, *args, **kwargs): 134 | super().__init__(34, *args, **kwargs) 135 | 136 | 137 | class ResNet18(ResNet): 138 | def __init__(self, *args, **kwargs): 139 | super().__init__(18, *args, **kwargs) 140 | 141 | 142 | class ResNetCIFAR(GenericModelBuilder): 143 | def __init__(self, res_n, *args, **kwargs): 144 | super(ResNetCIFAR, self).__init__(*args, **kwargs) 145 | self.res_n = res_n 146 | 147 | def get_residual_layer(self): 148 | n_to_residual = { 149 | 20: [3], 150 | 32: [5], 151 | 44: [7], 152 | 56: [9], 153 | } 154 | return n_to_residual[self.res_n] * 3 155 | 156 | def model(self, x): 157 | residual_list = self.get_residual_layer() 158 | weight_regularizer = self.weight_regularizer 159 | ch = 16 160 | x = self.layers.Conv2D(int(ch/self.factor), 3, 1, kernel_initializer=weight_init, kernel_regularizer=weight_regularizer, 161 | padding="same", name='conv')(x) 162 | 163 | # block 1 164 | for i in range(residual_list[0]): 165 | x = self.resblock_cifar(x, channels=int(ch/self.factor), stride=1, downsample=False, block_name='resblock0_' + str(i)) 166 | # block 2 167 | x = self.resblock_cifar(x, channels=int(ch/self.factor) * 2, 
stride=2, downsample=True, block_name='resblock1_0') 168 | for i in range(1, residual_list[1]): 169 | x = self.resblock_cifar(x, channels=int(ch/self.factor) * 2, stride=1, downsample=False, block_name='resblock1_' + str(i)) 170 | # block 3 171 | x = self.resblock_cifar(x, channels=int(ch/self.factor) * 4, stride=2, downsample=True, block_name='resblock2_0') 172 | for i in range(1, residual_list[2]): 173 | x = self.resblock_cifar(x, channels=int(ch/self.factor) * 4, stride=1, downsample=False, block_name='resblock2_' + str(i)) 174 | # block 4 175 | x = self.layers.BatchNormalization(axis=self.channel_axis, name='batch_norm_last')(x) 176 | x = self.layers.Activation('relu', name='relu_last')(x) 177 | x = self.layers.GlobalAveragePooling2D()(x) 178 | return x 179 | 180 | def resblock_cifar(self, x, channels, use_bias=True, stride=1, downsample=False, block_name='resblock'): 181 | layers = self.layers 182 | weight_regularizer = self.weight_regularizer 183 | x_init = x 184 | x = layers.BatchNormalization(axis=self.channel_axis, name=block_name + '/batch_norm_0')(x) 185 | x = layers.Activation('relu', name=block_name + '/relu_0')(x) 186 | if downsample: 187 | x_init = layers.Conv2D(channels, 3, 2, kernel_initializer=weight_init, kernel_regularizer=weight_regularizer, 188 | use_bias=use_bias, padding='same', name=block_name + '/conv_init')(x_init) 189 | x = layers.Conv2D(channels, 3, strides=stride, kernel_initializer=weight_init, kernel_regularizer=weight_regularizer, 190 | use_bias=False, padding='same', name=block_name + '/conv_0')(x) 191 | x = layers.BatchNormalization(axis=self.channel_axis, name=block_name + '/batch_norm_1')(x) 192 | x = layers.Activation('relu', name=block_name + '/relu_1')(x) 193 | x = layers.Conv2D(channels, 3, 1, kernel_initializer=weight_init, kernel_regularizer=weight_regularizer, 194 | use_bias=False, padding='same', name=block_name + '/conv_1')(x) 195 | x = layers.Add()([x, x_init]) 196 | return x 197 | 198 | 199 | class ResNet20CIFAR(ResNetCIFAR): 200 | def __init__(self, *args, **kwargs): 201 | super().__init__(20, *args, **kwargs) 202 | 203 | 204 | class ResNet32CIFAR(ResNetCIFAR): 205 | def __init__(self, *args, **kwargs): 206 | super().__init__(32, *args, **kwargs) 207 | 208 | 209 | class ResNet44CIFAR(ResNetCIFAR): 210 | def __init__(self, *args, **kwargs): 211 | super().__init__(44, *args, **kwargs) 212 | 213 | 214 | class ResNet56CIFAR(ResNetCIFAR): 215 | def __init__(self, *args, **kwargs): 216 | super().__init__(56, *args, **kwargs) 217 | -------------------------------------------------------------------------------- /src/models/fbnet_mobilenet.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | import yaml 4 | 5 | from .generic_model import GenericModelBuilder 6 | from .fbnetv2 import ChannelMasking 7 | from .mobilenet import correct_pad 8 | 9 | BATCHNORM_MOMENTUM = 0.9 10 | arch_param_regularizer = tf.keras.regularizers.l2(l=0.0005) 11 | 12 | class _FBNet_MobileNetV2(GenericModelBuilder): 13 | def __init__(self, *args, load_searched_arch: str = None, **kwargs): 14 | """the official implementation uses tf.keras.backend.int_shape(x)[-1] to compute in_channels. 15 | But with upstride we can't, because when working with the cpp engine, the shape of the tensor is multiplied by the length of the multivector.
16 | To bypass this issue, we can remember the output shape of the last block and use it to define in_channels. 17 | At the beginning of the model building, the shape should be 3 (number of channels) 18 | """ 19 | self.last_block_output_shape = 3 20 | self.load_searched_arch = load_searched_arch 21 | 22 | if self.load_searched_arch: 23 | if tf.io.gfile.exists(self.load_searched_arch): 24 | with open(self.load_searched_arch, 'r') as f: 25 | self.model_def = yaml.safe_load(f) 26 | else: 27 | raise FileNotFoundError(f"{self.load_searched_arch} incorrect, check the path") 28 | assert all([k1 == k2 for k1, k2 in zip(self.model_def.keys(), self.mapping.keys())]), "keys are not the same" 29 | for (k1, v1), (k2, v2) in zip(self.model_def.items(), self.mapping.items()): 30 | if k1 == k2: 31 | self.mapping[k2][0] = v1 32 | 33 | # the init of the super class needs to be called at the end of this init because it calls model(), so everything needs to be ready before 34 | super().__init__(*args, **kwargs) 35 | 36 | def _inverted_res_block(self, x, filters, stride, expansion, name): 37 | """This block performs the Conv(expansion) -> DepthWiseConv -> Conv(Projection) 38 | 39 | Args: 40 | expansion (integer): Integer value to increase the channels from the previous layer 41 | stride (Int): Strides to be applied in the convolution 42 | filters (Int) or tuple(Int): total feature maps to be obtained at the end of the block or range (min, max, step) during arch search 43 | name (str): Indicates the block number and controls expansion or just depthwise separable convolution 44 | """ 45 | layers = self.layers 46 | weight_regularizer = self.weight_regularizer 47 | in_channels = self.last_block_output_shape 48 | # If model definition file is not passed 49 | if not self.load_searched_arch: 50 | # get the max possible number of channels 51 | pointwise_conv_filters = filters[1] 52 | else: 53 | # get the number of channels defined in the file 54 | pointwise_conv_filters = filters 55 | 56 | # TODO Enable this to see if we get speed up (multiples of 8 are required for activating tensor cores for mixed precision training) 57 | # pointwise_filters = _make_divisible(pointwise_conv_filters, 8) 58 | inputs = x 59 | prefix = name 60 | 61 | 62 | # Expand 63 | x = layers.Conv2D((expansion * in_channels), kernel_size=1, padding='same', use_bias=False, name=prefix + 'expand', kernel_regularizer=weight_regularizer)(x) 64 | # if not self.load_searched_arch: 65 | # new_filter_range = [i * expansion for i in filters] 66 | # x = ChannelMasking(*new_filter_range, name=prefix + '_cm1', regularizer=arch_param_regularizer)(x) 67 | x = layers.BatchNormalization(epsilon=1e-3, momentum=BATCHNORM_MOMENTUM, name=prefix + 'expand_BN')(x) 68 | x = layers.ReLU(6., name=prefix + 'expand_relu')(x) 69 | 70 | # Depthwise 71 | if stride == 2: 72 | x = layers.ZeroPadding2D(padding=correct_pad(x, 3, self.is_channels_first), name=prefix + 'pad')(x) 73 | x = layers.DepthwiseConv2D(kernel_size=3, strides=stride, activation=None, use_bias=False, padding='same' if stride == 1 else 'valid', 74 | name=prefix + 'depthwise', depthwise_regularizer=weight_regularizer)(x) 75 | x = layers.BatchNormalization(epsilon=1e-3, momentum=BATCHNORM_MOMENTUM, name=prefix + 'depthwise_BN')(x) 76 | x = layers.ReLU(6., name=prefix + 'depthwise_relu')(x) 77 | 78 | # Project 79 | x = layers.Conv2D(pointwise_conv_filters, kernel_size=1, padding='same', use_bias=False, activation=None, name=prefix + 'project', kernel_regularizer=weight_regularizer)(x) 80 | if not self.load_searched_arch: 81 | x
= ChannelMasking(*filters, name=prefix + '_savable', regularizer=arch_param_regularizer)(x) 82 | x = layers.BatchNormalization(epsilon=1e-3, momentum=BATCHNORM_MOMENTUM, name=prefix + 'project_BN')(x) 83 | 84 | if in_channels == pointwise_conv_filters and stride == 1: 85 | x = layers.Add(name=prefix + 'add')([inputs, x]) 86 | self.last_block_output_shape = pointwise_conv_filters 87 | 88 | return x 89 | 90 | def model(self, x, alpha=1.0): 91 | """Instantiates the MobileNetV2 architecture. 92 | Args: 93 | alpha: controls the width of the network. This is known as the 94 | width multiplier in the MobileNetV2 paper, but the name is kept for 95 | consistency with MobileNetV1 in Keras. 96 | - If `alpha` < 1.0, proportionally decreases the number 97 | of filters in each layer. 98 | - If `alpha` > 1.0, proportionally increases the number 99 | of filters in each layer. 100 | - If `alpha` = 1, default number of filters from the paper 101 | are used at each layer. 102 | """ 103 | weight_regularizer = self.weight_regularizer 104 | 105 | # first_block_filters = _make_divisible(16, 8) 106 | x = self.layers.ZeroPadding2D(padding=correct_pad(x, 3, self.is_channels_first), name='conv1_pad')(x) 107 | 108 | first_block_filters = self.mapping['conv2d_01'] 109 | if not self.load_searched_arch: 110 | x = self.layers.Conv2D(first_block_filters[0][1], kernel_size=3, strides=2, padding='valid', use_bias=False, name='conv2d_01', kernel_regularizer=weight_regularizer)(x) 111 | x = ChannelMasking(*first_block_filters[0], name='conv2d_01_savable', regularizer=arch_param_regularizer)(x) 112 | self.last_block_output_shape = first_block_filters[0][1] 113 | else: 114 | x = self.layers.Conv2D(first_block_filters[0], kernel_size=3, strides=2, padding='valid', use_bias=False, name='conv2d_01', kernel_regularizer=weight_regularizer)(x) 115 | self.last_block_output_shape = first_block_filters[0] 116 | x = self.layers.BatchNormalization(epsilon=1e-3, momentum=BATCHNORM_MOMENTUM)(x) 117 | x = self.layers.ReLU(6.)(x) 118 | 119 | 120 | # Inverted residuals 121 | for k, v in self.mapping.items(): 122 | if k.split('_')[0] == 'irb': # ignore conv2d for now 123 | x = self._inverted_res_block(x, filters=v[0], stride=v[1], expansion=v[2], name=k) 124 | 125 | # no alpha applied to last conv as stated in the paper: 126 | # if the width multiplier is greater than 1 we 127 | # increase the number of output channels 128 | last_block_filters = 1984 # TODO try with 1280 129 | 130 | # TODO move this into the _conv_block once we planned to use the channel masking for the below 131 | x = self.layers.Conv2D(last_block_filters, kernel_size=1, use_bias=False, kernel_regularizer=weight_regularizer)(x) 132 | # if not self.load_searched_arch: 133 | # x = layers.ChannelMasking(, 1984, )(x) # TODO test 134 | x = self.layers.BatchNormalization(epsilon=1e-3, momentum=BATCHNORM_MOMENTUM)(x) 135 | x = self.layers.ReLU(6., name='out_relu')(x) 136 | 137 | x = self.layers.GlobalAveragePooling2D()(x) 138 | # x = self.layers.Dense(self.label_dim, use_bias=True, name='Logits', kernel_regularizer=weight_regularizer)(x) 139 | return x 140 | 141 | 142 | 143 | class FBNet_MobileNetV2CIFAR(_FBNet_MobileNetV2): 144 | def __init__(self, *args, **kwargs): 145 | self.mapping = { 146 | # filter_range, Stride, expansion 147 | 'conv2d_01': [(8, 16, 4), 1, 1], 148 | 'irb_01': [(12, 16, 4), 1, 1], 149 | 'irb_02': [(16, 24, 4), 1, 6], 150 | 'irb_03': [(16, 24, 4), 1, 6], 151 | 'irb_04': [(16, 24, 4), 1, 6], 152 | 'irb_05': [(16, 40, 8), 1, 6], 153 | 'irb_06': [(16, 40, 8), 1, 
6], 154 | 'irb_07': [(16, 40, 8), 1, 6], 155 | 'irb_08': [(48, 80, 8), 1, 6], 156 | 'irb_09': [(48, 80, 8), 1, 6], 157 | 'irb_10': [(48, 80, 8), 1, 6], 158 | 'irb_11': [(72, 112, 8), 1, 6], 159 | 'irb_12': [(72, 112, 8), 1, 6], 160 | 'irb_13': [(72, 112, 8), 1, 6], 161 | 'irb_14': [(112, 184, 8), 2, 6], 162 | 'irb_15': [(112, 184, 8), 1, 6], 163 | 'irb_16': [(112, 184, 8), 1, 6], 164 | 'irb_17': [(112, 184, 8), 1, 6], 165 | # 'conv2d_2': [1984, 1, 1], 166 | } 167 | super().__init__(*args, **kwargs) 168 | 169 | 170 | class FBNet_MobileNetV2CIFARUP(_FBNet_MobileNetV2): 171 | def __init__(self, *args, **kwargs): 172 | self.mapping = { 173 | # filter_range, Stride, expansion 174 | 'conv2d_01': [(4, 16, 4), 1, 1], 175 | 'irb_01': [(4, 8, 4), 1, 1], 176 | 'irb_02': [(4, 12, 4), 1, 6], 177 | 'irb_03': [(4, 12, 4), 1, 6], 178 | 'irb_04': [(4, 16, 4), 2, 6], 179 | 'irb_05': [(4, 16, 4), 1, 6], 180 | 'irb_06': [(4, 16, 4), 1, 6], 181 | 'irb_07': [(8, 32, 4), 2, 6], 182 | 'irb_08': [(8, 32, 4), 1, 6], 183 | 'irb_09': [(8, 32, 4), 1, 6], 184 | 'irb_10': [(8, 32, 4), 1, 6], 185 | 'irb_11': [(12, 48, 4), 1, 6], 186 | 'irb_12': [(12, 48, 4), 1, 6], 187 | 'irb_13': [(12, 48, 4), 1, 6], 188 | 'irb_14': [(24, 80, 8), 2, 6], 189 | 'irb_15': [(24, 80, 8), 1, 6], 190 | 'irb_16': [(24, 80, 8), 1, 6], 191 | 'irb_17': [(40, 160, 8), 1, 6], 192 | # 'conv2d_2': [1984, 1, 1], 193 | } 194 | super().__init__(*args, **kwargs) 195 | 196 | class FBNet_MobileNetV2Imagenet(_FBNet_MobileNetV2): 197 | def __init__(self, *args, **kwargs): 198 | self.mapping = { 199 | # filter_range, Stride, expansion 200 | 'conv2d_01': [(8, 16, 4), 2, 1], 201 | 'irb_01': [(12, 16, 4), 1, 1], 202 | 'irb_02': [(16, 24, 4), 2, 6], 203 | 'irb_03': [(16, 24, 4), 1, 6], 204 | 'irb_04': [(16, 24, 4), 1, 6], 205 | 'irb_05': [(16, 40, 8), 2, 6], 206 | 'irb_06': [(16, 40, 8), 1, 6], 207 | 'irb_07': [(16, 40, 8), 1, 6], 208 | 'irb_08': [(48, 80, 8), 2, 6], 209 | 'irb_09': [(48, 80, 8), 1, 6], 210 | 'irb_10': [(48, 80, 8), 1, 6], 211 | 'irb_11': [(72, 112, 8), 1, 6], 212 | 'irb_12': [(72, 112, 8), 1, 6], 213 | 'irb_13': [(72, 112, 8), 1, 6], 214 | 'irb_14': [(112, 184, 8), 2, 6], 215 | 'irb_15': [(112, 184, 8), 1, 6], 216 | 'irb_16': [(112, 184, 8), 1, 6], 217 | 'irb_17': [(112, 184, 8), 1, 6], 218 | # 'conv2d_2': [1984, 1, 1], 219 | } 220 | super().__init__(*args, **kwargs) -------------------------------------------------------------------------------- /train_arch_search.py: -------------------------------------------------------------------------------- 1 | import tqdm 2 | import yaml 3 | from src.models.fbnetv2 import ChannelMasking, define_temperature 4 | from submodules.global_dl.global_conf import config_tf2 5 | import math 6 | import os 7 | import tensorflow as tf 8 | import upstride_argparse as argparse 9 | from src.argument_parser import training_arguments_das 10 | from src.data import dataloader 11 | from src import losses 12 | from src.models import model_name_to_class 13 | from src.models.generic_model import framework_list 14 | from src.utils import check_folder, get_imagenet_data, model_dir 15 | from submodules.global_dl import global_conf 16 | from submodules.global_dl.training.training import create_env_directories, setup_mp, define_model_in_strategy, get_callbacks, init_custom_checkpoint_callbacks 17 | from submodules.global_dl.training import training 18 | from submodules.global_dl.training import alchemy_api 19 | from submodules.global_dl.training import export 20 | from submodules.global_dl.training.optimizers import 
get_lr_scheduler, get_optimizer, StepDecaySchedule, CosineDecay 21 | from submodules.global_dl.training import optimizers 22 | from src.models import fbnetv2 23 | 24 | arguments = [ 25 | ['namespace', 'dataloader', dataloader.arguments], 26 | ['namespace', 'server', alchemy_api.arguments], 27 | ['namespace', 'optimizer', optimizers.arguments], 28 | ['namespace', 'export', export.arguments], 29 | ['namespace', 'arch_search', training_arguments_das], 30 | [int, "factor", 1, 'division factor to scale the number of channels. factor=2 means the model will have half the number of channels compared to the default implementation'], 31 | [int, 'n_layers_before_tf', 0, 'when using a mix framework, number of layers defined using upstride', lambda x: x >= 0], 32 | [str, 'load_searched_arch', '', 'model definition file containing the searched architecture'], 33 | [bool, 'log_arch', False, 'if true then save the values of the alpha parameters after every epoch in a csv file in the log directory'], 34 | [str, "model_name", '', 'Specify the name of the model', lambda x: x in model_name_to_class], 35 | [str, 'framework', 'tensorflow', 'Framework to use to define the model', lambda x: x in framework_list], 36 | ] + global_conf.arguments + training.arguments 37 | 38 | 39 | def main(): 40 | """ function called when starting the code via command-line 41 | """ 42 | args = argparse.parse_cmd(arguments) 43 | args['server'] = alchemy_api.start_training(args['server']) 44 | train(args) 45 | 46 | 47 | def get_experiment_name(args): 48 | experiment_dir = f"{args['model_name']}_{args['framework']}" 49 | if 'mix' in args['framework']: 50 | experiment_dir += "_mix_{}".format(args['n_layers_before_tf']) 51 | if args['configuration']['with_mixed_precision']: 52 | experiment_dir += "_mp" 53 | return experiment_dir 54 | 55 | 56 | def get_train_step_function(model, weights, weight_opt, metrics): 57 | train_accuracy_metric = metrics['accuracy'] 58 | train_cross_entropy_loss_metric = metrics['cross_entropy_loss'] 59 | train_total_loss_metric = metrics['total_loss'] 60 | 61 | @tf.function 62 | def train_step(x_batch, y_batch): 63 | with tf.GradientTape() as tape: 64 | y_hat = model(x_batch, training=True) 65 | cross_entropy_loss = tf.reduce_mean(tf.keras.losses.categorical_crossentropy(y_batch, y_hat)) 66 | weight_reg_loss = tf.reduce_sum(model.losses) 67 | total_loss = cross_entropy_loss + weight_reg_loss 68 | train_accuracy_metric.update_state(y_batch, y_hat) 69 | train_cross_entropy_loss_metric.update_state(cross_entropy_loss) 70 | train_total_loss_metric.update_state(total_loss) 71 | # Update the weights 72 | grads = tape.gradient(total_loss, weights) 73 | weight_opt.apply_gradients(zip(grads, weights)) 74 | return train_step 75 | 76 | 77 | def get_train_step_arch_function(model, arch_params, arch_opt, train_metrics, arch_metrics): 78 | latency_reg_loss_metric = arch_metrics['latency_reg_loss'] 79 | train_accuracy_metric = train_metrics['accuracy'] 80 | train_cross_entropy_loss_metric = train_metrics['cross_entropy_loss'] 81 | total_loss_metric = train_metrics['total_loss'] 82 | 83 | @tf.function 84 | def train_step_arch(x_batch, y_batch): 85 | with tf.GradientTape() as tape: 86 | y_hat = model(x_batch, training=False) 87 | cross_entropy_loss = tf.reduce_mean(tf.keras.losses.categorical_crossentropy(y_batch, y_hat)) 88 | weight_reg_loss = tf.reduce_sum(model.losses) 89 | latency_reg_loss = losses.parameters_loss(model) / 1.0e6 90 | total_loss = cross_entropy_loss + weight_reg_loss # + latency_reg_loss 91 |
latency_reg_loss_metric.update_state(latency_reg_loss) 92 | train_accuracy_metric.update_state(y_batch, y_hat) 93 | train_cross_entropy_loss_metric.update_state(cross_entropy_loss) 94 | total_loss_metric.update_state(total_loss) 95 | # Update the architecture parameters 96 | grads = tape.gradient(total_loss, arch_params) 97 | arch_opt.apply_gradients(zip(grads, arch_params)) 98 | return train_step_arch 99 | 100 | 101 | def get_eval_step_function(model, metrics): 102 | val_accuracy_metric = metrics['accuracy'] 103 | val_cross_entropy_loss_metric = metrics['cross_entropy_loss'] 104 | 105 | @tf.function 106 | def evaluation_step(x_batch, y_batch): 107 | y_hat = model(x_batch, training=False) 108 | loss = tf.reduce_mean(tf.keras.losses.categorical_crossentropy(y_batch, y_hat)) 109 | val_accuracy_metric.update_state(y_batch, y_hat) 110 | val_cross_entropy_loss_metric.update_state(loss) 111 | return evaluation_step 112 | 113 | 114 | def metrics_processing(metrics, summary_writers, keys, template, epoch, postfix=''): 115 | for key in keys: 116 | with summary_writers[key].as_default(): 117 | for sub_key in metrics[key]: 118 | value = float(metrics[key][sub_key].result()) # save metric value 119 | metrics[key][sub_key].reset_states() # reset the metric 120 | template += f", {key}_{sub_key}: {value}" 121 | tf.summary.scalar(sub_key+postfix, value, step=epoch) 122 | return template 123 | 124 | 125 | def train(args): 126 | # config_tf2(args['configuration']['xla']) 127 | # Create log, checkpoint and export directories 128 | checkpoint_dir, log_dir, export_dir = create_env_directories(args, get_experiment_name(args)) 129 | train_log_dir = os.path.join(log_dir, 'train') 130 | val_log_dir = os.path.join(log_dir, 'validation') 131 | arch_log_dir = os.path.join(log_dir, 'arch') 132 | summary_writers = { 133 | 'train': tf.summary.create_file_writer(train_log_dir), 134 | 'val': tf.summary.create_file_writer(val_log_dir), 135 | 'arch': tf.summary.create_file_writer(arch_log_dir) 136 | } 137 | 138 | # Prepare the 3 datasets 139 | train_weight_dataset = dataloader.get_dataset(args['dataloader'], transformation_list=args['dataloader']['train_list'], 140 | num_classes=args["num_classes"], split='train_weights') 141 | train_arch_dataset = dataloader.get_dataset(args['dataloader'], transformation_list=args['dataloader']['train_list'], 142 | num_classes=args["num_classes"], split='train_arch') 143 | val_dataset = dataloader.get_dataset(args['dataloader'], transformation_list=args['dataloader']['val_list'], 144 | num_classes=args["num_classes"], split='test') 145 | 146 | # define model, optimizer and checkpoint callback 147 | setup_mp(args) 148 | model = model_name_to_class[args['model_name']](args['framework'], 149 | input_shape=args['input_size'], 150 | label_dim=args['num_classes']).model 151 | model.summary() 152 | 153 | alchemy_api.send_model_info(model, args['server']) 154 | weights, arch_params = fbnetv2.split_trainable_weights(model) 155 | weight_opt = get_optimizer(args['optimizer']) 156 | arch_opt = get_optimizer(args['arch_search']['optimizer']) 157 | model_checkpoint_cb, latest_epoch = init_custom_checkpoint_callbacks({'model': model}, checkpoint_dir, args['max_checkpoints'], args['checkpoint_freq']) 158 | callbacks = [ 159 | model_checkpoint_cb 160 | ] 161 | 162 | temperature_decay_fn = fbnetv2.exponential_decay(args['arch_search']['temperature']['init_value'], 163 | args['arch_search']['temperature']['decay_steps'], 164 | args['arch_search']['temperature']['decay_rate']) 165 | 166 | lr_decay_fn =
CosineDecay(args['optimizer']['lr'], 167 | alpha=args["optimizer"]["lr_decay_strategy"]["lr_params"]["alpha"], 168 | total_epochs=args['num_epochs']) 169 | 170 | lr_decay_fn_arch = CosineDecay(args['arch_search']['optimizer']['lr'], 171 | alpha=0.000001, 172 | total_epochs=args['num_epochs']) 173 | 174 | metrics = { 175 | 'arch': { 176 | 'latency_reg_loss': tf.keras.metrics.Mean() 177 | }, 178 | 'train': { 179 | 'total_loss': tf.keras.metrics.Mean(), 180 | 'accuracy': tf.keras.metrics.CategoricalAccuracy(), 181 | 'cross_entropy_loss': tf.keras.metrics.Mean(), 182 | }, 183 | 'val': { 184 | 'accuracy': tf.keras.metrics.CategoricalAccuracy(), 185 | 'cross_entropy_loss': tf.keras.metrics.Mean(), 186 | } 187 | } 188 | 189 | train_step = get_train_step_function(model, weights, weight_opt, metrics['train']) 190 | train_step_arch = get_train_step_arch_function(model, arch_params, arch_opt, metrics['train'], metrics['arch']) 191 | evaluation_step = get_eval_step_function(model, metrics['val']) 192 | 193 | for epoch in range(latest_epoch, args['num_epochs']): 194 | print(f'Epoch: {epoch}/{args["num_epochs"]}') 195 | # Update both LR 196 | weight_opt.learning_rate = lr_decay_fn(epoch) 197 | arch_opt.learning_rate = lr_decay_fn_arch(epoch) 198 | # Updating the weight parameters using a subset of the training data 199 | for step, (x_batch, y_batch) in tqdm.tqdm(enumerate(train_weight_dataset, start=1)): 200 | train_step(x_batch, y_batch) 201 | # Evaluate the model on validation subset 202 | for x_batch, y_batch in val_dataset: 203 | evaluation_step(x_batch, y_batch) 204 | # Handle metrics 205 | template = f"Weights updated, Epoch {epoch}" 206 | template = metrics_processing(metrics, summary_writers, ['train', 'val'], template, epoch) 207 | template += f", lr: {float(weight_opt.learning_rate)}" 208 | print(template) 209 | 210 | new_temperature = temperature_decay_fn(epoch) 211 | with summary_writers['train'].as_default(): 212 | tf.summary.scalar('temperature', new_temperature, step=epoch) 213 | define_temperature(new_temperature) 214 | 215 | if epoch >= args['arch_search']['num_warmup']: 216 | # Updating the architectural parameters on another subset 217 | for step, (x_batch, y_batch) in tqdm.tqdm(enumerate(train_arch_dataset, start=1)): 218 | train_step_arch(x_batch, y_batch) 219 | # Evaluate the model on validation subset 220 | for x_batch, y_batch in val_dataset: 221 | evaluation_step(x_batch, y_batch) 222 | # Handle metrics 223 | template = f'Architecture updated, Epoch {epoch}' 224 | template = metrics_processing(metrics, summary_writers, ['train', 'val', 'arch'], template, epoch, postfix='_arch') 225 | template += f", lr: {float(arch_opt.learning_rate)}" 226 | print(template) 227 | # the save is kept outside of the condition so we save starting from the beginning 228 | fbnetv2.save_arch_params(model, epoch, log_dir) 229 | 230 | # manually call the callbacks 231 | for callback in callbacks: 232 | callback.on_epoch_end(epoch, logs=None) 233 | 234 | print("Training Completed!!") 235 | 236 | print("Architecture params: ") 237 | print(arch_params) 238 | fbnetv2.post_training_analysis(model, args['arch_search']['exported_architecture']) 239 | 240 | 241 | if __name__ == '__main__': 242 | main() 243 | --------------------------------------------------------------------------------