├── src ├── __init__.py ├── argument_parser.py ├── models │ ├── test_pdart.py │ ├── tiny_darknet.py │ ├── test_fbnet_mobilenet.py │ ├── test_generic_model.py │ ├── vgg.py │ ├── squeezenet.py │ ├── __init__.py │ ├── wide_resnet.py │ ├── complexnet.py │ ├── generic_model.py │ ├── hypermodels.py │ ├── fbnetv2.py │ ├── mobilenet.py │ ├── alexnet.py │ ├── test_fbnetv2.py │ ├── resnet.py │ └── fbnet_mobilenet.py ├── test_losses.py ├── data │ ├── test_dataloader.py │ └── dataloader.py ├── losses.py ├── test_argument_parser.py ├── test_tfrecord_extractor.py ├── utils.py ├── inference_benchmark.py └── test_utils.py ├── tests ├── __init__.py ├── system_tests │ ├── dataviz.sh │ ├── models_training_upstride.sh │ ├── inference_benchmark.sh │ └── models_training.sh └── unit_tests │ └── test_channels_first_last.py ├── ressources ├── training.gif ├── keras_tuner.png └── testing │ ├── cat.png │ ├── black_and_white.jpeg │ ├── fake_LOC_val_solution.csv │ ├── config.yml │ └── fake_LOC_synset_mapping.txt ├── .gitmodules ├── .gitignore ├── conf.yml ├── dockerfiles ├── tensorflow.dockerfile └── upstride.dockerfile ├── copy_and_resize.py ├── makefile ├── dataviz.py ├── test.py ├── scripts ├── bayesian_opt_results_parser.py ├── alpha_viz.py ├── hyperband_results_parser.py └── test_tfrecord_writer.py ├── README.md ├── inference_client.py ├── inference_server.py ├── train_keras_tuner.py ├── inference_benchmark.py ├── documentation └── doc.md └── train_arch_search.py /src/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ressources/training.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UpStride/classification-api/HEAD/ressources/training.gif -------------------------------------------------------------------------------- /ressources/keras_tuner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UpStride/classification-api/HEAD/ressources/keras_tuner.png -------------------------------------------------------------------------------- /ressources/testing/cat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UpStride/classification-api/HEAD/ressources/testing/cat.png -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "submodules/global_dl"] 2 | path = submodules/global_dl 3 | url = git@github.com:UpStride/global-dl.git 4 | -------------------------------------------------------------------------------- /ressources/testing/black_and_white.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UpStride/classification-api/HEAD/ressources/testing/black_and_white.jpeg -------------------------------------------------------------------------------- /ressources/testing/fake_LOC_val_solution.csv: -------------------------------------------------------------------------------- 1 | ImageId,PredictionString 2 | ILSVRC2012_val_0,n01484850 85 1 499 272 3 | ILSVRC2012_val_1,n01496331 131 0 499 254 4 | 
-------------------------------------------------------------------------------- /ressources/testing/config.yml: -------------------------------------------------------------------------------- 1 | parameter_int: 1 2 | parameter_str: plop 3 | parameter_list: [1, 2, 3] 4 | parameter_bool: true 5 | parameter_dict: 6 | parameter_int: 3 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | results*.md 3 | profiling 4 | README_perso.md 5 | user_conf*.yml 6 | .coverage 7 | coverage.xml 8 | rabbitmq_conf.yml 9 | *_conf.yml 10 | .vscode 11 | inference_config 12 | .stfolder 13 | .ipynb_checkpoints 14 | configs 15 | wandb -------------------------------------------------------------------------------- /conf.yml: -------------------------------------------------------------------------------- 1 | model_name: MobileNetV2 2 | num_epochs: 2 3 | checkpoint_dir: exp/checkpoint 4 | log_dir: exp/log 5 | framework: tensorflow 6 | export_dir: exp 7 | dataloader: 8 | name: cifar10 9 | train_list: [RandomHorizontalFlip, Normalize] 10 | val_list: [Normalize] 11 | batch_size: 64 12 | input_size: [32, 32, 3] 13 | num_classes: 10 14 | optimizer: 15 | lr: 0.0001 16 | -------------------------------------------------------------------------------- /ressources/testing/fake_LOC_synset_mapping.txt: -------------------------------------------------------------------------------- 1 | n01440764 tench, Tinca tinca 2 | n01443537 goldfish, Carassius auratus 3 | n01484850 great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias 4 | n01491361 tiger shark, Galeocerdo cuvieri 5 | n01494475 hammerhead, hammerhead shark 6 | n01496331 electric ray, crampfish, numbfish, torpedo 7 | n01498041 stingray 8 | n01514668 cock 9 | n01514859 hen 10 | n01518878 ostrich, Struthio camelus 11 | -------------------------------------------------------------------------------- /dockerfiles/tensorflow.dockerfile: -------------------------------------------------------------------------------- 1 | FROM tensorflow/tensorflow:2.4.1-gpu 2 | 3 | RUN apt-get update && \ 4 | apt-get install -y libsm6 libxrender1 libxext6 libgl1-mesa-glx && \ 5 | pip install \ 6 | opencv-python \ 7 | pyyaml \ 8 | tensorflow_datasets \ 9 | upstride_argparse \ 10 | keras-tuner \ 11 | pandas \ 12 | wandb \ 13 | tensorflow_addons && \ 14 | rm -rf /var/lib/apt/lists/* 15 | 16 | COPY src /opt/src 17 | COPY submodules /opt/submodules 18 | COPY train.py /opt/train.py 19 | COPY train_arch_search.py /opt/train_arch_search.py 20 | WORKDIR /opt 21 | CMD python train.py 22 | -------------------------------------------------------------------------------- /tests/system_tests/dataviz.sh: -------------------------------------------------------------------------------- 1 | 2 | 3 | python dataviz.py \ 4 | --dataloader.batch_size 128 \ 5 | --dataloader.name imagenette/full-size-v2 \ 6 | --dataloader.train_list RandomCropThenResize RandomHorizontalFlip Cutout ColorJitter Translate \ 7 | --dataloader.val_list CentralCrop \ 8 | --dataloader.val_split_id validation \ 9 | --dataloader.train_split_id train \ 10 | --dataloader.Translate.width_shift_range 0.2 \ 11 | --dataloader.Translate.height_shift_range 0.2 \ 12 | --dataloader.RandomCrop.size 224 224 3 \ 13 | --dataloader.CentralCrop.size 224 224 \ 14 | --dataloader.Cutout.length 16 \ 15 | -------------------------------------------------------------------------------- 
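The dotted flags in these scripts (`--dataloader.batch_size`, `--dataloader.Cutout.length`, ...) map onto the nested argument specs consumed by `upstride_argparse`, as `dataviz.py` further below illustrates. A minimal sketch of that contract, inferred from the argument lists and parser tests elsewhere in this repo: each entry is `[type, name, default, help, optional validator]`, a `'namespace'` entry nests a sub-spec, and `parse_cmd` returns a plain nested dict. The file name `args_demo.py` is hypothetical:

```python
# args_demo.py -- hypothetical demo, not a file of this repo.
import upstride_argparse as argparse

arguments = [
    ['namespace', 'dataloader', [
        [int, 'batch_size', 128, 'batch size per step', lambda x: x > 0],
        ['list[str]', 'train_list', ['Normalize'], 'augmentation ops for the train split'],
    ]],
]

if __name__ == '__main__':
    # e.g. python args_demo.py --dataloader.batch_size 64 \
    #          --dataloader.train_list RandomHorizontalFlip Normalize
    config = argparse.parse_cmd(arguments)  # returns a plain nested dict
    print(config['dataloader']['batch_size'])
```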
/dockerfiles/upstride.dockerfile: -------------------------------------------------------------------------------- 1 | FROM eu.gcr.io/fluid-door-230710/upstride:py-1.1.1-tf2.3.0-gpu 2 | 3 | RUN apt-get update && \ 4 | apt-get install -y libsm6 libxrender1 libxext6 libgl1-mesa-glx && \ 5 | pip install \ 6 | opencv-python \ 7 | pyyaml \ 8 | tensorflow_datasets \ 9 | upstride_argparse \ 10 | keras-tuner \ 11 | pandas \ 12 | wandb \ 13 | tensorflow_addons && \ 14 | rm -rf /var/lib/apt/lists/* 15 | 16 | COPY src /opt/src 17 | COPY submodules /opt/submodules 18 | COPY train.py /opt/train.py 19 | COPY train_arch_search.py /opt/train_arch_search.py 20 | WORKDIR /opt 21 | CMD python train.py 22 | -------------------------------------------------------------------------------- /copy_and_resize.py: -------------------------------------------------------------------------------- 1 | """This script copies a directory structure and resizes all the images to a specific size. 2 | """ 3 | import os 4 | from src.argument_parser import parse_config 5 | from src.utils import copy_and_resize 6 | 7 | 8 | def main(): 9 | arguments = [[str, "source", "", "directory to copy", lambda x: x != "" and os.path.isdir(x)], 10 | [str, "dest", "", "destination of the copy", lambda x: x != ""], 11 | [int, "img_size", 256, "height and width of the images after copy (the image is a square)"]] 12 | args = parse_config(arguments) 13 | copy_and_resize(args.source, args.dest, args.img_size) 14 | 15 | 16 | if __name__ == "__main__": 17 | main() 18 | -------------------------------------------------------------------------------- /makefile: -------------------------------------------------------------------------------- 1 | build_tensorflow: 2 | docker build -t upstride/classification_api:tensorflow-2.0 -f dockerfiles/tensorflow.dockerfile . 3 | 4 | build: 5 | docker build -t upstride/classification_api:upstride-2.0 -f dockerfiles/upstride.dockerfile .
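# A hedged usage sketch (assumption: train.py reads the repo-level conf.yml
# through the same yaml_config flag exercised in src/test_argument_parser.py):
#   make build                              # build the UpStride image
#   make run                                # open a shell inside it (target below)
#   python train.py --yaml_config conf.yml  # then launch a training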
6 | 7 | run: 8 | @docker run -it --rm --gpus all --privileged \ 9 | -v $$(pwd):/opt \ 10 | -v ~/tensorflow_datasets/:/root/tensorflow_datasets \ 11 | -v ~/.keras/datasets:/root/.keras/datasets \ 12 | upstride/classification_api:upstride-2.0 \ 13 | bash 14 | 15 | run_tensorflow: 16 | @docker run -it --rm --gpus all --privileged \ 17 | -v $$(pwd):/opt \ 18 | -v ~/tensorflow_datasets/:/root/tensorflow_datasets \ 19 | -v ~/.keras/datasets:/root/.keras/datasets \ 20 | upstride/classification_api:tensorflow-2.0 \ 21 | bash 22 | -------------------------------------------------------------------------------- /dataviz.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import os 3 | import tensorflow as tf 4 | import upstride_argparse as argparse 5 | from src.data import dataloader 6 | 7 | arguments = [ 8 | ['namespace', 'dataloader', dataloader.arguments], 9 | ] 10 | 11 | 12 | def main(): 13 | config = argparse.parse_cmd(arguments) 14 | datasets = { 15 | 'train': dataloader.get_dataset(config['dataloader'], transformation_list=config['dataloader']['train_list'], num_classes=10, split=config['dataloader']['train_split_id']), 16 | 'val': dataloader.get_dataset(config['dataloader'], transformation_list=config['dataloader']['val_list'], num_classes=10, split=config['dataloader']['val_split_id']) 17 | } 18 | 19 | for dataset_type in ['train', 'val']: 20 | for i, (images, y) in enumerate(datasets[dataset_type]): 21 | image = images[0] 22 | # OpenCV manages images as BGR, TF as RGB 23 | image = image.numpy()[:, :, ::-1] 24 | cv2.imwrite(os.path.join('/tmp', f'{dataset_type}_{i}.png'), image) 25 | if i == 20: 26 | break 27 | 28 | 29 | if __name__ == '__main__': 30 | main() 31 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import unittest 3 | from src.data.test_augmentations import TestAugmentations 4 | from src.data.test_dataloader import TestDataLoader 5 | from src.test_losses import TestLosses 6 | from src.models.test_fbnetv2 import * 7 | from src.models.test_fbnet_mobilenet import * 8 | from src.models.test_pdart import * 9 | 10 | # from src.test_utils import TestUtils 11 | # from src.models.test_generic_model import TestModel1 # TestLayer 12 | # from src.test_export import TestExport 13 | # from src.test_model_tools import TestLRDecay 14 | # from src.test_metrics import TestMetrics, TestCountFlops 15 | 16 | sys.path.append('scripts') 17 | sys.path.append('tests') 18 | sys.path.append('ressources') 19 | 20 | from scripts.test_tfrecord_writer import TestTfrecordWriter 21 | from tests.unit_tests.test_compare_dataloader import TestCompareDataLoader 22 | # dev note: TestCompareChannelsFirstLast takes around 5 minutes as there are lots of models to build.
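# To run a single suite instead, the plain unittest CLI is enough, since the
# test classes are imported into this module, e.g.:
#   python -m unittest test.TestLosses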
23 | from tests.unit_tests.test_channels_first_last import TestCompareChannelsFirstLast 24 | 25 | if __name__ == "__main__": 26 | unittest.main() 27 | -------------------------------------------------------------------------------- /src/argument_parser.py: -------------------------------------------------------------------------------- 1 | import os 2 | from .models.generic_model import framework_list 3 | from submodules.global_dl.training.optimizers import optimizer_list 4 | 5 | 6 | training_arguments_das = [ 7 | ['namespace', 'temperature', [ 8 | [int, 'init_value', 5, 'initial value of the temperature parameter used to control the Gumbel Softmax'], 9 | [float, 'decay_rate', 0.956, 'decay rate to anneal temperature'], 10 | [int, 'decay_steps', 1, 'decay steps'] 11 | ]], 12 | 13 | ['namespace', 'optimizer', [ 14 | [str, 'name', 'adam', 'optimizer to be used for updating architecture parameters during search', lambda x: x.lower() in optimizer_list], 15 | [float, "lr", 0.1, 'learning rate', lambda x: x > 0], 16 | [float, 'momentum', 0.9, 'used when optimizer name is specified as sgd_momentum'], 17 | ]], 18 | [str, 'exported_architecture', 'export.yml', 'file to write the exported architecture'], 19 | [float, 'weight_decay', 1e-4, 'weight decay rate'], 20 | [float, 'arch_param_decay', 5e-4, 'weight decay rate for architecture parameters'], 21 | [int, 'num_warmup', 10, 'number of warmup epochs'] 22 | ] 23 | -------------------------------------------------------------------------------- /src/models/test_pdart.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import tensorflow as tf 3 | import numpy as np 4 | from .pdart import DropPath 5 | 6 | class TestDropPath(unittest.TestCase): 7 | def test(self): 8 | x = tf.ones(shape=(10000, 1, 1, 1)) 9 | drop_path_prob = tf.convert_to_tensor(0.3) 10 | y = DropPath()([x, drop_path_prob]) 11 | # Mean of y shouldn't change much 12 | self.assertAlmostEqual(tf.reduce_sum(y).numpy()/10000, 1., 1) 13 | 14 | def test_nn(self): 15 | """ Create a single layer NN 16 | """ 17 | # define NN 18 | x = tf.keras.layers.Input(shape=(1, 1, 1)) 19 | drop_path_prob = tf.keras.layers.Input(shape=[]) 20 | y = DropPath()([x, drop_path_prob]) 21 | model = tf.keras.Model(inputs=[x, drop_path_prob], outputs=y) 22 | 23 | # run NN 24 | inputs = [tf.ones(shape=(1000, 1, 1, 1)), tf.convert_to_tensor(0.3)] 25 | outputs = model(inputs) 26 | outputs_mean = tf.reduce_mean(outputs) 27 | self.assertAlmostEqual(outputs_mean.numpy(), 1., 1) 28 | 29 | inputs = [tf.ones(shape=(1000, 1, 1, 1)), tf.convert_to_tensor(0.)] 30 | outputs2 = model(inputs) 31 | self.assertTrue(np.array_equal(np.ones(shape=(1000, 1, 1, 1)), outputs2.numpy()))  # with drop probability 0, DropPath must be the identity 32 | -------------------------------------------------------------------------------- /tests/system_tests/models_training_upstride.sh: -------------------------------------------------------------------------------- 1 | # train a channels-first MobileNet with upstride 2 | # needs at least 6 GB of VRAM 3 | 4 | python train.py \ 5 | --model_name MobileNetV2Cifar10NCHW \ 6 | --model.upstride_type 2 \ 7 | --model.factor 4 \ 8 | --model.num_classes 10 \ 9 | --model.input_size 32 32 3 \ 10 | --num_epochs 1000 \ 11 | --checkpoint_dir /tmp/checkpointdata2345 \ 12 | --log_dir log/translate \ 13 | --dataloader.name cifar10 \ 14 | --dataloader.train_list RandomHorizontalFlip Translate Cutout Normalize \ 15 | --dataloader.val_list Normalize \ 16 | --dataloader.val_split_id test \ 17 | --dataloader.Resize.size 36 36 \ 18 |
--dataloader.RandomCrop.size 32 32 3 \ 19 | --dataloader.Translate.width_shift_range 0.25 \ 20 | --dataloader.Translate.height_shift_range 0.25 \ 21 | --dataloader.Cutout.length 4 \ 22 | --dataloader.batch_size 128 \ 23 | --early_stopping 40 \ 24 | --optimizer.lr 0.1 \ 25 | --optimizer.lr_decay_strategy.lr_params.patience 20 \ 26 | --optimizer.lr_decay_strategy.lr_params.strategy lr_reduce_on_plateau \ 27 | --optimizer.lr_decay_strategy.lr_params.decay_rate 0.3 \ 28 | --config.mixed_precision 29 | 30 | rm -r /tmp/results 31 | rm -r /tmp/checkpoint 32 | -------------------------------------------------------------------------------- /tests/system_tests/inference_benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | 5 | # Call the benchmarking script without going through the experiment management 6 | python src/inference_benchmark.py 7 | 8 | # With basic TensorFlow 9 | python inference_benchmark.py \ 10 | --batch_size 32 \ 11 | --comments plop \ 12 | --cuda_visible_device 0 \ 13 | --docker_images local \ 14 | --engines tensorflow \ 15 | --factor 1 \ 16 | --models MobileNetV2NCHW \ 17 | --output /tmp/results.md \ 18 | --profiling_dir /tmp/profiling \ 19 | --n_steps 10 20 | 21 | # With tensorRT FP32 22 | python inference_benchmark.py \ 23 | --batch_size 32 \ 24 | --comments plop \ 25 | --cuda_visible_device 0 \ 26 | --docker_images local \ 27 | --engines tensorflow \ 28 | --factor 1 \ 29 | --models MobileNetV2NCHW \ 30 | --output /tmp/results.md \ 31 | --profiling_dir /tmp/profiling \ 32 | --n_steps 10 \ 33 | --tensorrt \ 34 | --tensorrt_precision FP32 35 | 36 | # With tensorRT FP16 37 | python inference_benchmark.py \ 38 | --batch_size 32 \ 39 | --comments plop \ 40 | --cuda_visible_device 0 \ 41 | --docker_images local \ 42 | --engines tensorflow \ 43 | --factor 1 \ 44 | --models MobileNetV2NCHW \ 45 | --output /tmp/results.md \ 46 | --profiling_dir /tmp/profiling \ 47 | --n_steps 10 \ 48 | --tensorrt \ 49 | --tensorrt_precision FP16 50 | -------------------------------------------------------------------------------- /src/models/tiny_darknet.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from .generic_model import GenericModelBuilder 3 | 4 | 5 | class TinyDarknet(GenericModelBuilder): 6 | def model(self, x): 7 | # First half 8 | x = self.conv2d_unit(x, filters=16 // self.factor, kernels=3) 9 | x = self.layers.MaxPool2D(pool_size=2, strides=2, padding='same')(x) 10 | x = self.conv2d_unit(x, filters=32 // self.factor, kernels=3) 11 | x = self.layers.MaxPool2D(pool_size=2, strides=2, padding='same')(x) 12 | x = self.conv2d_unit(x, filters=64 // self.factor, kernels=3) 13 | x = self.layers.MaxPool2D(pool_size=2, strides=2, padding='same')(x) 14 | x = self.conv2d_unit(x, filters=128 // self.factor, kernels=3) 15 | x = self.layers.MaxPool2D(pool_size=2, strides=2, padding='same')(x) 16 | x = self.conv2d_unit(x, filters=256 // self.factor, kernels=3) 17 | 18 | # 2nd half 19 | x = self.layers.MaxPool2D(pool_size=2, strides=2, padding='same')(x) 20 | x = self.conv2d_unit(x, filters=512 // self.factor, kernels=3) 21 | x = self.layers.MaxPool2D(pool_size=2, strides=1, padding='same')(x) 22 | x = self.conv2d_unit(x, filters=1024 // self.factor, kernels=3) 23 | 24 | x = self.layers.GlobalAveragePooling2D()(x) 25 | return x 26 | 27 | def conv2d_unit(self, x, filters, kernels, strides=1, padding='same'): 28 | x = self.layers.Conv2D(filters, kernels, 
padding=padding, strides=strides, use_bias=False)(x) 29 | x = self.layers.BatchNormalization(axis=self.channel_axis)(x) 30 | x = self.layers.LeakyReLU(alpha=0.1)(x) 31 | return x 32 | -------------------------------------------------------------------------------- /src/test_losses.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import tensorflow as tf 3 | from src.models.fbnetv2 import ChannelMasking 4 | from src.losses import flops_loss 5 | 6 | 7 | class TestLosses(unittest.TestCase): 8 | def test_flops_loss(self): 9 | model = tf.keras.Sequential([ 10 | tf.keras.layers.Input(shape=(24, 24, 3)), 11 | tf.keras.layers.Conv2D(3, (3, 3), padding='same', use_bias=False), 12 | ChannelMasking(1, 3, 1, "hello", gumble_noise=False) 13 | ]) 14 | 15 | model(tf.zeros((1, 24, 24, 3), dtype=tf.float32)) 16 | l = flops_loss(model) 17 | conv_flops = 3*3*3*3*24*24 * 2 18 | self.assertLess(l, conv_flops) 19 | self.assertAlmostEqual(float(l), conv_flops * ((1/3)**2 + (1/3)*(2/3)+(1/3))) 20 | 21 | model.layers[1].g = tf.convert_to_tensor([1., 0., 0.], dtype=tf.float32) 22 | l = flops_loss(model) 23 | self.assertAlmostEqual(float(l), conv_flops * ((1/3))) 24 | 25 | model.layers[1].g = tf.convert_to_tensor([0., 1., 0.], dtype=tf.float32) 26 | l = flops_loss(model) 27 | self.assertAlmostEqual(float(l), conv_flops * ((2/3))) 28 | 29 | model.layers[1].g = tf.convert_to_tensor([0., 0., 1.], dtype=tf.float32) 30 | l = flops_loss(model) 31 | self.assertAlmostEqual(float(l), conv_flops) 32 | 33 | def test_flops_with_intermediate_ops_loss(self): 34 | model = tf.keras.Sequential([ 35 | tf.keras.layers.Input(shape=(24, 24, 3)), 36 | tf.keras.layers.Conv2D(3, (3, 3), padding='same', use_bias=False), 37 | tf.keras.layers.ReLU(), 38 | ChannelMasking(1, 3, 1, "hello", gumble_noise=False) 39 | ]) 40 | model(tf.zeros((1, 24, 24, 3), dtype=tf.float32)) 41 | l = flops_loss(model) 42 | conv_flops = 3*3*3*3*24*24 * 2 43 | self.assertLess(l, conv_flops) 44 | self.assertAlmostEqual(float(l), conv_flops * ((1/3)**2 + (1/3)*(2/3)+(1/3))) 45 | -------------------------------------------------------------------------------- /src/models/test_fbnet_mobilenet.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import yaml 3 | import tempfile 4 | import shutil 5 | import numpy as np 6 | 7 | from .fbnet_mobilenet import FBNet_MobileNetV2Imagenet 8 | import tensorflow as tf 9 | 10 | 11 | class TestFBnetMobileNet(unittest.TestCase): 12 | @classmethod 13 | def setUpClass(cls): 14 | cls.img = np.ones((1, 224, 224, 3), dtype=np.float32) 15 | 16 | cls.test_mapping = { 17 | "conv2d_01": 24, 18 | "irb_01": 20, 19 | "irb_02": 40, 20 | "irb_03": 32, 21 | "irb_04": 40, 22 | "irb_05": 80, 23 | "irb_06": 64, 24 | "irb_07": 80, 25 | "irb_08": 160, 26 | "irb_09": 96, 27 | "irb_10": 152, 28 | "irb_11": 224, 29 | "irb_12": 136, 30 | "irb_13": 224, 31 | "irb_14": 160, 32 | "irb_15": 352, 33 | "irb_16": 368, 34 | "irb_17": 336 35 | } 36 | 37 | cls.tempdir = tempfile.mkdtemp() 38 | cls.file_path = cls.tempdir + '/test.yaml' 39 | with open(cls.file_path, 'w') as f: 40 | yaml.dump(cls.test_mapping, f) 41 | 42 | cls.channel_last = True # TODO test for channels first 43 | 44 | def test_init(self): 45 | print(self.img[1:]) 46 | params = { 47 | 'input_size': self.img.shape[1:], 48 | 'changing_ids': [], 49 | 'num_classes': 10, 50 | 'factor': 1 51 | } 52 | 53 | model = FBNet_MobileNetV2Imagenet(**params).build() 54 | 55 | # model.summary() 56 | get_dict 
= {} 57 | for layer in model.layers: 58 | # This type of checking the channels based on the architecture is not ideal. 59 | # For this specific case we use the projection of the MobileNet block to get the channels used. 60 | if layer.name.startswith('conv2d_01') or layer.name.endswith('project'): 61 | get_dict[layer.name.split('project')[0]] = layer.output.shape[-1] if self.channel_last else layer.output.shape[1] 62 | 63 | # TODO correct this test 64 | # self.assertDictEqual(get_dict,self.test_mapping) 65 | shutil.rmtree(self.tempdir) 66 | -------------------------------------------------------------------------------- /src/models/test_generic_model.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import unittest 3 | import tensorflow as tf 4 | from unittest.mock import MagicMock, patch 5 | from .generic_model import Layer, GenericModel 6 | 7 | # sys.modules['upstride.type2.tf.keras.layers'] = MagicMock() 8 | # sys.modules['upstride'] = MagicMock() 9 | 10 | 11 | class TestLayer(unittest.TestCase): 12 | def test_n_layers_before_tf(self): 13 | layer = Layer("tensorflow", n_layers_before_tf=3) 14 | # n_layers_before_tf is ignored with "tensorflow" 15 | self.assertEqual(layer(), tf.keras.layers) 16 | 17 | layer = Layer("upstride_type2", n_layers_before_tf=3) 18 | self.assertNotEqual(layer(), tf.keras.layers) 19 | self.assertNotEqual(layer(), tf.keras.layers) 20 | self.assertNotEqual(layer(), tf.keras.layers) 21 | self.assertNotEqual(layer(), tf.keras.layers) 22 | 23 | layer = Layer("mix_type2", n_layers_before_tf=3) 24 | self.assertNotEqual(layer(), tf.keras.layers) 25 | self.assertNotEqual(layer(), tf.keras.layers) 26 | self.assertNotEqual(layer(), tf.keras.layers) 27 | self.assertEqual(layer(), tf.keras.layers) 28 | 29 | 30 | class Model1(GenericModel): 31 | def model(self): 32 | self.x = self.layers().Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1')(self.x) 33 | self.x = self.layers().Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2')(self.x) 34 | self.x = self.layers().MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(self.x) 35 | 36 | 37 | class TestModel1(unittest.TestCase): 38 | def test_model(self): 39 | # This unit test doesn't work anymore because we can't know which engine is used 40 | pass 41 | # model = Model1('mix_type2', factor=4, n_layers_before_tf=1).model 42 | 43 | # # got with model.summary() 44 | # model.summary() 45 | # layer_names = [ 46 | # 'InputLayer', 47 | # 'TF2Upstride', 48 | # 'Upstride_2_Conv2D', 49 | # 'Upstride2TF', 50 | # 'Conv2D', 51 | # 'MaxPooling2D', 52 | # 'Activation', 53 | # ] 54 | 55 | # for i in range(7): 56 | # print(model.get_layer(index=i)) 57 | -------------------------------------------------------------------------------- /src/data/test_dataloader.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | import shutil 4 | import tempfile 5 | import unittest 6 | import cv2 7 | import numpy as np 8 | import tensorflow as tf 9 | from . 
import dataloader 10 | 11 | 12 | class TestDataLoader(unittest.TestCase): 13 | def test_map_fn(self): 14 | transformation_list = ['ResizeThenRandomCrop'] 15 | config = { 16 | 'ResizeThenRandomCrop': { 17 | "size": [256, 256], 18 | "crop_size": [224, 224, 3], 19 | "interpolation": 'bicubic' 20 | } 21 | } 22 | map_fn = dataloader.get_map_fn(transformation_list, config, n_classes=2) 23 | dataset_dir = create_fake_dataset() 24 | image = cv2.imread(os.path.join(dataset_dir, 'dog/1.jpg')) 25 | image = tf.convert_to_tensor(image) 26 | image, label = map_fn(image, tf.convert_to_tensor(1)) 27 | self.assertEqual(label.numpy()[0], 0) 28 | self.assertEqual(label.numpy()[1], 1) 29 | self.assertTrue(np.allclose(image.numpy(), np.ones((224, 224, 3), dtype=np.float32)*255)) 30 | 31 | def test_get_dataset_from_tfds(self): 32 | config = { 33 | 'name': 'mnist', 34 | 'data_dir': None, 35 | 'batch_size': 7, 36 | 'train_split_id': 'train' 37 | } 38 | dataset = dataloader.get_dataset_from_tfds(config, [], 10, split='train') 39 | 40 | i = 0 41 | for image, label in dataset: 42 | self.assertEqual(label.numpy().shape, (7, 10)) 43 | self.assertTrue(label.numpy()[0, 0] in [0, 1]) 44 | self.assertTrue(label.numpy()[1, 1] in [0, 1]) 45 | self.assertEqual(image.numpy().shape, (7, 28, 28, 1)) 46 | i += 1 47 | if i == 3: 48 | break 49 | 50 | self.assertEqual(i, 3) 51 | 52 | 53 | def create_fake_dataset(n_images_per_class=2): 54 | dataset_dir = tempfile.mkdtemp() 55 | os.makedirs(os.path.join(dataset_dir, 'cat')) 56 | os.makedirs(os.path.join(dataset_dir, 'dog')) 57 | for i in range(n_images_per_class): 58 | cv2.imwrite(os.path.join(dataset_dir, 'dog', '{}.jpg'.format(i)), np.ones((640, 480, 3), dtype=np.uint8) * 255) 59 | cv2.imwrite(os.path.join(dataset_dir, 'cat', '{}.jpg'.format(i)), np.ones((640, 480, 3), dtype=np.uint8) * 255) 60 | return dataset_dir 61 | -------------------------------------------------------------------------------- /src/losses.py: -------------------------------------------------------------------------------- 1 | from src.models.fbnetv2 import ChannelMasking 2 | from submodules.global_dl.training import metrics 3 | import tensorflow as tf 4 | 5 | def _count_parameters_conv2d(layer): 6 | if type(layer.input_shape) is list: 7 | input_shape = layer.input_shape[0] 8 | else: 9 | input_shape = layer.input_shape 10 | 11 | if type(layer.output_shape) is list: 12 | output_shape = layer.output_shape[0] 13 | else: 14 | output_shape = layer.output_shape 15 | 16 | if layer.data_format == "channels_first": 17 | input_channels = input_shape[1] 18 | output_channels, h, w = output_shape[1:] 19 | elif layer.data_format == "channels_last": 20 | input_channels = input_shape[3] 21 | h, w, output_channels = output_shape[1:] 22 | w_h, w_w = layer.kernel_size 23 | 24 | num_params = output_channels * input_channels * w_h * w_w 25 | 26 | if layer.use_bias: 27 | num_params += output_channels 28 | 29 | return int(num_params) 30 | 31 | 32 | def flops_loss(model): 33 | """Loss function defined by the number of FLOPs, useful for differentiable architecture search 34 | 35 | This function is compatible with both TensorFlow and the UpStride engine 36 | 37 | Args: 38 | model: Keras model containing some ChannelMasking layers 39 | 40 | Returns: 41 | float: loss 42 | """ 43 | loss = 0 44 | for layer in model.layers: 45 | if "Conv2D" in str(type(layer)) and "Depthwise" not in str(type(layer)): 46 | flops = metrics._count_flops_conv2d(layer) 47 | if type(layer) == ChannelMasking: 48 | # flops is the number of flops of the channel just
before ChannelMasking 49 | g = layer.g 50 | param_ratio = [flops * (layer.min + i * layer.step)/layer.max for i in range(layer.g.shape[0])] 51 | loss += tf.math.reduce_sum(g * tf.convert_to_tensor(param_ratio)) 52 | return loss 53 | 54 | 55 | def parameters_loss(model): 56 | loss = 0 57 | for layer in model.layers: 58 | if "Conv2D" in str(type(layer)) and "Depthwise" not in str(type(layer)): 59 | n_params = _count_parameters_conv2d(layer) 60 | if type(layer) == ChannelMasking: 61 | # parameters are the number of parameters of the channel just before ChannelMasking 62 | g = layer.g 63 | param_ratio = [n_params * (layer.min + i * layer.step)/layer.max for i in range(g.shape[0])] 64 | loss += tf.math.reduce_sum(g * tf.convert_to_tensor(param_ratio)) 65 | return loss 66 | -------------------------------------------------------------------------------- /scripts/bayesian_opt_results_parser.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import seaborn as sns 4 | import pandas as pd 5 | import matplotlib 6 | import matplotlib.pyplot as plt 7 | import upstride_argparse as argparse 8 | 9 | arguments = [ 10 | [str, "server", '', 'address of the server to connect using ssh'], 11 | [str, 'remote_dir', '', "directory of the keras tuner experiment on the remote server"], 12 | [str, 'csv_path', '/tmp/results.csv', 'path to write csv file'], 13 | [bool, 'no_plot', False, 'if true then don\'t plot the results'] 14 | ] 15 | 16 | plot = True 17 | try: 18 | matplotlib.use("GTK3Agg") 19 | except ImportError: 20 | print("can't load matplotlib") 21 | plot = False 22 | sns.set(style="darkgrid") 23 | 24 | 25 | def run_bash(cmd: str): 26 | stream = os.popen(cmd) 27 | return stream.read() 28 | 29 | 30 | def test_split_json(): 31 | print(split_json("{}{qsdf}{sdfqfh}")) 32 | 33 | 34 | def split_json(cmd_out): 35 | jsons = [] 36 | n_accol = 0 37 | previous_split_char = 0 38 | for i, c in enumerate(cmd_out): 39 | if c == '{': 40 | n_accol += 1 41 | if c == '}': 42 | n_accol -= 1 43 | if n_accol == 0: 44 | # then split the json 45 | jsons.append(cmd_out[previous_split_char: i+1]) 46 | previous_split_char = i+1 47 | return jsons 48 | 49 | 50 | def parse_str(e): 51 | if e is None: 52 | return '0' 53 | return str(e) 54 | 55 | 56 | def main(): 57 | global plot 58 | args = argparse.parse_cmd(arguments) 59 | if args['no_plot']: 60 | plot = False 61 | server = args['server'] 62 | remote_dir = args['remote_dir'] 63 | out = run_bash(f'ssh {server} "cd {remote_dir} && cat */trial.json"') 64 | jsons = split_json(out) 65 | csv_content = 'experiment_id,factor,framework,depth,score\n' 66 | for trial in jsons: 67 | trial = json.loads(trial) 68 | values = trial['hyperparameters']['values'] 69 | csv_values = [trial['trial_id'], values['factor'], values['framework'], values['depth'], trial['score']] 70 | csv_content += ','.join(list(map(parse_str, csv_values))) + '\n' 71 | 72 | with open(args['csv_path'], 'w') as f: 73 | f.write(csv_content) 74 | 75 | if not plot: 76 | return 77 | 78 | # plot the results 79 | data = pd.read_csv(args['csv_path']) 80 | print(data) 81 | f, ax = plt.subplots(figsize=(9, 6)) 82 | data = data.drop_duplicates(subset=["factor", "depth"]) 83 | data = data.pivot("factor", "depth", "score") 84 | print(data) 85 | sns.heatmap(data, annot=True, vmin=0, vmax=1, cmap='CMRmap') 86 | plt.show() 87 | 88 | 89 | if __name__ == "__main__": 90 | # test_split_json() 91 | main() 92 | 
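# Worked example for split_json above: it tracks brace depth and cuts each
# time the depth returns to zero, so concatenated JSON objects come apart:
#   split_json('{"a": 1}{"b": {"c": 2}}')  ->  ['{"a": 1}', '{"b": {"c": 2}}']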
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # UpStride Classification API 2 | 3 | [![TensorFlow 2.3](https://img.shields.io/badge/TensorFlow-2.3-FF6F00?logo=tensorflow)](https://github.com/tensorflow/tensorflow/releases/tag/v2.3.0) 4 | [![Python 3.6](https://img.shields.io/badge/Python-3.6-3776AB)](https://www.python.org/downloads/release/python-360/) 5 | 6 | 7 | ## What is this repository? 8 | 9 | Hi there 👋 10 | 11 | We are really excited today to open our GitHub to the world! After months of research and development we decided to start giving back to the community with our first open-source repository. 12 | 13 | We are sharing an image classification code that we use internally to benchmark our engine on several datasets. 14 | 15 | This training script has also been shared with our clients to smooth the first-time use of our product and simplify the creation of state-of-the-art neural networks. 16 | 17 | We hope it will bring value to you as well! 18 | 19 | Here you will find how to convert your image datasets to the TFRecord format, load them with an efficient data pipeline offering various data augmentation strategies, and train classic deep learning models. 20 | 21 | We are also working on an integration of Keras-Tuner for hyperparameter search and a differentiable architecture search method for AutoML experiments. 22 | 23 | We will explain here how to use it with and without the UpStride API. 24 | 25 | If you're interested in trying this script powered by UpStride technology 🚀 feel free to reach out to us at hello@upstride.io 26 | 27 | 28 | ## How do I get set up? 29 | 30 | Start off by cloning this repository; be careful, it uses git submodules, so please clone with `git clone --recurse-submodules`. 31 | 32 | If you forgot to do it, don't panic: you can still run `git submodule update --init`. 33 | 34 | The easiest way is to use Docker; we provide two dockerfiles, one to run this code using TensorFlow 2.3, and the other one to run it with UpStride 1.0. 35 | 36 | You can build them by using `make build` or `make build_tensorflow`. 37 | 38 | The TensorFlow docker image will soon be on Docker Hub. 39 | 40 | 41 | ## How do I start a training? 42 | 43 | ![training](ressources/training.gif) 44 | 45 | 46 | 47 | See the 🎓 [Documentation](documentation/doc.md) 48 | 49 | or 50 | 51 | Get started with the [cat vs dog classification example](documentation/cat_dog.ipynb) 52 | 53 | ## Unittesting and code coverage 54 | * To run the unittests, run `python test.py` 55 | * To get the coverage, run `coverage run test.py`. Then `coverage report` shows the coverage information and `coverage xml` creates a file usable by VSCode 56 | 57 | ## Would you like to contribute? 58 | 59 | If you discover a bug or have an idea, please raise an issue. If you wish to contribute, pull requests are also welcome. 60 | 61 | We will try to review them as fast as possible to integrate your work in a timely manner. 62 | 63 | Unit-tests with pull requests are also welcome to smooth this process. 64 | 65 | Thank you!
66 | ✌️ 67 | -------------------------------------------------------------------------------- /src/models/vgg.py: -------------------------------------------------------------------------------- 1 | """ code came from https://github.com/keras-team/keras-applications/blob/master/keras_applications/vgg16.py 2 | """ 3 | import tensorflow as tf 4 | from .generic_model import GenericModelBuilder 5 | 6 | 7 | class VGG16(GenericModelBuilder): 8 | def model(self, x): 9 | # Block 1 10 | x = self.layers.Conv2D(64//self.factor, (3, 3), padding='same', name='block1_conv1')(x) 11 | x = self.layers.Activation('relu')(x) 12 | x = self.layers.Conv2D(64//self.factor, (3, 3), padding='same', name='block1_conv2')(x) 13 | x = self.layers.Activation('relu')(x) 14 | x = self.layers.MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x) 15 | 16 | # Block 2 17 | x = self.layers.Conv2D(128//self.factor, (3, 3), padding='same', name='block2_conv1')(x) 18 | x = self.layers.Activation('relu')(x) 19 | x = self.layers.Conv2D(128//self.factor, (3, 3), padding='same', name='block2_conv2')(x) 20 | x = self.layers.Activation('relu')(x) 21 | x = self.layers.MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x) 22 | 23 | # Block 3 24 | x = self.layers.Conv2D(256//self.factor, (3, 3), padding='same', name='block3_conv1')(x) 25 | x = self.layers.Activation('relu')(x) 26 | x = self.layers.Conv2D(256//self.factor, (3, 3), padding='same', name='block3_conv2')(x) 27 | x = self.layers.Activation('relu')(x) 28 | x = self.layers.Conv2D(256//self.factor, (3, 3), padding='same', name='block3_conv3')(x) 29 | x = self.layers.Activation('relu')(x) 30 | x = self.layers.MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x) 31 | 32 | # Block 4 33 | x = self.layers.Conv2D(512//self.factor, (3, 3), padding='same', name='block4_conv1')(x) 34 | x = self.layers.Activation('relu')(x) 35 | x = self.layers.Conv2D(512//self.factor, (3, 3), padding='same', name='block4_conv2')(x) 36 | x = self.layers.Activation('relu')(x) 37 | x = self.layers.Conv2D(512//self.factor, (3, 3), padding='same', name='block4_conv3')(x) 38 | x = self.layers.Activation('relu')(x) 39 | x = self.layers.MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x) 40 | 41 | # Block 5 42 | x = self.layers.Conv2D(512//self.factor, (3, 3), padding='same', name='block5_conv1')(x) 43 | x = self.layers.Activation('relu')(x) 44 | x = self.layers.Conv2D(512//self.factor, (3, 3), padding='same', name='block5_conv2')(x) 45 | x = self.layers.Activation('relu')(x) 46 | x = self.layers.Conv2D(512//self.factor, (3, 3), padding='same', name='block5_conv3')(x) 47 | x = self.layers.Activation('relu')(x) 48 | x = self.layers.MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x) 49 | 50 | # Classification block 51 | x = self.layers.Flatten(name='flatten')(x) 52 | x = self.layers.Dense(4096//self.factor, name='fc1')(x) 53 | x = self.layers.Activation('relu')(x) 54 | x = self.layers.Dense(4096//self.factor, name='fc2')(x) 55 | x = self.layers.Activation('relu')(x) 56 | return x 57 | -------------------------------------------------------------------------------- /src/models/squeezenet.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from .generic_model import GenericModelBuilder 3 | 4 | 5 | class SqueezeNet(GenericModelBuilder): 6 | def model(self, x): 7 | x = self.layers.Conv2D(filters=64 // self.factor, kernel_size=3, strides=2, padding='valid', name='conv1')(x) 8 | 9 | x = self.layers.Activation('relu', 
name='relu_conv1')(x) 10 | x = self.layers.MaxPooling2D(pool_size=3, strides=2, name='pool1')(x) 11 | 12 | x = self.fire_module(x, fire_id=2, s1x1=16 // self.factor, e1x1=64 // self.factor, e3x3=64 // self.factor) 13 | x = self.fire_module(x, fire_id=3, s1x1=16 // self.factor, e1x1=64 // self.factor, e3x3=64 // self.factor) 14 | x = self.layers.MaxPooling2D(pool_size=3, strides=2, name='pool3')(x) 15 | 16 | x = self.fire_module(x, fire_id=4, s1x1=32 // self.factor, e1x1=128 // self.factor, e3x3=128 // self.factor) 17 | x = self.fire_module(x, fire_id=5, s1x1=32 // self.factor, e1x1=128 // self.factor, e3x3=128 // self.factor) 18 | x = self.layers.MaxPooling2D(pool_size=3, strides=2, name='pool5')(x) 19 | 20 | x = self.fire_module(x, fire_id=6, s1x1=48 // self.factor, e1x1=192 // self.factor, e3x3=192 // self.factor) 21 | x = self.fire_module(x, fire_id=7, s1x1=48 // self.factor, e1x1=192 // self.factor, e3x3=192 // self.factor) 22 | x = self.fire_module(x, fire_id=8, s1x1=64 // self.factor, e1x1=256 // self.factor, e3x3=256 // self.factor) 23 | x = self.fire_module(x, fire_id=9, s1x1=64 // self.factor, e1x1=256 // self.factor, e3x3=256 // self.factor) 24 | 25 | x = self.layers.Dropout(0.5, name='drop9')(x) 26 | x = self.layers.Conv2D(filters=self.num_classes, kernel_size=1, padding='valid', name='conv10')(x) 27 | x = self.layers.Activation('relu', name='relu_conv10')(x) 28 | x = self.layers.GlobalAveragePooling2D()(x) 29 | return x 30 | 31 | def fire_module(self, x, fire_id, s1x1=16, e1x1=64, e3x3=64): 32 | """tf.keras 33 | 34 | Args: 35 | x: input from the previous layer 36 | fire_id: id of fire module 37 | s1x1: filter size of squeeze layer 38 | e1x1: filter size of 1x1 expand layer 39 | e3x3: filter size of 3x3 expand layer 40 | Returns: 41 | a keras tensor 42 | """ 43 | 44 | s_id = 'fire' + str(fire_id) + '/' 45 | 46 | x = self.layers.Conv2D(filters=s1x1, kernel_size=1, padding='valid', name=s_id + 'squeeze1x1_conv')(x) 47 | x = self.layers.Activation('relu', name=s_id + 'squeeze1x1_relu')(x) 48 | 49 | expand1x1 = self.layers.Conv2D(filters=e1x1, kernel_size=1, padding='valid', name=s_id + 'expand1x1_conv')(x) 50 | expand1x1 = self.layers.Activation('relu', name=s_id + 'expand1x1_relu')(expand1x1) 51 | 52 | expand3x3 = self.layers.Conv2D(filters=e3x3, kernel_size=3, padding='same', name=s_id + 'expand3x3_conv')(x) 53 | expand3x3 = self.layers.Activation('relu', name=s_id + 'expand3x3_relu')(expand3x3) 54 | 55 | x = self.layers.Concatenate(axis=self.channel_axis, name=s_id + 'concat')([expand1x1, expand3x3]) 56 | 57 | return x 58 | -------------------------------------------------------------------------------- /inference_client.py: -------------------------------------------------------------------------------- 1 | import zmq 2 | import numpy as np 3 | import tensorflow as tf 4 | import upstride_argparse as argparse 5 | from src.data import dataloader, augmentations 6 | 7 | args_spec = [ 8 | # dataloader specification to run inference on a public dataset 9 | [int, "num_classes", 0, 'Number of classes', lambda x: x > 0], 10 | ['namespace', 'dataloader', [ 11 | ['list[str]', 'list', ['Resize', 'CentralCrop', 'Normalize'], 'Comma-separated list of data augmentation operations'], 12 | [str, "data_dir", '', "directory to read/write data. 
Defaults to \"~/tensorflow_datasets\""], 13 | [str, 'name', None, 'Choose the dataset to be used', lambda x: not (x is None)], 14 | [str, 'split_id', 'validation', 'Split id in the dataset to use'], 15 | [int, 'batch_size', 1, 'The size of batch per gpu', lambda x: x > 0], 16 | ] + augmentations.arguments], 17 | 18 | # networking parameters 19 | [int, 'zmq_port', 5555, 'Specify the port to connect the ZMQ socket', lambda x: x > 0], 20 | ] 21 | 22 | 23 | def get_dataset(args): 24 | args['dataloader']['train_split_id'] = None 25 | dataset = dataloader.get_dataset(args['dataloader'], transformation_list=args['dataloader']['list'], 26 | num_classes=args["num_classes"], split=args['dataloader']['split_id']) 27 | return dataset 28 | 29 | 30 | def create_zmq_socket(port): 31 | context = zmq.Context() 32 | socket = context.socket(zmq.REQ) 33 | socket.connect("tcp://localhost:" + str(port)) 34 | return socket 35 | 36 | 37 | def send_and_evaluate_record(record, socket): 38 | img = record[0].numpy().astype('float16') 39 | val = record[1].numpy() 40 | socket.send(img) 41 | reply = socket.recv() 42 | res = np.frombuffer(reply, dtype='float32').reshape(val.shape) 43 | total = val.shape[0] 44 | correct = [val[j][np.argmax(res[j])] == 1 for j in range(total)].count(True) 45 | return total, correct 46 | 47 | 48 | def send_and_evaluate_dataset(dataset, socket): 49 | sent_records_count = 0 50 | logging_frequency = 10 51 | correct_count = 0 52 | images_count = 0 53 | for record in dataset: 54 | if sent_records_count % logging_frequency == 0: 55 | accuracy = 100.0 * correct_count / images_count if images_count > 0 else float("nan") 56 | print("Records sent: %d, accuracy: %0.2f%%" % (sent_records_count, accuracy)) 57 | total, correct = send_and_evaluate_record(record, socket) 58 | images_count = images_count + total 59 | correct_count = correct_count + correct 60 | sent_records_count += 1 61 | 62 | print("Total records sent:", sent_records_count) 63 | return images_count, correct_count 64 | 65 | 66 | def main(): 67 | args = argparse.parse_cmd(args_spec) 68 | dataset = get_dataset(args) 69 | socket = create_zmq_socket(args['zmq_port']) 70 | images_count, correct_count = send_and_evaluate_dataset(dataset, socket) 71 | accuracy = correct_count / images_count 72 | print("Accuracy of the remote model:", accuracy) 73 | 74 | if __name__ == '__main__': 75 | main() -------------------------------------------------------------------------------- /scripts/alpha_viz.py: -------------------------------------------------------------------------------- 1 | import math 2 | import json 3 | import matplotlib 4 | import matplotlib.pyplot as plt 5 | import matplotlib.cm as cm 6 | import numpy as np 7 | 8 | import upstride_argparse as argparse 9 | 10 | arguments = [ 11 | [str, "alpha_path", '', 'path of the alpha file to parse'], 12 | [int, "epoch", 0, 'if different than 0 then visualize a single epoch'], 13 | [int, "min", 0, 'if provided, then define the minimum epoch to visualize'], 14 | [int, "max", 0, 'if provided, then define the maximum epoch to visualize'], 15 | [int, "step", 100, 'number of steps between 2 epochs to visualize'], 16 | ['list[str]', "params", [], 'if specified, list of parameters to visualize'] 17 | ] 18 | 19 | 20 | def prepare_data(args): 21 | with open(args['alpha_path'], 'r') as f: 22 | alphas = json.load(f) 23 | # find min and max epochs 24 | epochs = list(map(int, alphas.keys())) 25 | epochs.sort() 26 | 27 | min_epoch = max(epochs[0], args['min']) if args['min'] else epochs[0] 28 | max_epoch = 
min(epochs[-1], args['max']) if args['max'] else epochs[-1] 29 | 30 | if args['epoch']: 31 | min_epoch = args['epoch'] 32 | max_epoch = args['epoch'] 33 | 34 | # find parameters to visualize, and remove final '_savable' 35 | params = alphas[str(min_epoch)].keys() 36 | params = list(map(lambda x: x[:-8], alphas[str(min_epoch)].keys())) 37 | if args['params']: 38 | params = args['params'] 39 | print("display:", params) 40 | print('epochs:', min_epoch, max_epoch) 41 | return alphas, min_epoch, max_epoch, params 42 | 43 | 44 | def main(): 45 | matplotlib.use("GTK3Agg") 46 | args = argparse.parse_cmd(arguments) 47 | alphas, min_epoch, max_epoch, params = prepare_data(args) 48 | number_bars = (max_epoch - min_epoch) // args["step"] + 1 49 | colors = cm.OrRd_r(np.linspace(.2, .6, number_bars)) 50 | 51 | # grid has a fixed number of columns of 5 52 | n_params = len(params) 53 | if n_params == 1: 54 | fig, axs = plt.subplots(1, 1, figsize=(9, 3)) 55 | axs = [[axs]] 56 | elif n_params <= 5: 57 | fig, axs = plt.subplots(math.ceil(n_params), 1, figsize=(9, 3)) 58 | axs = [axs] 59 | else: 60 | fig, axs = plt.subplots(math.ceil(n_params/5), 5, figsize=(9, 3)) 61 | fig.suptitle(f'Alpha parameter between {min_epoch} and {max_epoch} epochs (step: {args["step"]})') 62 | 63 | total_width = 0.7 64 | width = total_width / number_bars 65 | for i in range(number_bars): 66 | epoch = min_epoch + i * args["step"] 67 | for k, param in enumerate(params): 68 | p = alphas[str(epoch)][param + '_savable'] 69 | 70 | # TODO should be removed as soon as data is better formatted 71 | data = {} 72 | for j in range(len(p)): 73 | data[str(j)] = p[j] 74 | 75 | names = list(data.keys()) 76 | values = list(data.values()) 77 | 78 | x = np.arange(len(p)) 79 | axs[k//5][k % 5].bar(x - total_width/2 + width * i, values, width, label=str(i)) #, color=colors) 80 | axs[k//5][k%5].set_title(param) 81 | 82 | # fig.tight_layout() 83 | plt.show() 84 | 85 | 86 | if __name__ == "__main__": 87 | main() 88 | -------------------------------------------------------------------------------- /scripts/hyperband_results_parser.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import sys 4 | import seaborn as sns 5 | import pandas as pd 6 | import matplotlib 7 | import matplotlib.pyplot as plt 8 | import numpy as np 9 | 10 | plot = True 11 | try: 12 | matplotlib.use("GTK3Agg") 13 | except ImportError: 14 | print("can't load matplotlib") 15 | plot = False 16 | 17 | 18 | 19 | def main(csv_path): 20 | if not os.path.exists(csv_path): 21 | all_data = [] 22 | all_keys = ['score'] 23 | for d in os.listdir('.'): 24 | if not os.path.isdir(d): 25 | continue 26 | with open(os.path.join(d, 'trial.json'), 'r') as f: 27 | json_data = json.load(f) 28 | data = json_data['hyperparameters']['values'] 29 | data['score'] = json_data['score'] 30 | data['name'] = d 31 | all_data.append(data) 32 | for key in data: 33 | if key not in all_keys: 34 | all_keys.append(key) 35 | # write csv 36 | with open(csv_path, 'w') as f: 37 | f.write(",".join(all_keys)) 38 | f.write("\n") 39 | for data in all_data: 40 | to_write = [] 41 | for key in all_keys: 42 | if key in data: 43 | if data[key] is None: 44 | data[key] = 0 45 | to_write.append(str(data[key])) 46 | else: 47 | to_write.append('') 48 | f.write(','.join(to_write)) 49 | f.write('\n') 50 | print(f"file {csv_path} written") 51 | # also start a visualisation with seaborn 52 | 53 | if not plot: 54 | return 55 | sns.set(style="darkgrid") 56 | data = pd.read_csv(csv_path) 57
| 58 | # data['depth'] = data['conv3_depth'] + data['conv4_depth'] 59 | data['depth'] *= 2 60 | depths = data['depth'].unique() 61 | depths.sort() 62 | factors = data['factor'].unique() 63 | factors.sort() 64 | if 'tuner/trial_id' in data: 65 | data = data.drop(columns=['tuner/trial_id']) 66 | 67 | g = sns.FacetGrid(data, col="tuner/epochs", legend_out=True) 68 | g.map_dataframe(draw_heatmap, 'factors', 'depths', factors=factors, depths=depths) 69 | g.add_legend() 70 | plt.show() 71 | 72 | def draw_heatmap(*args, **kwargs): 73 | '''from https://stackoverflow.com/questions/41471238/how-to-make-heatmap-square-in-seaborn-facetgrid 74 | ''' 75 | data = kwargs.pop('data') 76 | data['score'] = data['score'] * 100 77 | data = data.pivot(index='depth', columns='factor', values='score') 78 | # add missing factor 79 | for f in kwargs['factors']: 80 | if f not in data: 81 | data[f] = np.nan 82 | # add missing indexes 83 | index_to_add = [] 84 | for d in kwargs['depths']: 85 | if d not in data.index: 86 | index_to_add.append(d) 87 | new_indexes = data.index.values.tolist() + index_to_add 88 | new_indexes.sort() 89 | data = data.reindex(new_indexes) 90 | data = data.reindex(sorted(data.columns), axis=1) 91 | 92 | sns.heatmap(data, annot=True, vmin=0, vmax=100, cmap='CMRmap') 93 | 94 | # sns.heatmap(matrix, annot=True, linewidth=0.5, xticklabels=factors, yticklabels=depths, vmin=0, vmax=100, cmap='CMRmap', mask=mask) 95 | 96 | 97 | if __name__ == "__main__": 98 | csv_path = sys.argv[1] 99 | main(csv_path) 100 | -------------------------------------------------------------------------------- /src/models/__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import tensorflow as tf 3 | import tensorflow.keras.layers as tf_layers 4 | 5 | from .alexnet import AlexNet, AlexNetQ, AlexNetToy 6 | from .mobilenet import MobileNetV2, MobileNetV2Cifar10, MobileNetV2Cifar10_2, MobileNetV2Cifar10Hyper 7 | from .mobilenet_v3 import MobileNetV3Large, MobileNetV3Small, MobileNetV3LargeCIFAR, MobileNetV3SmallCIFAR 8 | 9 | from .resnet import (ResNet18, ResNet34, ResNet50, ResNet101, ResNet152, 10 | ResNet20CIFAR, ResNet32CIFAR, ResNet44CIFAR, ResNet56CIFAR, ResNetHyper) 11 | from .wide_resnet import WideResNet28_10, WideResNet40_2 12 | from .squeezenet import SqueezeNet 13 | from .tiny_darknet import TinyDarknet 14 | from .vgg import VGG16 15 | from .nasnet import NASNetLarge, NASNetMobile, NASNetCIFAR 16 | from .efficientnet import EfficientNetB0, EfficientNetB1, EfficientNetB2, EfficientNetB3, EfficientNetB4, EfficientNetB5, EfficientNetB6, EfficientNetB7 17 | from .hypermodels import SimpleHyper 18 | from .fbnet_mobilenet import FBNet_MobileNetV2Imagenet, FBNet_MobileNetV2CIFAR, FBNet_MobileNetV2CIFARUP 19 | from .pdart import PdartsCIFAR, PdartsImageNet 20 | from .complexnet import ShallowComplexNet, DeepComplexNet, WSComplexNetTF, WSComplexNetUpStride, DNComplexNetTF, DNComplexNetUpStride, IBComplexNetTF, IBComplexNetUpStride 21 | 22 | 23 | # raise the recursion limit to prevent Keras from failing on very big models.
24 | # for instance ResNet152 with type2 does not work without this 25 | sys.setrecursionlimit(10000) 26 | 27 | model_name_to_class = { 28 | "AlexNet": AlexNet, 29 | "AlexNetQ": AlexNetQ, 30 | "AlexNetToy": AlexNetToy, 31 | "EfficientNetB0": EfficientNetB0, 32 | "EfficientNetB1": EfficientNetB1, 33 | "EfficientNetB2": EfficientNetB2, 34 | "EfficientNetB3": EfficientNetB3, 35 | "EfficientNetB4": EfficientNetB4, 36 | "EfficientNetB5": EfficientNetB5, 37 | "EfficientNetB6": EfficientNetB6, 38 | "EfficientNetB7": EfficientNetB7, 39 | "MobileNetV2": MobileNetV2, 40 | "MobileNetV2Cifar10": MobileNetV2Cifar10, 41 | "MobileNetV2Cifar10_2": MobileNetV2Cifar10_2, 42 | "NASNetCIFAR": NASNetCIFAR, 43 | "NASNetLarge": NASNetLarge, 44 | "NASNetMobile": NASNetMobile, 45 | "ResNet18": ResNet18, 46 | "ResNet34": ResNet34, 47 | "ResNet50": ResNet50, 48 | "ResNet101": ResNet101, 49 | "ResNet152": ResNet152, 50 | "ResNet20CIFAR": ResNet20CIFAR, 51 | "ResNet32CIFAR": ResNet32CIFAR, 52 | "ResNet44CIFAR": ResNet44CIFAR, 53 | "ResNet56CIFAR": ResNet56CIFAR, 54 | "WideResNet28_10": WideResNet28_10, 55 | "WideResNet40_2": WideResNet40_2, 56 | "SqueezeNet": SqueezeNet, 57 | "TinyDarknet": TinyDarknet, 58 | "VGG16": VGG16, 59 | "MobileNetV3Large": MobileNetV3Large, 60 | "MobileNetV3Small": MobileNetV3Small, 61 | "MobileNetV3LargeCIFAR": MobileNetV3LargeCIFAR, 62 | "MobileNetV3SmallCIFAR": MobileNetV3SmallCIFAR, 63 | # Pdart model 64 | "PdartsCIFAR": PdartsCIFAR, 65 | "PdartsImageNet": PdartsImageNet, 66 | # FIXME The commented models below have stale code and need refactoring when prioritized. 67 | # # Hyper Model 68 | # "SimpleHyper": SimpleHyper, 69 | # "ResNetHyper": ResNetHyper, 70 | # "MobileNetV2Cifar10Hyper": MobileNetV2Cifar10Hyper, 71 | # # Architecture Search models 72 | # "FBNet_MobileNetV2Imagenet": FBNet_MobileNetV2Imagenet, 73 | # "FBNet_MobileNetV2CIFAR": FBNet_MobileNetV2CIFAR, 74 | # "FBNet_MobileNetV2CIFARUP": FBNet_MobileNetV2CIFARUP, 75 | # complexnet 76 | "ShallowComplexNet": ShallowComplexNet, 77 | "DeepComplexNet": DeepComplexNet, 78 | "WSComplexNetTF": WSComplexNetTF, 79 | "WSComplexNetUpStride": WSComplexNetUpStride, 80 | "DNComplexNetTF": DNComplexNetTF, 81 | "DNComplexNetUpStride": DNComplexNetUpStride, 82 | "IBComplexNetTF": IBComplexNetTF, 83 | "IBComplexNetUpStride": IBComplexNetUpStride, 84 | } 85 | -------------------------------------------------------------------------------- /tests/unit_tests/test_channels_first_last.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import unittest 3 | 4 | from tqdm import tqdm 5 | 6 | import tensorflow as tf 7 | from src.models import model_name_to_class 8 | 9 | class TestCompareChannelsFirstLast(unittest.TestCase): 10 | @classmethod 11 | def setUpClass(cls): 12 | cls.model_kwargs = { 13 | 'input_size': [224, 224, 3], 14 | 'changing_ids': [], 15 | 'num_classes': 10, 16 | } 17 | 18 | cls.list_of_models = model_name_to_class.values() 19 | 20 | # the models below do not work for upstride types 21 | remove_models = [ 22 | # These models do not match the EfficientNet definition. There is an open pull request yet to be merged. 23 | # This should be included once the pull request is merged.
24 | "EfficientNetB0", 25 | "EfficientNetB1", 26 | "EfficientNetB2", 27 | "EfficientNetB3", 28 | "EfficientNetB4", 29 | "EfficientNetB5", 30 | "EfficientNetB6", 31 | "EfficientNetB7", 32 | # SeparableConv2D is not supported for upstride types 33 | "NASNetCIFAR", 34 | "NASNetLarge", 35 | "NASNetMobile", 36 | ] 37 | 38 | tmp_list_models = model_name_to_class 39 | # remove models that are not supported 40 | [tmp_list_models.pop(model) for model in remove_models] 41 | cls.list_models_upstride = tmp_list_models.values() 42 | 43 | def test_compare_model_params_tensorflow(self): 44 | self.model_kwargs.update({"upstride_type": -1, "factor": 1}) 45 | print("Building models for Channels_first and Channels_last for Tensorflow and Compare") 46 | for model in tqdm(self.list_of_models): 47 | # switch to channels first 48 | tf.keras.backend.set_image_data_format('channels_first') 49 | model_NCHW = model(**self.model_kwargs).build() 50 | model_NCHW_params = model_NCHW.count_params() 51 | del model_NCHW 52 | tf.keras.backend.clear_session() 53 | # switch back to channels last 54 | tf.keras.backend.set_image_data_format('channels_last') 55 | model_NHWC = model(**self.model_kwargs).build() 56 | model_NHWC_params = model_NHWC.count_params() 57 | del model_NHWC 58 | tf.keras.backend.clear_session() 59 | # compare 60 | # print(f"Model Name : {model.__name__} TensorFlow") 61 | # print(f"Channels_last : {model_NHWC_params:,}") 62 | # print(f"Channels_first: {model_NCHW_params:,}") 63 | self.assertEqual(model_NHWC_params, model_NCHW_params) 64 | 65 | def test_compare_model_params_upstride(self): 66 | # try to import upstride module 67 | try: 68 | import upstride 69 | for up_type in [1, 2]: 70 | self.model_kwargs.update({"upstride_type": up_type, "factor": 2**up_type}) 71 | print(f"Building models for Channels_first and Channels_last for Upstride type{up_type} and Compare") 72 | for model in tqdm(self.list_models_upstride): 73 | # switch to channels first 74 | tf.keras.backend.set_image_data_format('channels_first') 75 | model_NCHW = model(**self.model_kwargs).build() 76 | model_NCHW_params = model_NCHW.count_params() 77 | del model_NCHW 78 | tf.keras.backend.clear_session() 79 | # switch back to channels last 80 | tf.keras.backend.set_image_data_format('channels_last') 81 | model_NHWC = model(**self.model_kwargs).build() 82 | model_NHWC_params = model_NHWC.count_params() 83 | del model_NHWC 84 | tf.keras.backend.clear_session() 85 | # compare 86 | # print(f"Model Name : {model.__name__} UpStride type{up_type}") 87 | # print(f"Channels_last : {model_NHWC_params:,}") 88 | # print(f"Channels_first: {model_NCHW_params:,}") 89 | self.assertEqual(model_NHWC_params, model_NCHW_params) 90 | except ModuleNotFoundError: 91 | print("Unit test test_compare_model_params_upstride skipped as upstride is required for this test") 92 | -------------------------------------------------------------------------------- /inference_server.py: -------------------------------------------------------------------------------- 1 | import os 2 | import zmq 3 | import numpy as np 4 | import tensorflow as tf 5 | import upstride_argparse as argparse 6 | from src.data import dataloader, augmentations 7 | from submodules.global_dl import global_conf 8 | 9 | args_spec = [ 10 | # framework specification 11 | [str, 'model_dir', None, 'Path to a folder containing saved model', lambda x: os.path.exists(x)], 12 | 13 | # dataloader specification to run inference on a dataset 14 | [int, "num_classes", 0, 'Number of classes'], 15 | ['namespace', 
'dataloader', [ 16 | ['list[str]', 'list', ['Resize', 'CentralCrop', 'Normalize'], 'List of data augmentation operations to apply'], 17 | [str, "data_dir", '', "directory to read/write data. Defaults to \"~/tensorflow_datasets\""], 18 | [str, 'name', None, 'Choose the dataset to be used'], 19 | [str, 'split_id', 'validation', 'Split id in the dataset to use'], 20 | [int, 'batch_size', 1, 'The size of batch per gpu', lambda x: x > 0], 21 | ] + augmentations.arguments], 22 | 23 | # networking parameters 24 | [int, 'zmq_port', 5555, 'Specify the port to connect the ZMQ socket', lambda x: x > 0], 25 | ] + global_conf.arguments 26 | 27 | 28 | 29 | def load_model(args): 30 | from train import get_experiment_name 31 | # import upstride to enable model deserialization 32 | import upstride.type0.tf.keras.layers 33 | import upstride.type2.tf.keras.layers 34 | print("Loading model from", args['model_dir']) 35 | model = tf.keras.models.load_model(args['model_dir'], compile=False) # compile=True fails on nano, maybe due to TF 2.2/2.3 difference 36 | model.compile(loss='categorical_crossentropy') 37 | return model 38 | 39 | 40 | def evaluate_dataset(args, model): 41 | print(f"Evaluating on {args['dataloader']['name']}") 42 | args['dataloader']['train_split_id'] = None 43 | dataset = dataloader.get_dataset(args['dataloader'], transformation_list=args['dataloader']['list'], 44 | num_classes=args["num_classes"], split=args['dataloader']['split_id']) 45 | model.evaluate(dataset) 46 | 47 | 48 | def create_zmq_socket(zmq_port): 49 | context = zmq.Context() 50 | socket = context.socket(zmq.REP) 51 | socket.bind("tcp://*:" + str(zmq_port)) 52 | return socket 53 | 54 | 55 | def process_incoming_image_batches(model, shape, socket): 56 | received_messages_count = 0 57 | logging_frequency = 1000 58 | 59 | # set batch dimension to -1 for reshaping 60 | if shape[0] is None: 61 | shape[0] = -1 62 | 63 | # loop forever processing incoming messages 64 | print("Listening to network...") 65 | while True: 66 | # wait for a message 67 | message = socket.recv() 68 | if received_messages_count % logging_frequency == 0: 69 | print(f"Processed {received_messages_count} messages") 70 | # convert the message to an image batch 71 | img = np.frombuffer(message, dtype='float16').reshape(shape) 72 | res = model.predict(img) 73 | socket.send(res) 74 | received_messages_count += 1 75 | 76 | 77 | def main(): 78 | """ CLI entry point 79 | """ 80 | # parse arguments 81 | args = argparse.parse_cmd(args_spec) 82 | 83 | # perform global configuration (XLA and memory growth) 84 | global_conf.config_tf2(args) 85 | 86 | # load model 87 | model = load_model(args) 88 | model.summary() 89 | 90 | # if dataloader.name is set, evaluate on that dataset 91 | if args['dataloader']['name'] is not None: 92 | evaluate_dataset(args, model) 93 | 94 | # otherwise listen for image batches on a zmq socket 95 | else: 96 | socket = create_zmq_socket(args['zmq_port']) 97 | assert len(model.inputs) == 1, "Cannot find model input to send images on" 98 | process_incoming_image_batches(model, list(model.inputs[0].shape), socket) 99 | 100 | 101 | if __name__ == '__main__': 102 | main() 103 | -------------------------------------------------------------------------------- /src/test_argument_parser.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | import os 3 | import tempfile 4 | import json 5 | import yaml 6 | import unittest 7 | import argparse 8 | from unittest import mock 9 | from .argument_parser 
import read_yaml_config, parse_cmd 10 | 11 | 12 | class TestArgumentParser(unittest.TestCase): 13 | def test_read_yaml_config(self): 14 | config_dir = create_yaml_file() 15 | parameters = init_parameters() 16 | read_yaml_config(os.path.join(config_dir, "config.yml"), parameters) 17 | self.assertEqual(parameters['parameter_int'], 2) 18 | self.assertEqual(parameters['parameter_str'], "plip") 19 | self.assertEqual(parameters['parameter_list'], [1, 2, 3]) 20 | self.assertEqual(parameters['parameter_bool'], True) 21 | self.assertEqual(parameters['parameter_dict']['parameter_int'], 3) 22 | shutil.rmtree(config_dir) 23 | 24 | @mock.patch('argparse.ArgumentParser.parse_args', 25 | return_value=argparse.Namespace()) 26 | def test_parse_empty_config(self, mock_args): 27 | arguments = get_arguments() 28 | parameters = parse_cmd(arguments) 29 | out_parameters = { 30 | "parameter_int": 0, 31 | "parameter_str": '', 32 | "parameter_list": [1, 5, 6], 33 | "parameter_bool": False, 34 | "parameter_dict": { 35 | "parameter_int": 5 36 | } 37 | } 38 | self.assertEqual(parameters, out_parameters) 39 | 40 | @mock.patch('argparse.ArgumentParser.parse_args', 41 | return_value=argparse.Namespace(**{"yaml_config": ['ressources/testing/config.yml']})) 42 | def test_parse_json_config(self, mock_args): 43 | arguments = get_arguments() 44 | arguments.append([str, "json_config", "", "config file overriden by these argparser parameters"]) 45 | parameters = parse_cmd(arguments) 46 | self.assertEqual(parameters['parameter_int'], 1) 47 | self.assertEqual(parameters['parameter_str'], "plop") 48 | self.assertEqual(parameters['parameter_list'], [1, 2, 3]) 49 | self.assertEqual(parameters['parameter_bool'], True) 50 | self.assertEqual(parameters['parameter_dict']['parameter_int'], 3) 51 | 52 | @mock.patch('argparse.ArgumentParser.parse_args', 53 | return_value=argparse.Namespace(**{"yaml_config": ['ressources/testing/config.yml'], "parameter_int": -1})) 54 | def test_parse_json_mix_config(self, mock_args): 55 | arguments = get_arguments() 56 | arguments.append([str, "json_config", "", "config file overriden by these argparser parameters"]) 57 | arguments.append([str, "other_param", "test", "plop"]) 58 | parameters = parse_cmd(arguments) 59 | self.assertEqual(parameters['parameter_int'], -1) 60 | self.assertEqual(parameters['parameter_str'], "plop") 61 | self.assertEqual(parameters['parameter_list'], [1, 2, 3]) 62 | self.assertEqual(parameters['parameter_bool'], True) 63 | self.assertEqual(parameters['other_param'], "test") 64 | 65 | 66 | def get_arguments(): 67 | return [ 68 | [int, "parameter_int", 0, "", lambda x: x < 2], 69 | [str, "parameter_str", "", ""], 70 | ['list[int]', "parameter_list", [1, 5, 6], ""], 71 | [bool, "parameter_bool", False, ""], 72 | ['namespace', 'parameter_dict', [ 73 | [int, 'parameter_int', 5, ''] 74 | ]] 75 | ] 76 | 77 | 78 | def create_yaml_file(): 79 | config_dir = tempfile.mkdtemp() 80 | yaml_content = { 81 | "parameter_int": 2, 82 | "parameter_str": "plip", 83 | "parameter_list": [1, 2, 3], 84 | "parameter_bool": True, 85 | "parameter_dict": { 86 | "parameter_int": 3 87 | } 88 | } 89 | with open(os.path.join(config_dir, 'config.yml'), 'w') as outfile: 90 | yaml.dump(yaml_content, outfile) 91 | return config_dir 92 | 93 | 94 | def init_parameters(): 95 | parameters = { 96 | "parameter_int": None, 97 | "parameter_str": None, 98 | "parameter_list": None, 99 | "parameter_bool": None, 100 | "parameter_dict": { 101 | "parameter_int": None 102 | } 103 | } 104 | return parameters 105 | 
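The tests above pin down the parser's precedence rule: spec defaults are overridden by values from --yaml_config files, which are in turn overridden by explicit CLI flags. A minimal usage sketch of that behaviour (illustrative; it reuses the spec format exercised above and the upstride_argparse entry point the other scripts in this repo import):

import upstride_argparse as argparse

arguments = [
    [int, "parameter_int", 0, "", lambda x: x < 2],
    [str, "parameter_str", "", ""],
    ['list[int]', "parameter_list", [1, 5, 6], ""],
    [bool, "parameter_bool", False, ""],
    ['namespace', 'parameter_dict', [
        [int, 'parameter_int', 5, '']
    ]]
]

# e.g. python my_script.py --yaml_config ressources/testing/config.yml --parameter_int -1
# yields parameter_int == -1 (CLI wins), parameter_str == "plop" and
# parameter_dict['parameter_int'] == 3 (from the yaml), as test_parse_json_mix_config asserts.
config = argparse.parse_cmd(arguments)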
-------------------------------------------------------------------------------- /src/test_tfrecord_extractor.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import shutil 4 | import tempfile 5 | import unittest 6 | import cv2 7 | import numpy as np 8 | from src.data.dataloader import TFRecordExtractor 9 | 10 | sys.path.append(os.path.join(os.path.dirname(os.path.dirname(os.path.realpath(__file__))), 'scripts')) 11 | from tfrecord_by_separate_dir_or_annotation_file import build_tfrecord_dataset 12 | 13 | 14 | class TestTfrecordExtractor(unittest.TestCase): 15 | def test_process(self): 16 | tfrecord_dir_path, dataset_name = create_dataset() 17 | 18 | # Retrieve the train tf records 19 | train_dataset_extractor = TFRecordExtractor(dataset_name, tfrecord_dir_path, "train") 20 | for image, label in train_dataset_extractor.get_tf_dataset().take(1): 21 | # Check train image shape 22 | self.assertEqual(image.shape, (640, 480, 3)) 23 | 24 | val_dataset_extractor = TFRecordExtractor(dataset_name, tfrecord_dir_path, "validation") 25 | for image, label in val_dataset_extractor.get_tf_dataset().take(1): 26 | # Check validation image shape 27 | self.assertEqual(image.shape, (520, 380, 3)) 28 | 29 | test_dataset_extractor = TFRecordExtractor(dataset_name, tfrecord_dir_path, "test") 30 | for image, label in test_dataset_extractor.get_tf_dataset().take(1): 31 | # Check test image shape 32 | self.assertEqual(image.shape, (520, 380, 3)) 33 | 34 | shutil.rmtree(tfrecord_dir_path) 35 | 36 | 37 | def create_dataset(): 38 | TRAIN_EXAMPLE_PER_CLASS = 10 39 | VAL_EXAMPLE_PER_CLASS = 5 40 | TEST_EXAMPLE_PER_CLASS = 4 41 | train_dir = create_fake_dataset_from_directory(TRAIN_EXAMPLE_PER_CLASS) 42 | val_dir, val_annotation_file = create_fake_dataset_with_annotation_file(VAL_EXAMPLE_PER_CLASS) 43 | test_dir, test_annotation_file = create_fake_dataset_with_annotation_file(TEST_EXAMPLE_PER_CLASS) 44 | name = 'Test-dataset' 45 | description = 'A small test datset' 46 | tfrecord_dir_path = tempfile.mkdtemp() 47 | 48 | args = {'name': name, 'description': description, 'tfrecord_dir_path': tfrecord_dir_path, 49 | 'tfrecord_size': 2, 'preprocessing': 'NO', 'image_size': (224, 224), 50 | 'train': {'images_dir_path': train_dir, 51 | 'annotation_file_path': None, 52 | 'delimiter': ',', 53 | 'header_exists': False, 54 | }, 55 | 'validation': {'images_dir_path': val_dir, 56 | 'annotation_file_path': val_annotation_file, 57 | 'delimiter': ',', 58 | 'header_exists': False, 59 | }, 60 | 'test': {'images_dir_path': test_dir, 61 | 'annotation_file_path': test_annotation_file, 62 | 'delimiter': ',', 63 | 'header_exists': False, 64 | } 65 | } 66 | build_tfrecord_dataset(args) 67 | 68 | shutil.rmtree(train_dir) 69 | shutil.rmtree(val_dir) 70 | shutil.rmtree(test_dir) 71 | 72 | return tfrecord_dir_path, name 73 | 74 | 75 | def create_fake_dataset_from_directory(n_images_per_class=2): 76 | dataset_dir = tempfile.mkdtemp() 77 | os.makedirs(os.path.join(dataset_dir, 'cat'), exist_ok=True) 78 | os.makedirs(os.path.join(dataset_dir, 'dog'), exist_ok=True) 79 | for i in range(n_images_per_class): 80 | cv2.imwrite(os.path.join(dataset_dir, 'dog', '{}.jpg'.format(i)), np.ones((640, 480, 3), dtype=np.uint8) * 255) 81 | cv2.imwrite(os.path.join(dataset_dir, 'cat', '{}.jpg'.format(i)), np.ones((640, 480, 3), dtype=np.uint8) * 255) 82 | return dataset_dir 83 | 84 | 85 | def create_fake_dataset_with_annotation_file(n_images_per_class=2): 86 | dataset_dir = tempfile.mkdtemp() 87 | 
os.makedirs(dataset_dir, exist_ok=True) 88 | 89 | annotation_file = os.path.join(dataset_dir, 'annotations.txt') 90 | 91 | labels = ['cat', 'dog'] 92 | 93 | with open(annotation_file, 'w', encoding='utf-8') as f: 94 | for i in range(n_images_per_class*2): 95 | cv2.imwrite(os.path.join(dataset_dir, '{}.jpg'.format(i)), np.ones((520, 380, 3), dtype=np.uint8) * 255) 96 | line = '{}.jpg'.format(i) + "," + labels[i % 2] + "\n" 97 | f.write(line) 98 | 99 | return dataset_dir, annotation_file 100 | -------------------------------------------------------------------------------- /train_keras_tuner.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tensorflow as tf 3 | import upstride_argparse as argparse 4 | from kerastuner.tuners import Hyperband, BayesianOptimization 5 | from src.data import dataloader 6 | from src.models import model_name_to_class 7 | from src.models.generic_model import framework_list 8 | from src.utils import check_folder, get_imagenet_data, model_dir 9 | from submodules.global_dl import global_conf 10 | from submodules.global_dl.training.training import create_env_directories, setup_mp, define_model_in_strategy, get_callbacks, init_custom_checkpoint_callbacks 11 | from submodules.global_dl.training import training 12 | from submodules.global_dl.training import alchemy_api 13 | from submodules.global_dl.training import export 14 | from submodules.global_dl.training.optimizers import get_lr_scheduler, get_optimizer, arguments 15 | from submodules.global_dl.training import optimizers 16 | 17 | 18 | arguments = [ 19 | ['namespace', 'dataloader', dataloader.arguments], 20 | ['namespace', 'server', alchemy_api.arguments], 21 | ['namespace', 'optimizer', optimizers.arguments], 22 | ['namespace', 'export', export.arguments], 23 | ['list[str]', 'frameworks', ['tensorflow'], 'List of framework to use to define the model', lambda x: not any(y not in framework_list for y in x)], 24 | ['namespace', 'factor', [[str, 'scale', 'log', 'linear or log'], [float, 'min', 1, ''], [float, 'max', 1, ''], [float, 'step', 0, ''], ]], 25 | [str, "model_name", '', 'Specify the name of the model', lambda x: x in model_name_to_class], 26 | 27 | ] + global_conf.arguments + training.arguments 28 | 29 | 30 | def main(): 31 | """ function called when starting the code via command-line 32 | """ 33 | args = argparse.parse_cmd(arguments) 34 | args['server'] = alchemy_api.start_training(args['server']) 35 | train(args) 36 | 37 | 38 | def get_values_from_args(args): 39 | if args['scale'] == 'linear': 40 | values = list(range(args['min'], args['max'], args['step'])) 41 | elif args['scale'] == 'log': 42 | values = [] 43 | previous = args['min'] 44 | while previous <= args['max']: 45 | values.append(previous) 46 | previous *= args['step'] 47 | else: 48 | raise ValueError(f"unknown scale '{args['scale']}'") 49 | return values 50 | 51 | 52 | def get_model(args): 53 | def build_model(hp): 54 | factor = hp.Choice('factor', get_values_from_args(args['factor']), ordered=True) 55 | framework = hp.Choice('framework', args['frameworks']) 56 | model = model_name_to_class[args['model_name']](framework, 57 | factor, 58 | args['input_size'], 59 | args['num_classes'], 60 | hp=hp).model 61 | model.compile( 62 | optimizer=get_optimizer(args['optimizer']), 63 | loss='categorical_crossentropy', 64 | metrics=['accuracy']) 65 | return model 66 | return build_model 67 | 68 | 69 | def get_experiment_name(args): 70 | experiment_dir = f"keras_tuner_{args['model_name']}" 71 | if 
args['configuration']['with_mixed_precision']: 72 | experiment_dir += "_mp" 73 | return experiment_dir 74 | 75 | 76 | def train(args): 77 | print(args) 78 | global_conf.config_tf2(args) 79 | checkpoint_dir, log_dir, export_dir = create_env_directories(args, get_experiment_name(args)) 80 | 81 | train_dataset = dataloader.get_dataset(args['dataloader'], transformation_list=args['dataloader']['train_list'], 82 | num_classes=args["num_classes"], split=args['dataloader']['train_split_id']) 83 | val_dataset = dataloader.get_dataset(args['dataloader'], transformation_list=args['dataloader']['val_list'], 84 | num_classes=args["num_classes"], split=args['dataloader']['val_split_id']) 85 | 86 | setup_mp(args) 87 | build_model_fn = get_model(args) 88 | callbacks = get_callbacks(args, log_dir) 89 | 90 | # tuner = Hyperband(build_model_fn, 91 | # objective='val_accuracy', 92 | # max_epochs=args['num_epochs'], 93 | # hyperband_iterations=10e100, 94 | # directory=checkpoint_dir) 95 | 96 | tuner = BayesianOptimization(build_model_fn, 97 | objective='val_accuracy', 98 | max_trials=100000, 99 | num_initial_points=10, 100 | directory=checkpoint_dir) 101 | 102 | tuner.search_space_summary() 103 | tuner.search(x=train_dataset, 104 | validation_data=val_dataset, 105 | callbacks=callbacks, 106 | epochs=args['num_epochs']) 107 | tuner.results_summary() 108 | 109 | 110 | if __name__ == '__main__': 111 | main() 112 | -------------------------------------------------------------------------------- /src/utils.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import glob 3 | import os 4 | import random 5 | import cv2 6 | import numpy as np 7 | import tensorflow as tf 8 | from shutil import copyfile 9 | 10 | 11 | def copy_and_resize(source, dest, img_size): 12 | os.makedirs(dest, exist_ok=True) 13 | 14 | images_extensions = [".jpg", ".png", ".JPEG"] 15 | 16 | if source[-1] == "/": 17 | source = source[:-1] 18 | sources_len = len(source) 19 | for root, dirs, files in os.walk(source): 20 | for d in dirs: 21 | os.makedirs(os.path.join(dest, root[sources_len+1:], d), exist_ok=True) 22 | for f in files: 23 | if os.path.splitext(f)[1] in images_extensions: 24 | # then load, resize and save 25 | image = cv2.imread(os.path.join(root, f)) 26 | image = cv2.resize(image, (img_size, img_size)) 27 | r = cv2.imwrite(os.path.join(dest, root[sources_len+1:], f), image) 28 | if r == False: 29 | raise Exception("issue writing image {}".format(os.path.join(dest, root[sources_len+1:], f))) 30 | else: 31 | # simple copy 32 | copyfile(os.path.join(root, f), os.path.join(dest, root[sources_len+1:], f)) 33 | 34 | 35 | def model_dir(args): 36 | if args.model_name == 'resnet': 37 | return "{}{}".format(args.model_name, args.res_n) 38 | else: 39 | return "{}".format(args.model_name) 40 | 41 | 42 | def get_synset(path: str): 43 | """Parse the LOC_synset_mapping.txt file given in imagenet dataset 44 | 45 | Args: 46 | path (str): path of the LOC_synset_mapping.txt file 47 | 48 | Returns: 49 | dict: dictionary mapping the label to the class id 50 | """ 51 | with open(path) as csv_file: 52 | csv_reader = csv.reader(csv_file, delimiter=' ') 53 | synset_dict = {} 54 | for i, row in enumerate(csv_reader): 55 | synset_dict[row[0]] = i 56 | return synset_dict 57 | 58 | 59 | def get_paths(dir: str): 60 | pattern = os.path.join(dir, '**', '*.JPEG') 61 | return glob.glob(pattern, recursive=True) 62 | 63 | 64 | def get_partial_paths(dir, percentage): 65 | """ 66 | 67 | :param dir: train data directory 68 | 
:param percentage: based on the percentage select the partial images for each class 69 | :return: 70 | """ 71 | random.seed(1) 72 | percentage /= 100 73 | class_dirs = os.listdir(dir) 74 | paths = [] 75 | for d in class_dirs: 76 | class_paths = glob.glob(os.path.join(dir, d, '*.JPEG')) 77 | random.shuffle(class_paths) 78 | end = int(len(class_paths) * percentage) 79 | paths += class_paths[0:end] 80 | return paths 81 | 82 | 83 | def get_val_label_dict(val_gt_path: str): 84 | with open(val_gt_path) as csv_file: 85 | csv_reader = csv.reader(csv_file, delimiter=',') 86 | next(csv_reader, None) # skip the header 87 | val_dict = {} 88 | for row in csv_reader: 89 | val_dict[row[0]] = row[1].split(" ")[0] 90 | return val_dict 91 | 92 | 93 | def get_imagenet_data(imagenet_data_args): 94 | """parse a imagenet dataset files and return usefull data for training and validation 95 | 96 | Args: 97 | synset_path (str): for instance "/home/user/upstride-tests/ILSVRC/LOC_synset_mapping.txt" 98 | train_dir (str): for instance "/home/user/upstride-tests/ILSVRC/Data/CLS-LOC/train/" 99 | training_percentage (int): 100 for training on the whole dataset 100 | val_dir (str): for instance "/home/user/upstride-tests/ILSVRC/Data/CLS-LOC/val/" 101 | val_gt_path (str): for instance "/home/user/upstride-tests/ILSVRC/LOC_val_solution.csv" 102 | 103 | Returns: 104 | tuple of 4 elements : train_paths, train_labels, val_paths, val_labels 105 | paths are lists of strings, labels are lists of integers 106 | """ 107 | synset_path = imagenet_data_args['synset_path'] 108 | train_dir = imagenet_data_args['train_dir'] 109 | training_percentage = imagenet_data_args['train_data_percentage'] 110 | val_dir = imagenet_data_args['val_dir'] 111 | val_gt_path = imagenet_data_args['val_gt_path'] 112 | 113 | synset = get_synset(synset_path) 114 | train_paths = get_paths(train_dir) if training_percentage == 100 else get_partial_paths(train_dir, training_percentage) 115 | train_labels = [synset[path.split("/")[-2]] for path in train_paths] 116 | 117 | # train data are shuffled 118 | random.seed(0) 119 | combined = list(zip(train_paths, train_labels)) 120 | random.shuffle(combined) 121 | train_paths, train_labels = zip(*combined) 122 | 123 | val_label_dict = get_val_label_dict(val_gt_path) 124 | val_paths = get_paths(val_dir) 125 | val_labels = [synset[val_label_dict[path.split("/")[-1].split(".")[0]]] for path in val_paths] 126 | 127 | return train_paths, train_labels, val_paths, val_labels 128 | 129 | 130 | def check_folder(log_dir): 131 | # TODO os.makedirs(..., exists_ok=True) does the job 132 | if not os.path.exists(log_dir): 133 | os.makedirs(log_dir) 134 | return log_dir 135 | -------------------------------------------------------------------------------- /src/models/wide_resnet.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | from .generic_model import GenericModelBuilder 4 | 5 | 6 | weight_init = tf.keras.initializers.VarianceScaling() 7 | 8 | 9 | class WideResNet(GenericModelBuilder): 10 | def __init__(self, *args, **kwargs): 11 | super(WideResNet, self).__init__(*args, **kwargs) 12 | 13 | def model(self, x): 14 | layers = self.layers 15 | weight_regularizer = self.weight_regularizer 16 | num_blocks_per_resnet = self.blocks_per_group 17 | filters = [int(16/self.factor), 18 | int(16*self.channel_multiplier/self.factor), 19 | int(32*self.channel_multiplier/self.factor), 20 | int(64*self.channel_multiplier/self.factor)] 21 | strides = [1, 2, 2] # 
stride for each resblock 22 | final_stride_val = np.prod(strides) 23 | 24 | ch = filters[0] 25 | x = layers.Conv2D(ch, 3, kernel_initializer=weight_init, kernel_regularizer=weight_regularizer, padding="same", name='conv')(x) 26 | 27 | first_x = x # Res from the beginning 28 | 29 | for block_num in range(1, 4): 30 | orig_x = x # Res from previous block 31 | activate_before_residual = True if block_num == 1 else False 32 | block_name = f'resblock_{block_num}' 33 | x = self.resblock(x, filters[block_num - 1], filters[block_num], stride=strides[block_num-1], 34 | activate_before_residual=activate_before_residual, block_name=block_name+'_0') 35 | for i in range(1, num_blocks_per_resnet): 36 | x = self.resblock(x, filters[block_num], filters[block_num], stride=1, 37 | activate_before_residual=False, block_name=block_name+f'_{i}') 38 | orig_x = self._conform_size(filters[block_num - 1], filters[block_num], 39 | strides[block_num - 1], orig_x, block_name=block_name+f'_{i}') 40 | x = layers.Add()([x, orig_x]) 41 | 42 | orig_x = self._conform_size(filters[0], filters[-1],final_stride_val, first_x, 'last_block') 43 | x = layers.Add()([x, orig_x]) 44 | 45 | x = layers.BatchNormalization(axis=self.channel_axis, name='batch_norm_last')(x) 46 | x = layers.Activation('relu', name='relu_last')(x) 47 | x = layers.GlobalAveragePooling2D()(x) 48 | return x 49 | 50 | 51 | def resblock(self, x, in_filter, out_filter, stride=1, use_bias=False, activate_before_residual=False, block_name='resblock'): 52 | layers = self.layers 53 | weight_regularizer = self.weight_regularizer 54 | if activate_before_residual: 55 | x = layers.BatchNormalization(axis=self.channel_axis, name=block_name + '/batch_norm_0')(x) 56 | x = layers.Activation('relu', name=block_name + '/relu_0')(x) 57 | x_init = x 58 | else: 59 | x_init = x 60 | x = layers.BatchNormalization(axis=self.channel_axis, name=block_name + '/batch_norm_0')(x) 61 | x = layers.Activation('relu', name=block_name + '/relu_0')(x) 62 | 63 | x = layers.Conv2D(out_filter, 3, stride, kernel_initializer=weight_init, kernel_regularizer=weight_regularizer, 64 | use_bias=use_bias, padding='same', name=block_name + '/conv_0')(x) 65 | x = layers.BatchNormalization(axis=self.channel_axis, name=block_name + '/batch_norm_1')(x) 66 | x = layers.Activation('relu', name=block_name + '/relu_1')(x) 67 | x = layers.Conv2D(out_filter, 3, 1, kernel_initializer=weight_init, kernel_regularizer=weight_regularizer, 68 | use_bias=use_bias, padding='same', name=block_name + '/conv_1')(x) 69 | 70 | x_init = self._conform_size(in_filter, out_filter, stride, x_init, block_name) 71 | x = layers.Add()([x, x_init]) 72 | return x 73 | 74 | def _conform_size(self, in_filter, out_filter, stride, x_init, block_name): 75 | layers = self.layers 76 | if in_filter != out_filter: 77 | x_init = layers.AveragePooling2D(pool_size=(stride, stride), name=block_name + '/avg_pool_0')(x_init) 78 | # hack to pad the channels 79 | if self.is_channels_first: 80 | x_init = tf.transpose(x_init, [0, 2, 3, 1]) # put the channels at index 3 81 | x_init = layers.ZeroPadding2D(padding=(0,(out_filter-in_filter)//2), name=block_name + '/zero_pad_0')(x_init) 82 | x_init = tf.transpose(x_init, [0, 3, 1, 2]) # put the channels back at index 1 83 | else: 84 | x_init = tf.transpose(x_init, [0, 3, 1, 2]) # put the channels at index 1 85 | x_init = layers.ZeroPadding2D(padding=((out_filter-in_filter)//2,0), name=block_name + '/zero_pad_0')(x_init) 86 | x_init = tf.transpose(x_init, [0, 2, 3, 1]) # put the channels back at index 3 87 | 
return x_init 88 | 89 | class WideResNet28_10(WideResNet): 90 | def __init__(self, *args, **kwargs): 91 | self.channel_multiplier = 10 92 | self.blocks_per_group = 4 93 | super().__init__(*args, **kwargs) 94 | 95 | class WideResNet40_2(WideResNet): 96 | def __init__(self, *args, **kwargs): 97 | self.channel_multiplier = 2 98 | self.blocks_per_group = 6 99 | super().__init__(*args, **kwargs) 100 | 101 | 102 | 103 | -------------------------------------------------------------------------------- /src/models/complexnet.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from .generic_model import GenericModelBuilder 3 | 4 | 5 | class ComplexNet(GenericModelBuilder): 6 | def __init__(self, *args, **kwargs): 7 | super().__init__(*args, **kwargs) 8 | self.bn_args = { 9 | "axis": self.channel_axis, 10 | "momentum": 0.9, 11 | "epsilon": 1e-04 12 | } 13 | self.conv_args = { 14 | "padding": "same", 15 | "use_bias": False, 16 | "kernel_regularizer": self.weight_regularizer, 17 | "kernel_initializer": "he_ind" 18 | } 19 | 20 | def residual_block(self, x, channels: int, downsample=False): 21 | layers = self.layers 22 | x_init = x 23 | strides = (2, 2) if downsample else (1, 1) 24 | x = layers.BatchNormalizationC(**self.bn_args)(x) 25 | x = layers.Activation('relu')(x) 26 | x = layers.Conv2D(channels, 3, strides, **self.conv_args)(x) 27 | x = layers.BatchNormalizationC(**self.bn_args)(x) 28 | x = layers.Activation('relu')(x) 29 | x = layers.Conv2D(channels, 3, **self.conv_args)(x) 30 | if not downsample: 31 | x = layers.Add()([x, x_init]) 32 | else: 33 | x_init = layers.Conv2D(channels, 1, 2, **self.conv_args)(x_init) 34 | x = layers.Concatenate(axis=self.channel_axis)([x_init, x]) 35 | return x 36 | 37 | def learnVectorBlock(self, x): 38 | x = tf.keras.layers.BatchNormalization(**self.bn_args)(x) 39 | x = tf.keras.layers.Activation('relu')(x) 40 | x = tf.keras.layers.Convolution2D(3, self.lvb_kernel_size, kernel_initializer='he_normal', **self.conv_args)(x) 41 | x = tf.keras.layers.BatchNormalization(**self.bn_args)(x) 42 | x = tf.keras.layers.Activation('relu')(x) 43 | x = tf.keras.layers.Convolution2D(3, self.lvb_kernel_size, kernel_initializer='he_normal', **self.conv_args)(x) 44 | return x 45 | 46 | def model(self, x): 47 | n_channels = self.n_channels_type_0 // self.factor 48 | layers = self.layers 49 | if layers == tf.keras.layers: 50 | print("real definition") 51 | r = x 52 | x = self.learnVectorBlock(x) 53 | x = tf.keras.layers.Concatenate(axis=self.channel_axis)([r, x]) 54 | self.conv_args['kernel_initializer'] = 'he_normal' 55 | 56 | x = self.layers.Conv2D(n_channels, 3, **self.conv_args)(x) 57 | x = self.layers.BatchNormalizationC(**self.bn_args)(x) 58 | x = self.layers.Activation('relu')(x) 59 | 60 | # First stage 61 | for i in range(self.n_blocks): # -1 because the last one is a downsample 62 | x = self.residual_block(x, n_channels) 63 | x = self.residual_block(x, n_channels, True) 64 | 65 | # stage 2 66 | for i in range(self.n_blocks - 1): # -1 because the last one is a downsample and one is removed (see paper) 67 | x = self.residual_block(x, n_channels * 2) 68 | x = self.residual_block(x, n_channels * 2, True) 69 | 70 | # stage 3 71 | for i in range(self.n_blocks - 1): # -1 because the last one is a downsample and one is removed (see paper) 72 | x = self.residual_block(x, n_channels * 4) 73 | 74 | x = self.layers.GlobalAveragePooling2D()(x) 75 | 76 | return x 77 | 78 | 79 | # Definition from the Quaternion Paper 80 | class 
ShallowComplexNet(ComplexNet): 81 | def __init__(self, *args, **kwargs): 82 | self.conv_init = None 83 | self.n_blocks = 2 84 | self.n_channels_type_0 = 32 85 | self.lvb_kernel_size = 3 86 | super().__init__(*args, **kwargs) 87 | 88 | 89 | class DeepComplexNet(ComplexNet): 90 | def __init__(self, *args, **kwargs): 91 | self.n_blocks = 11 92 | self.n_channels_type_0 = 32 93 | self.lvb_kernel_size = 3 94 | super().__init__(*args, **kwargs) 95 | 96 | 97 | # definitions from the complex paper 98 | 99 | # Wide and shallow definition 100 | class WSComplexNetTF(ComplexNet): 101 | def __init__(self, *args, **kwargs): 102 | self.n_blocks = 14 103 | self.n_channels_type_0 = 18 104 | self.lvb_kernel_size = 1 105 | super().__init__(*args, **kwargs) 106 | 107 | 108 | class WSComplexNetUpStride(ComplexNet): 109 | def __init__(self, *args, **kwargs): 110 | self.n_blocks = 16 111 | self.n_channels_type_0 = 12 * 2 # because 12 is the number of complex filters and we use factor 2 112 | self.lvb_kernel_size = 1 113 | super().__init__(*args, **kwargs) 114 | 115 | # Deep and Narrow 116 | 117 | 118 | class DNComplexNetTF(ComplexNet): 119 | def __init__(self, *args, **kwargs): 120 | self.n_blocks = 23 121 | self.n_channels_type_0 = 14 122 | self.lvb_kernel_size = 1 123 | super().__init__(*args, **kwargs) 124 | 125 | 126 | class DNComplexNetUpStride(ComplexNet): 127 | def __init__(self, *args, **kwargs): 128 | self.n_blocks = 23 129 | self.n_channels_type_0 = 10 * 2 # because 10 is the number of complex filters and we use factor 2 130 | self.lvb_kernel_size = 1 131 | super().__init__(*args, **kwargs) 132 | 133 | 134 | # In Between 135 | class IBComplexNetTF(ComplexNet): 136 | def __init__(self, *args, **kwargs): 137 | self.n_blocks = 18 138 | self.n_channels_type_0 = 16 139 | self.lvb_kernel_size = 1 140 | super().__init__(*args, **kwargs) 141 | 142 | 143 | class IBComplexNetUpStride(ComplexNet): 144 | def __init__(self, *args, **kwargs): 145 | self.n_blocks = 19 146 | self.n_channels_type_0 = 11 * 2 # because 11 is the number of complex filters and we use factor 2 147 | self.lvb_kernel_size = 1 148 | super().__init__(*args, **kwargs) 149 | -------------------------------------------------------------------------------- /src/models/generic_model.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | import tensorflow as tf 3 | 4 | """ 5 | Question: 6 | - Weight decay ? 7 | - kwargs for specific stuff ? 
8 | """ 9 | 10 | 11 | def load_upstride(upstride_type: int): 12 | """This function load one of upstride types 13 | """ 14 | if upstride_type == -1: 15 | return None 16 | if upstride_type == 0: 17 | import upstride.type0.tf.keras.layers as up_layers 18 | return up_layers 19 | if upstride_type == 1: 20 | import upstride.type1.tf.keras.layers as up_layers 21 | return up_layers 22 | if upstride_type == 2: 23 | import upstride.type2.tf.keras.layers as up_layers 24 | return up_layers 25 | if upstride_type == 3: 26 | import upstride.type3.tf.keras.layers as up_layers 27 | return up_layers 28 | 29 | 30 | class GenericModelBuilder: 31 | def __init__(self, input_size, changing_ids: List[str], num_classes, factor=1, upstride_type=-1, tf2upstride_strategy="", upstride2tf_strategy="", weight_decay=0, **kwargs): 32 | self.input_size = input_size 33 | self.num_classes = num_classes 34 | self.factor = factor 35 | self.upstride_type = upstride_type 36 | self.tf2upstride_strategy = tf2upstride_strategy 37 | self.upstride2tf_strategy = upstride2tf_strategy 38 | self.is_channels_first = True if tf.keras.backend.image_data_format() == 'channels_first' else False 39 | self.channel_axis = 1 if self.is_channels_first else -1 40 | 41 | # Configure list of ids to change framework 42 | if upstride_type == -1: 43 | # then no switch between tf and upstride 44 | self.changing_ids = [] 45 | elif changing_ids == []: 46 | # then set default parameters 47 | self.changing_ids = ['beginning', 'end_after_dense'] 48 | else: 49 | self.changing_ids = changing_ids 50 | 51 | # kwargs contains special parameter that can be specific for one model. For instance 52 | # - load_searched_arch for architecture search method 53 | # - drop_path_prob for fb-net 54 | # - conversion_params if tf2upstride or upstride2tf need specific parameters 55 | # - hp : the keras-tuner hyperparameters 56 | self.kwargs = kwargs 57 | 58 | # self.layers is the layers package to use when building the neural network 59 | self.layers = tf.keras.layers 60 | self.upstride_layers = load_upstride(upstride_type) 61 | self._is_using_tf_layers = True 62 | 63 | # weight_regularizer can be call in the model definition in any subclass of GenericModel 64 | self.weight_regularizer = tf.keras.regularizers.l2(l=weight_decay) 65 | 66 | # if the model use custom keras Model then overide this 67 | # This is usefull for SAM method 68 | self.model_class = tf.keras.Model 69 | 70 | # if the model use other inputs than the image then it need to add these tensors in this list 71 | # This is usefull for P-Darts, FB-NET and SAM methods 72 | self.inputs = [] 73 | 74 | 75 | def change_framework_if_necessary(self, id, inputs): 76 | """ When defining a custom model, this function should be called every time it can make sense to switch 77 | between tensorflow and upstride 78 | 79 | Args: 80 | x: can be a tensor or a list of tensors. 
81 | 82 | Return: a tensor if x is a tensor, a list of tensors if x is a list of tensors 83 | """ 84 | 85 | inputs_is_single_tensor = False 86 | if type(inputs) is not list: 87 | inputs_is_single_tensor = True 88 | inputs = [inputs] 89 | 90 | if id in self.changing_ids: 91 | if self._is_using_tf_layers: 92 | # Then converting from Tensorflow to Upstride 93 | self._is_using_tf_layers = False 94 | self.layers = self.upstride_layers 95 | 96 | out_tensors = [] 97 | for x in inputs: 98 | out_tensors.append(self.upstride_layers.TF2Upstride(self.tf2upstride_strategy)(x)) 99 | else: 100 | # Then converting from Upstride to Tensorflow 101 | self._is_using_tf_layers = True 102 | self.layers = tf.keras.layers 103 | 104 | out_tensors = [] 105 | for x in inputs: 106 | out_tensors.append(self.upstride_layers.Upstride2TF(self.upstride2tf_strategy)(x)) 107 | else: 108 | # Don't change the input 109 | out_tensors = inputs 110 | 111 | if inputs_is_single_tensor: 112 | out_tensors = out_tensors[0] 113 | 114 | return out_tensors 115 | 116 | def model(self, x): 117 | raise NotImplementedError("you need to overide method model") 118 | 119 | def build(self): 120 | inputs = tf.keras.layers.Input(shape=self.input_size) 121 | self.inputs.append(inputs) 122 | if self.is_channels_first: 123 | inputs = tf.keras.layers.Lambda(lambda x: tf.transpose(x, [0, 3, 1, 2]),name='channels_first')(inputs) 124 | x = self.change_framework_if_necessary("beginning", inputs) 125 | # output_tensors is the list of the vectors to use to compute classification losses (main output + auxilary losses) 126 | output_tensors = self.model(x) 127 | if type(output_tensors) != list: 128 | output_tensors = [output_tensors] 129 | 130 | output_tensors = self.change_framework_if_necessary("end_before_dense", output_tensors) 131 | for i, x in enumerate(output_tensors): 132 | output_tensors[i] = self.layers.Dense(self.num_classes, use_bias=True, name=f'Logits_{i}', kernel_regularizer=self.weight_regularizer)(x) 133 | output_tensors = self.change_framework_if_necessary("end_after_dense", output_tensors) 134 | 135 | for i, x in enumerate(output_tensors): 136 | output_tensors[i] = tf.keras.layers.Activation("softmax", dtype=tf.float32)(x) # dtype float32 is important because of mixed precision 137 | 138 | model = self.model_class(self.inputs, output_tensors) 139 | 140 | return model 141 | -------------------------------------------------------------------------------- /src/models/hypermodels.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | # from kerastuner.applications import HyperResNet 3 | from .generic_model import GenericModelBuilder 4 | 5 | 6 | class SimpleHyper(GenericModelBuilder): 7 | def model(self, x): 8 | x = self.layers.Conv2D(self.hp.Int('conv1_filter', 9 | min_value=32//self.factor, 10 | max_value=512//self.factor, 11 | step=32//self.factor), (5, 5), 2, padding='same', 12 | use_bias=False, 13 | name='conv_1')(x) 14 | x = self.layers.BatchNormalization()(x) 15 | x = self.layers.Activation('relu')(x) 16 | x = self.layers.MaxPooling2D((3, 3), strides=(2, 2))(x) 17 | for i in range(self.hp.Int('repeat_conv', 18 | min_value=1, 19 | max_value=3, 20 | step=1)): 21 | x = self.layers.Conv2D(self.hp.Int('conv_filter', 22 | min_value=32//self.factor, 23 | max_value=512//self.factor, 24 | step=32//self.factor), (3, 3), padding='same', 25 | use_bias=False)(x) 26 | x = self.layers.BatchNormalization()(x) 27 | x = self.layers.Activation('relu')(x) 28 | x = self.layers.MaxPooling2D((3, 3), 
strides=(2, 2))(x) 29 | x = self.layers.Flatten()(x) 30 | x = self.layers.Dense(self.label_dim, 31 | use_bias=True, 32 | name='dense_1')(x) 33 | return x 34 | 35 | 36 | class ResNetV2Hyper(GenericModelBuilder): 37 | """code from https://github.com/keras-team/keras-tuner/blob/master/kerastuner/applications/resnet.py 38 | """ 39 | 40 | def model(self, x): # x: input tensor provided by GenericModelBuilder.build 41 | conv3_depth = self.hp.Choice('conv3_depth', [4, 8]) 42 | conv4_depth = self.hp.Choice('conv4_depth', [6, 23, 36]) 43 | factor = self.hp.Int('factor', min_value=1, max_value=8, step=1) 44 | preact = True 45 | use_bias = True 46 | 47 | # Model definition. 48 | bn_axis = 3 if tf.keras.backend.image_data_format() == 'channels_last' else 1 49 | 50 | # Initial conv2d block. 51 | x = self.layers.ZeroPadding2D(padding=((3, 3), (3, 3)), name='conv1_pad')(x) 52 | x = self.layers.Conv2D(64 // factor, 7, strides=2, use_bias=use_bias, name='conv1_conv')(x) 53 | x = self.layers.ZeroPadding2D(padding=((1, 1), (1, 1)), name='pool1_pad')(x) 54 | x = self.layers.MaxPooling2D(3, strides=2, name='pool1_pool')(x) 55 | 56 | # Middle hypertunable stack. 57 | x = stack2(self.layers, x, 64 // factor, 3, name='conv2') 58 | x = stack2(self.layers, x, 128 // factor, conv3_depth, name='conv3') 59 | x = stack2(self.layers, x, 256 // factor, conv4_depth, name='conv4') 60 | x = stack2(self.layers, x, 512 // factor, 3, stride1=1, name='conv5') 61 | 62 | # Top of the model. 63 | x = self.layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5, name='post_bn')(x) 64 | x = self.layers.Activation('relu', name='post_relu')(x) 65 | 66 | pooling = self.hp.Choice('pooling', ['avg', 'max'], default='avg') 67 | if pooling == 'avg': 68 | x = self.layers.GlobalAveragePooling2D(name='avg_pool')(x) 69 | elif pooling == 'max': 70 | x = self.layers.GlobalMaxPooling2D(name='max_pool')(x) 71 | 72 | x = self.layers.Dense(self.label_dim, activation='softmax', name='probs')(x) 73 | return x 74 | 75 | 76 | def block2(layers, x, filters, kernel_size=3, stride=1, conv_shortcut=False, name=None): 77 | """A residual block. 78 | # Arguments 79 | x: input tensor. 80 | filters: integer, filters of the bottleneck layer. 81 | kernel_size: default 3, kernel size of the bottleneck layer. 82 | stride: default 1, stride of the first layer. 83 | conv_shortcut: default False, use convolution shortcut if True, 84 | otherwise identity shortcut. 85 | name: string, block label. 86 | # Returns 87 | Output tensor for the residual block. 
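    # Example
        (illustrative) x = block2(tf.keras.layers, x, 64, stride=2, conv_shortcut=True, name='conv2_block1')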
88 | """ 89 | bn_axis = 3 if tf.keras.backend.image_data_format() == 'channels_last' else 1 90 | 91 | preact = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5, name=name + '_preact_bn')(x) 92 | preact = layers.Activation('relu', name=name + '_preact_relu')(preact) 93 | 94 | if conv_shortcut is True: 95 | shortcut = layers.Conv2D(4 * filters, 1, strides=stride, name=name + '_0_conv')(preact) 96 | else: 97 | shortcut = layers.MaxPooling2D(1, strides=stride)(x) if stride > 1 else x 98 | 99 | x = layers.Conv2D(filters, 1, strides=1, use_bias=False, name=name + '_1_conv')(preact) 100 | x = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5, name=name + '_1_bn')(x) 101 | x = layers.Activation('relu', name=name + '_1_relu')(x) 102 | 103 | x = layers.ZeroPadding2D(padding=((1, 1), (1, 1)), name=name + '_2_pad')(x) 104 | x = layers.Conv2D(filters, kernel_size, strides=stride, use_bias=False, name=name + '_2_conv')(x) 105 | x = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5, name=name + '_2_bn')(x) 106 | x = layers.Activation('relu', name=name + '_2_relu')(x) 107 | 108 | x = layers.Conv2D(4 * filters, 1, name=name + '_3_conv')(x) 109 | x = layers.Add(name=name + '_out')([shortcut, x]) 110 | return x 111 | 112 | 113 | def stack2(layers, x, filters, blocks, stride1=2, name=None): 114 | """A set of stacked residual blocks. 115 | # Arguments 116 | x: input tensor. 117 | filters: integer, filters of the bottleneck layer in a block. 118 | blocks: integer, blocks in the stacked blocks. 119 | stride1: default 2, stride of the first layer in the first block. 120 | name: string, stack label. 121 | # Returns 122 | Output tensor for the stacked blocks. 123 | """ 124 | x = block2(layers, x, filters, conv_shortcut=True, name=name + '_block1') 125 | for i in range(2, blocks): 126 | x = block2(layers, x, filters, name=name + '_block' + str(i)) 127 | x = block2(layers, x, filters, stride=stride1, name=name + '_block' + str(blocks)) 128 | return x 129 | -------------------------------------------------------------------------------- /inference_benchmark.py: -------------------------------------------------------------------------------- 1 | """Script to benchmark several versions of Tensorflow and Upstride Tech on different hardware and docker platforms 2 | 3 | to start a new benchmark, you can run 4 | python inference_benchmark.py --yaml_config conf1.yml conf2.yml --comments "small test" 5 | """ 6 | import os 7 | from typing import List 8 | import requests 9 | import json 10 | import yaml 11 | import upstride_argparse as argparse 12 | 13 | ENGINES = ['upstride_0', 'upstride_1', 'upstride_2', 'upstride_3', 'tensorflow'] 14 | 15 | inference_arguments = [ 16 | [int, "batch_size", 1, 'The size of batch per gpu', lambda x: x > 0], 17 | [str, "comments", "", 'some comment about this benchmark run. 
Will be displayed on the model zoo'], 18 | [bool, 'cpu', False, 'if True then force cpu use'], 19 | [int, 'cuda_visible_device', 0, 'the gpu to run the benchmark on'], 20 | ['list[str]', "docker_images", [], "list of docker images to test"], 21 | ['list[str]', "engines", [], "list of engines to test", lambda x: all(engine in ENGINES for engine in x)], 22 | [float, 'factor', 1, 'division factor for the number of channels per layer'], 23 | [str, "model_path", "", 'Specify the model path, to work on a real model instead of a fake one with random weights'], 24 | ['list[str]', "models", [], "list of models to test"], 25 | [str, "output", "results.md", "file with results"], 26 | [str, "profiling_dir", "/tmp", "dir where profiling files will be written"], 27 | [int, 'n_steps', 10, "number of steps to run the inference. The higher the better"], 28 | [bool, "tensorrt", False, "if true then models will be converted to tensorrt"], 29 | [str, "tensorrt_precision", 'FP32', 'Provide precision FP32 or FP16 for optimizing tensorrt'], 30 | ['list[str]', "yaml_config", [], "config files setting these options; several can be provided"], 31 | [bool, "xla", False, "if true then use xla"], 32 | ] 33 | 34 | 35 | def create_all_environment_configs(conf): 36 | """Create a list of dicts with the docker, model and engine to benchmark. 37 | Will create all possible triplets 38 | 39 | Returns: 40 | List of Dict with docker, model and engine 41 | """ 42 | env_configs = [] 43 | for docker in conf["docker_images"]: 44 | for model in conf["models"]: 45 | for engine in conf["engines"]: 46 | env_configs.append({"docker": docker, 47 | "model": model, 48 | "engine": engine}) 49 | return env_configs 50 | 51 | 52 | def prepare_docker(docker_images: List[str]): 53 | """download all docker images to prepare the benchmark 54 | 55 | there is one exception: if docker_images is "local", the benchmark will run with the host python 56 | without using docker 57 | 58 | Args: 59 | docker_images (List[str]): should be formatted as ["docker_tag:docker_label", ...] 
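    Example (illustrative):
        prepare_docker(["tensorflow/tensorflow:2.4.1-gpu", "local"])
        # pulls the first image; "local" is skipped, so that configuration runs with the host python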
60 | """ 61 | for docker_image in docker_images: 62 | if docker_image == "local": 63 | continue 64 | print(f"Pulling {docker_image}") 65 | stream = os.popen(f"docker pull {docker_image}") 66 | out = stream.read() 67 | print(out) 68 | 69 | 70 | def docker_run_cmd(docker, engine, model, config): 71 | # dev note: all option need to have a space at the end 72 | python_cmd = f"python3 src/inference_benchmark.py "\ 73 | f"--batch_size {config['batch_size']} "\ 74 | f"--engine {engine} "\ 75 | f"--factor {config['factor']} "\ 76 | f"--model_name {model} "\ 77 | f"--n_steps {config['n_steps']} "\ 78 | f"--profiler_path {config['profiling_dir']} " 79 | if config['tensorrt']: 80 | python_cmd += f"--export_tensorrt " 81 | python_cmd += f"--tensorrt_precision {config['tensorrt_precision']} " 82 | if config['model_path']: 83 | python_cmd += f"--model_path {config['model_path']} " 84 | if config['xla']: 85 | python_cmd += f"--xla " 86 | 87 | if docker == "local": 88 | # then run without docker 89 | return python_cmd 90 | 91 | runtime = f"--gpus all -e CUDA_VISIBLE_DEVICES={config['cuda_visible_device']}" if "gpu" in docker and not config['cpu'] else "" 92 | volumes = " -v $(pwd)/src:/src -v /tmp/docker:/tmp" 93 | # Add a volume to save the profiling 94 | # docker need also to be run with the privileged parameter to access gpu information 95 | volumes += " -v $(pwd)/profiling:/profiling --privileged=true" 96 | return f"docker run -it --rm {runtime} {volumes} {docker} {python_cmd}" 97 | 98 | 99 | def format_results(env_configs, results, output_file): 100 | with open(output_file, "w") as f: 101 | f.write(f"| docker |engine |model |n_iteration|total time|time per iteration|FPS |\n") 102 | f.write(f"|:------------------------------------------------------------:|:----------:|:-------:|:---------:|:--------:|:----------------:|:-----:|\n") 103 | for i in range(len(results)): 104 | result = results[i] 105 | env_config = env_configs[i] 106 | time_per_iteration = result['total_time']/result['n_iterations'] 107 | fps = 1/time_per_iteration 108 | line = f"| {env_config['docker']} | {env_config['engine']: <10} | {env_config['model']} | {result['n_iterations']} | {result['total_time']:.2f} | {time_per_iteration:.3f} | {fps:.1f} |\n" 109 | f.write(line) 110 | 111 | 112 | def benchmark(config): 113 | print(config) 114 | # currently first gpu is being picked if there are multiple GPUs. 
115 | # conf['hardware']['gpu'] = get_gpu_info().get('name')[0] 116 | prepare_docker(config["docker_images"]) 117 | # benchmark all docker images against all models against all engines 118 | env_configs = create_all_environment_configs(config) 119 | results = [] 120 | for env_config in env_configs: 121 | print(f"Benchmark {env_config['model']} using {env_config['engine']} on {env_config['docker']}") 122 | cmd = docker_run_cmd(env_config['docker'], env_config['engine'], env_config['model'], config) 123 | print(cmd) 124 | stream = os.popen(cmd) 125 | out = stream.read() 126 | print(out) 127 | # scan the output from the end for the last line that is a JSON result 128 | i = 2 129 | while out.split('\n')[-i][0] != '{': 130 | i += 1 131 | # print(i) 132 | r = json.loads(out.split('\n')[-i]) 133 | results.append(r) 134 | format_results(env_configs, results, config["output"]) 135 | 136 | 137 | if __name__ == "__main__": 138 | config = argparse.parse_cmd(inference_arguments) 139 | benchmark(config) 140 | -------------------------------------------------------------------------------- /src/models/fbnetv2.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | from typing import List 4 | import yaml 5 | import tensorflow as tf 6 | import numpy as np 7 | 8 | # global variables needed for softmax gumbel computation in ChannelMasking 9 | temperature = 5.0 # should be multiplied by 0.956 at the end of every epoch, see section 4.1 in the paper 10 | 11 | 12 | def define_temperature(new_temperature): 13 | global temperature 14 | temperature = new_temperature 15 | 16 | 17 | def create_binary_vector(channel_sizes: List[int], dtype) -> List[tf.Tensor]: 18 | """this function returns a list of vectors with ones at the beginning and zeros at the end 19 | it uses numpy because there is no reason for these operations to be inside the tensorflow graph. 20 | 21 | Args: 22 | channel_sizes (List[int]): number of channels in the convolution 23 | 24 | Returns: 25 | List[tf.Tensor]: list of vectors like [1., 1., 1., 0., 0., 0.] 26 | """ 27 | binary_vectors = [] 28 | max_size = channel_sizes[-1] 29 | for i in range(len(channel_sizes)): 30 | ones = np.ones(channel_sizes[i]) 31 | zeros = np.zeros(max_size - channel_sizes[i]) 32 | binary_vectors.append(tf.convert_to_tensor(np.concatenate([ones, zeros], 0), dtype=dtype)) 33 | return binary_vectors 34 | 35 | 36 | def gumbel_softmax(logits, gumble_noise=False): 37 | """please have a look at https://arxiv.org/pdf/1611.01144.pdf for the Gumbel-Softmax definition 38 | """ 39 | global temperature 40 | 41 | if gumble_noise: 42 | # Gumbel distribution -log(-log(u)), where u ~ (0,1) is a uniform distribution and 43 | # must be sampled from the open interval (0, 1), but tf.random.uniform generates samples 44 | # where the lower bound minval is included, i.e. [0, 1). To force the range 45 | # to be (0, 1), np.finfo(float).tiny is used as minval, which gives a tiny positive floating point number 46 | u = tf.random.uniform(minval=np.finfo(float).tiny, maxval=1.0, shape=tf.shape(logits)) 47 | noise = -tf.math.log(-tf.math.log(u)) # noise from the Gumbel distribution 48 | else: 49 | noise = 0.0001 50 | # During mixed precision training, the weight variable's data type is inferred from "inputs" in the call method. 51 | # This causes alpha to be converted to float16. 
52 | # Since we are computing softmax at the end, we need to convert logits(alpha) to float32 53 | logits = tf.cast(logits, tf.float32) 54 | noisy_logits = (noise + logits) / temperature 55 | 56 | return tf.math.softmax(noisy_logits) 57 | 58 | 59 | def get_mask(binary_vectors: List[tf.Tensor], g: List[float]): 60 | vectors = [g[i] * binary_vectors[i] for i in range(g.shape[0])] 61 | vectors = tf.stack(vectors, axis=0) 62 | vector = tf.reduce_sum(vectors, axis=0) 63 | return vector 64 | 65 | 66 | class ChannelMasking(tf.keras.layers.Layer): 67 | def __init__(self, min: int, max: int, step: int, name: str, gumble_noise=True, regularizer=None): 68 | super().__init__(name=name) 69 | self.min = min 70 | self.max = max 71 | self.step = step 72 | self.channel_sizes = [] 73 | self.gumble_noise = gumble_noise 74 | self.regularizer = regularizer 75 | for i in range(self.min, self.max+1, self.step): 76 | self.channel_sizes.append(i) 77 | 78 | def build(self, input_shape): 79 | self.alpha = self.add_weight(name="alpha", 80 | shape=(len(self.channel_sizes),), 81 | initializer=tf.keras.initializers.Constant(value=1.), regularizer=self.regularizer) 82 | self.binary_vectors = create_binary_vector(self.channel_sizes, dtype=self.alpha.dtype) 83 | 84 | def call(self, inputs): 85 | self.g = gumbel_softmax(self.alpha, self.gumble_noise) 86 | mask = get_mask(self.binary_vectors, self.g) 87 | # Convert the mask from float32 to float16 during mixed precision. 88 | mask = tf.cast(mask, dtype=inputs.dtype) 89 | 90 | # broadcasting works for channels_last but not channels_first, so reshape the mask explicitly 91 | if tf.keras.backend.image_data_format() == 'channels_first': 92 | mask = tf.reshape(mask, [1, self.channel_sizes[-1], 1, 1]) 93 | if type(inputs) == list: 94 | return [mask * inputs[i] for i in range(len(inputs))] 95 | else: 96 | return mask * inputs 97 | 98 | 99 | def exponential_decay(initial_value, decay_steps, decay_rate): 100 | """ 101 | Applies exponential decay to the initial value 102 | Args: 103 | initial_value: The initial learning value 104 | decay_steps: Number of steps to decay over 105 | decay_rate: decay rate 106 | """ 107 | return lambda step: initial_value * decay_rate ** (step / decay_steps) 108 | 109 | 110 | def split_trainable_weights(model, arch_params_name='alpha'): 111 | """ 112 | split the model parameters into weights and architectural params 113 | """ 114 | weights = [] 115 | arch_params = [] 116 | for trainable_weight in model.trainable_variables: 117 | if arch_params_name in trainable_weight.name: 118 | arch_params.append(trainable_weight) 119 | else: 120 | weights.append(trainable_weight) 121 | if not arch_params: 122 | raise ValueError(f"No architecture parameters found by the name {arch_params_name}") 123 | return weights, arch_params 124 | 125 | 126 | def post_training_analysis(model, saved_file_path): 127 | layer_name = '' 128 | saved_file_content = {} 129 | for layer in model.layers: 130 | # if type(layer) == tf.keras.Conv2D: 131 | # layer_name = layer.name 132 | if type(layer) == ChannelMasking and layer.name[-8:] == '_savable': 133 | layer_name = layer.name[:-8] 134 | max_alpha_id = int(tf.math.argmax(layer.alpha).numpy()) 135 | value = layer.min + max_alpha_id * layer.step 136 | saved_file_content[layer_name] = value 137 | print(saved_file_content) 138 | with open(saved_file_path, 'w') as f: 139 | yaml.dump(saved_file_content, f) 140 | 141 | 142 | def save_arch_params(model, epoch, log_dir): 143 | json_file_path = os.path.join(log_dir, 'alpha.json') 144 | content = {} 145 | if os.path.exists(json_file_path): 146 | with 
open(json_file_path) as f: 147 | content = json.load(f) 148 | for layer in model.layers: 149 | if type(layer) == ChannelMasking: 150 | # need to convert from numpy.float32 to pure python float32 to prepare the dumps 151 | if str(epoch) not in content: 152 | content[str(epoch)] = {} 153 | content[str(epoch)][layer.name] = list(map(float, layer.alpha.numpy())) 154 | with open(json_file_path, 'w') as f: 155 | f.write(json.dumps(content)) 156 | -------------------------------------------------------------------------------- /scripts/test_tfrecord_writer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import tempfile 4 | import unittest 5 | import cv2 6 | import numpy as np 7 | import yaml 8 | from tfrecord_writer import build_tfrecord_dataset 9 | 10 | 11 | class TestTfrecordWriter(unittest.TestCase): 12 | def test_process_images_in_class_directory(self): 13 | num_examples_each_class = 10 14 | data_dir = create_fake_dataset(num_examples_each_class) 15 | name = 'Test-dataset' 16 | description = 'A small test datset' 17 | tfrecord_dir_path = tempfile.mkdtemp() 18 | 19 | args = {'name': name, 'description': description, 'tfrecord_dir_path': tfrecord_dir_path, 20 | 'tfrecord_size': 2, 'preprocessing': 'NO', 'image_size': (224, 224), "n_tfrecords":0, 21 | 'data': {'images_dir_path': data_dir, 22 | 'annotation_file_path': None, 23 | 'delimiter': ',', 24 | 'header_exists': False, 25 | 'split_names': ['train', 'validation', 'test'], 26 | 'split_percentages': [0.8, 0.1, 0.1], 27 | } 28 | } 29 | build_tfrecord_dataset(args) 30 | 31 | dataset_info = load_yaml(data_dir=tfrecord_dir_path, dataset_name=name) 32 | 33 | # check newly created datset name and description 34 | self.assertEqual(name, dataset_info['name']) 35 | self.assertEqual(description, dataset_info['description']) 36 | 37 | i = 0 38 | # check split percentage 39 | for split_name, split_items in dataset_info['splits'].items(): 40 | num_exmaples = split_items['num_examples'] 41 | self.assertAlmostEqual(args['data']['split_percentages'][i], num_exmaples / (2.0 * num_examples_each_class)) 42 | i += 1 43 | 44 | shutil.rmtree(data_dir) 45 | shutil.rmtree(tfrecord_dir_path) 46 | 47 | def test_process_images_in_class_directory_fixed_number_of_tfrecord(self): 48 | num_examples_each_class = 10 49 | data_dir = create_fake_dataset(num_examples_each_class) 50 | name = 'Test-dataset' 51 | description = 'A small test datset' 52 | tfrecord_dir_path = tempfile.mkdtemp() 53 | 54 | args = {'name': name, 'description': description, 'tfrecord_dir_path': tfrecord_dir_path, 55 | 'tfrecord_size': 20, 'preprocessing': 'NO', 'image_size': (224, 224), "n_tfrecords":5, 56 | 'data': {'images_dir_path': data_dir, 57 | 'annotation_file_path': None, 58 | 'delimiter': ',', 59 | 'header_exists': False, 60 | 'split_names': ['train', 'validation', 'test'], 61 | 'split_percentages': [0.8, 0.1, 0.1], 62 | } 63 | } 64 | build_tfrecord_dataset(args) 65 | print(tfrecord_dir_path) 66 | dataset_info = load_yaml(data_dir=tfrecord_dir_path, dataset_name=name) 67 | 68 | # check newly created datset name and description 69 | self.assertEqual(name, dataset_info['name']) 70 | self.assertEqual(description, dataset_info['description']) 71 | 72 | i = 0 73 | # check split percentage 74 | for split_name, split_items in dataset_info['splits'].items(): 75 | num_exmaples = split_items['num_examples'] 76 | self.assertAlmostEqual(args['data']['split_percentages'][i], num_exmaples / (2.0 * num_examples_each_class)) 77 | i += 1 78 
| 79 | # check number of tfrecords 80 | self.assertEqual(5, len(dataset_info['splits']['train']['tfrecord_files'])) 81 | 82 | shutil.rmtree(data_dir) 83 | shutil.rmtree(tfrecord_dir_path) 84 | 85 | 86 | 87 | def test_process_with_annotation_file(self): 88 | num_examples_each_class = 10 89 | data_dir, annotation_file = create_fake_dataset_with_annotation_file(num_examples_each_class) 90 | name = 'Test-dataset' 91 | description = 'A small test dataset' 92 | tfrecord_dir_path = tempfile.mkdtemp() 93 | 94 | args = {'name': name, 'description': description, 'tfrecord_dir_path': tfrecord_dir_path, 95 | 'tfrecord_size': 2, 'preprocessing': 'NO', 'image_size': (224, 224), "n_tfrecords":0, 96 | 'data': {'images_dir_path': data_dir, 97 | 'annotation_file_path': annotation_file, 98 | 'delimiter': ',', 99 | 'header_exists': False, 100 | 'split_names': ['train', 'validation', 'test'], 101 | 'split_percentages': [0.8, 0.1, 0.1], 102 | } 103 | } 104 | build_tfrecord_dataset(args) 105 | 106 | dataset_info = load_yaml(data_dir=tfrecord_dir_path, dataset_name=name) 107 | 108 | # check newly created dataset name and description 109 | self.assertEqual(name, dataset_info['name']) 110 | self.assertEqual(description, dataset_info['description']) 111 | 112 | i = 0 113 | # check split percentage 114 | for split_name, split_items in dataset_info['splits'].items(): 115 | num_examples = split_items['num_examples'] 116 | self.assertAlmostEqual(args['data']['split_percentages'][i], num_examples / (2.0 * num_examples_each_class)) 117 | i += 1 118 | 119 | shutil.rmtree(data_dir) 120 | shutil.rmtree(tfrecord_dir_path) 121 | 122 | 123 | def load_yaml(data_dir, dataset_name): 124 | yaml_file = os.path.join(data_dir, dataset_name, 'dataset_info.yaml') 125 | with open(yaml_file, 'r') as stream: 126 | try: 127 | dataset_info = yaml.safe_load(stream) 128 | except yaml.YAMLError as e: 129 | print('Error parsing file', yaml_file) 130 | raise e 131 | return dataset_info 132 | 133 | 134 | def create_fake_dataset(n_images_per_class=2): 135 | dataset_dir = tempfile.mkdtemp() 136 | os.makedirs(os.path.join(dataset_dir, 'cat'), exist_ok=True) 137 | os.makedirs(os.path.join(dataset_dir, 'dog'), exist_ok=True) 138 | for i in range(n_images_per_class): 139 | cv2.imwrite(os.path.join(dataset_dir, 'dog', '{}.jpg'.format(i)), np.ones((640, 480, 3), dtype=np.uint8) * 255) 140 | cv2.imwrite(os.path.join(dataset_dir, 'cat', '{}.jpg'.format(i)), np.ones((640, 480, 3), dtype=np.uint8) * 255) 141 | return dataset_dir 142 | 143 | 144 | def create_fake_dataset_with_annotation_file(n_images_per_class=2): 145 | dataset_dir = tempfile.mkdtemp() 146 | os.makedirs(dataset_dir, exist_ok=True) 147 | 148 | annotation_file = os.path.join(dataset_dir, 'annotations.txt') 149 | 150 | labels = ['cat', 'dog'] 151 | 152 | with open(annotation_file, 'w', encoding='utf-8') as f: 153 | for i in range(n_images_per_class*2): 154 | cv2.imwrite(os.path.join(dataset_dir, '{}.jpg'.format(i)), np.ones((640, 480, 3), dtype=np.uint8) * 255) 155 | line = '{}.jpg'.format(i) + "," + labels[i % 2] + "\n" 156 | f.write(line) 157 | 158 | return dataset_dir, annotation_file 159 | -------------------------------------------------------------------------------- /tests/system_tests/models_training.sh: -------------------------------------------------------------------------------- 1 | # train a mobilenet channel first with tensorflow 2 | # needs at least 6 GB of VRAM 3 | python3 train.py \ 4 | --model.name MobileNetV2NCHW \ 5 | --model.factor 1 \ 6 | --model.num_classes 10 \ 7 |
--model.input_size 224 224 3 \ 8 | --num_epochs 2 \ 9 | --checkpoint_dir /tmp/checkpoint \ 10 | --log_dir /tmp/results \ 11 | --dataloader.batch_size 128 \ 12 | --dataloader.name imagenette/full-size-v2 \ 13 | --early_stopping 100 \ 14 | --dataloader.train_list RandomCropThenResize Normalize RandomHorizontalFlip Cutout ColorJitter Translate \ 15 | --dataloader.val_list Normalize CentralCrop \ 16 | --dataloader.val_split_id validation \ 17 | --dataloader.train_split_id train \ 18 | --dataloader.Translate.width_shift_range 0.2 \ 19 | --dataloader.Translate.height_shift_range 0.2 \ 20 | --dataloader.RandomCrop.size 224 224 3 \ 21 | --dataloader.CentralCrop.size 224 224 \ 22 | --dataloader.Cutout.length 16 \ 23 | --optimizer.name sgd_nesterov \ 24 | --optimizer.lr 0.1 \ 25 | --optimizer.lr_decay_strategy.lr_params.patience 20 \ 26 | --optimizer.lr_decay_strategy.lr_params.strategy cosine_decay \ 27 | --optimizer.lr_decay_strategy.lr_params.decay_rate 0.3 \ 28 | --config.mixed_precision 29 | 30 | # Total params: 2,270,794 31 | # Trainable params: 2,236,682 32 | # Non-trainable params: 34,112 33 | # Epoch 2 takes 40 seconds using a GTX 1080 and should reach >30% validation accuracy 34 | 35 | rm -r /tmp/results 36 | rm -r /tmp/checkpoint 37 | 38 | # train a mobilenet channel last with tensorflow 39 | python3 train.py \ 40 | --model.name MobileNetV2 \ 41 | --model.factor 1 \ 42 | --model.num_classes 10 \ 43 | --model.input_size 224 224 3 \ 44 | --num_epochs 2 \ 45 | --checkpoint_dir /tmp/checkpoint \ 46 | --log_dir /tmp/results \ 47 | --dataloader.batch_size 128 \ 48 | --dataloader.name imagenette/full-size-v2 \ 49 | --early_stopping 100 \ 50 | --dataloader.train_list RandomCropThenResize Normalize RandomHorizontalFlip Cutout ColorJitter Translate \ 51 | --dataloader.val_list Normalize CentralCrop \ 52 | --dataloader.val_split_id validation \ 53 | --dataloader.train_split_id train \ 54 | --dataloader.Translate.width_shift_range 0.2 \ 55 | --dataloader.Translate.height_shift_range 0.2 \ 56 | --dataloader.RandomCrop.size 224 224 3 \ 57 | --dataloader.CentralCrop.size 224 224 \ 58 | --dataloader.Cutout.length 16 \ 59 | --optimizer.name sgd_nesterov \ 60 | --optimizer.lr 0.1 \ 61 | --optimizer.lr_decay_strategy.lr_params.patience 20 \ 62 | --optimizer.lr_decay_strategy.lr_params.strategy cosine_decay \ 63 | --optimizer.lr_decay_strategy.lr_params.decay_rate 0.3 \ 64 | --config.mixed_precision 65 | 66 | # Total params: 2,270,794 67 | # Trainable params: 2,236,682 68 | # Non-trainable params: 34,112 69 | # Epoch 2 takes 40 seconds using a GTX 1080 and should reach >30% validation accuracy 70 | 71 | rm -r /tmp/results 72 | rm -r /tmp/checkpoint 73 | 74 | # train a resnet channel last with tensorflow 75 | python3 train.py \ 76 | --model.name ResNet18 \ 77 | --model.factor 1 \ 78 | --model.num_classes 10 \ 79 | --model.input_size 224 224 3 \ 80 | --num_epochs 2 \ 81 | --checkpoint_dir /tmp/checkpoint \ 82 | --log_dir /tmp/results \ 83 | --dataloader.batch_size 128 \ 84 | --dataloader.name imagenette/full-size-v2 \ 85 | --early_stopping 100 \ 86 | --dataloader.train_list RandomCropThenResize Normalize RandomHorizontalFlip Cutout ColorJitter Translate \ 87 | --dataloader.val_list Normalize CentralCrop \ 88 | --dataloader.val_split_id validation \ 89 | --dataloader.train_split_id train \ 90 | --dataloader.Translate.width_shift_range 0.2 \ 91 | --dataloader.Translate.height_shift_range 0.2 \ 92 | --dataloader.RandomCrop.size 224 224 3 \ 93 | --dataloader.CentralCrop.size 224 224 \ 94 | --dataloader.Cutout.length
16 \ 95 | --optimizer.name sgd_nesterov \ 96 | --optimizer.lr 0.1 \ 97 | --optimizer.lr_decay_strategy.lr_params.patience 20 \ 98 | --optimizer.lr_decay_strategy.lr_params.strategy cosine_decay \ 99 | --optimizer.lr_decay_strategy.lr_params.decay_rate 0.3 \ 100 | --config.mixed_precision 101 | 102 | # Total params: 11,188,728 103 | # Trainable params: 11,180,920 104 | # Non-trainable params: 7,808 105 | 106 | # train a resnet channel first with tensorflow 107 | python3 train.py \ 108 | --model.name ResNet18NCHW \ 109 | --model.factor 1 \ 110 | --model.num_classes 10 \ 111 | --model.input_size 224 224 3 \ 112 | --num_epochs 2 \ 113 | --checkpoint_dir /tmp/checkpoint \ 114 | --log_dir /tmp/results \ 115 | --dataloader.batch_size 128 \ 116 | --dataloader.name imagenette/full-size-v2 \ 117 | --early_stopping 100 \ 118 | --dataloader.train_list RandomCropThenResize Normalize RandomHorizontalFlip Cutout ColorJitter Translate \ 119 | --dataloader.val_list Normalize CentralCrop \ 120 | --dataloader.val_split_id validation \ 121 | --dataloader.train_split_id train \ 122 | --dataloader.Translate.width_shift_range 0.2 \ 123 | --dataloader.Translate.height_shift_range 0.2 \ 124 | --dataloader.RandomCrop.size 224 224 3 \ 125 | --dataloader.CentralCrop.size 224 224 \ 126 | --dataloader.Cutout.length 16 \ 127 | --optimizer.name sgd_nesterov \ 128 | --optimizer.lr 0.1 \ 129 | --optimizer.lr_decay_strategy.lr_params.patience 20 \ 130 | --optimizer.lr_decay_strategy.lr_params.strategy cosine_decay \ 131 | --optimizer.lr_decay_strategy.lr_params.decay_rate 0.3 \ 132 | --config.mixed_precision 133 | 134 | # Total params: 11,188,728 135 | # Trainable params: 11,180,920 136 | # Non-trainable params: 7,808 137 | 138 | # train a resnet cifar channel last with tensorflow 139 | python3 train.py \ 140 | --model.name ResNet20CIFAR \ 141 | --model.factor 1 \ 142 | --model.num_classes 10 \ 143 | --model.input_size 224 224 3 \ 144 | --num_epochs 2 \ 145 | --checkpoint_dir /tmp/checkpoint \ 146 | --log_dir /tmp/results \ 147 | --dataloader.batch_size 128 \ 148 | --dataloader.name imagenette/full-size-v2 \ 149 | --early_stopping 100 \ 150 | --dataloader.train_list RandomCropThenResize Normalize RandomHorizontalFlip Cutout ColorJitter Translate \ 151 | --dataloader.val_list Normalize CentralCrop \ 152 | --dataloader.val_split_id validation \ 153 | --dataloader.train_split_id train \ 154 | --dataloader.Translate.width_shift_range 0.2 \ 155 | --dataloader.Translate.height_shift_range 0.2 \ 156 | --dataloader.RandomCrop.size 224 224 3 \ 157 | --dataloader.CentralCrop.size 224 224 \ 158 | --dataloader.Cutout.length 16 \ 159 | --optimizer.name sgd_nesterov \ 160 | --optimizer.lr 0.1 \ 161 | --optimizer.lr_decay_strategy.lr_params.patience 20 \ 162 | --optimizer.lr_decay_strategy.lr_params.strategy cosine_decay \ 163 | --optimizer.lr_decay_strategy.lr_params.decay_rate 0.3 \ 164 | --config.mixed_precision 165 | 166 | # Total params: 294,250 167 | # Trainable params: 292,874 168 | # Non-trainable params: 1,376 169 | -------------------------------------------------------------------------------- /src/data/dataloader.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import List, Dict 3 | import tensorflow as tf 4 | import tensorflow_datasets as tfds 5 | from . import augmentations 6 | import yaml 7 | 8 | 9 | # Some of the common image classification datasets from tfds are added here. 
An exhaustive list can be found here 10 | # (https://www.tensorflow.org/datasets/catalog/overview) 11 | 12 | TENSORFLOW_DATASET_NAMES = [ 13 | "caltech101", 14 | "caltech_birds2010", 15 | "caltech_birds2011", 16 | "cars196", 17 | "cats_vs_dogs", 18 | "cifar10", 19 | "cifar10_1", # Test set for Cifar 10 20 | "cifar10_corrupted", # Generated by adding 15 common corruptions plus 4 extra corruptions to the test images in Cifar10 21 | "cifar100", 22 | "fashion_mnist", 23 | "food101", 24 | "imagenette/full-size-v2", # Imagenette is a subset of the ImageNet dataset with 10 classes (9,469 train images and 3,925 validation images) 25 | "i_naturalist2017", 26 | "mnist", 27 | "mnist_corrupted", # Generated by adding 15 common corruptions plus 4 extra corruptions to the test images in mnist 28 | "omniglot", 29 | "oxford_flowers102", 30 | "oxford_iiit_pet" 31 | ] 32 | 33 | arguments = [ 34 | ['list[str]', 'train_list', ['RandomCropThenResize', 'RandomHorizontalFlip', 'Normalize'], 'List all the data augmentations separated by comma for training data'], 35 | ['list[str]', 'val_list', ['Resize', 'CentralCrop', 'Normalize'], 'List all the data augmentations separated by comma for validation data'], 36 | [str, "data_dir", '', "directory to read/write data. Defaults to \"~/tensorflow_datasets\""], 37 | [str, 'name', 'imagenet', 'Choose the dataset to be used for training'], 38 | [str, 'train_split_id', 'train', ''], 39 | [str, 'val_split_id', 'validation', ''], 40 | [int, 'batch_size', 0, 'The batch size per GPU', lambda x: x > 0], 41 | ] + augmentations.arguments 42 | 43 | 44 | def get_map_fn(transformation_list: List[str], param_dict: Dict, n_classes: int): 45 | """Return the map function applying data augmentation to the image and transforming the label to a one-hot vector 46 | 47 | Args: 48 | param_dict: dict containing transformation name as key and list of corresponding parameters as value 49 | """ 50 | def map_fn(image, label): 51 | label = tf.one_hot(label, n_classes) 52 | image = augmentations.apply_list_of_transformations(image, transformation_list, param_dict) 53 | return image, label 54 | return map_fn 55 | 56 | 57 | class TFRecordExtractor: 58 | """ 59 | This class extracts the stored tfrecords from the specified directory based on their split (train/val/test) 60 | and creates a tensorflow dataset object.
It assumes that all the tfrecord files are stored in the same directory as the dataset 61 | meta-data file named `dataset_info.yaml` 62 | """ 63 | 64 | def __init__(self, dataset_name, data_dir, split): 65 | if not os.path.exists(data_dir): 66 | raise ValueError(f"there is no directory named {data_dir}") 67 | if not os.path.exists(os.path.join(data_dir, dataset_name)): 68 | raise ValueError(f"there is no dataset by the name of {dataset_name} in directory {data_dir}") 69 | self.data_dir = os.path.join(data_dir, dataset_name) 70 | self.split = split 71 | 72 | def __get_tfrecord_files_from_dataset_info_file(self): 73 | """ 74 | get the names of all tfrecord files from the dataset meta-data file named `dataset_info.yaml` 75 | """ 76 | yaml_file = os.path.join(self.data_dir, 'dataset_info.yaml') 77 | with open(yaml_file, 'r') as stream: 78 | try: 79 | dataset_info = yaml.safe_load(stream) 80 | except yaml.YAMLError as e: 81 | print('Error parsing file', yaml_file) 82 | raise e 83 | tfrecord_files = [os.path.join(self.data_dir, path) for path in 84 | dataset_info["splits"][self.split]["tfrecord_files"]] 85 | return tfrecord_files 86 | 87 | def __extract_fn(self, tfrecord): 88 | """Extract a tfrecord and decode it to an image and a label 89 | """ 90 | feature_description = { 91 | 'image': tf.io.FixedLenFeature([], tf.string), 92 | 'label': tf.io.FixedLenFeature([], tf.int64), 93 | 'size': tf.io.FixedLenFeature([2], tf.int64) 94 | } 95 | # Extract the data record 96 | sample = tf.io.parse_single_example(tfrecord, feature_description) 97 | image = tf.io.decode_image(sample['image'], channels=3) 98 | image = tf.reshape(image, [sample['size'][0], sample['size'][1], 3]) # TODO this line should be useless ? 99 | label = sample['label'] 100 | return (image, label) 101 | 102 | def get_tf_dataset(self): 103 | """Creates a tensorflow dataset object from `tfrecord_files` 104 | """ 105 | tfrecord_files = self.__get_tfrecord_files_from_dataset_info_file() 106 | return tf.data.TFRecordDataset(tfrecord_files).map(self.__extract_fn) 107 | 108 | 109 | def is_training(config, split): 110 | return split == config['train_split_id'] 111 | 112 | 113 | def get_dataset_from_custom_tfrecord(config, transformation_list: List[str], num_classes: int, split, 114 | num_parallel_calls=tf.data.experimental.AUTOTUNE, buffer_multiplier=15): 115 | """load a custom dataset from tfrecord files 116 | """ 117 | map_fn = get_map_fn(transformation_list, config, num_classes) 118 | # list_files shuffles the file names 119 | dataset = TFRecordExtractor(config['name'], config['data_dir'], split).get_tf_dataset() 120 | 121 | if is_training(config, split): 122 | dataset = dataset.shuffle(config['batch_size'] * buffer_multiplier) 123 | dataset = dataset.map(map_fn, num_parallel_calls=num_parallel_calls).\ 124 | batch(config['batch_size']).\ 125 | prefetch(tf.data.experimental.AUTOTUNE) 126 | return dataset 127 | 128 | 129 | def get_dataset_from_tfds(config, transformation_list: List[str], num_classes: int, split, 130 | num_parallel_calls=tf.data.experimental.AUTOTUNE, buffer_multiplier=15): 131 | """load a dataset managed by tfds 132 | 133 | Args: 134 | config : Dict containing 'name', 'data_dir', 'batch_size' 135 | split: 'train' or 'test' 136 | """ 137 | dataset = tfds.load(name=config['name'], split=split, data_dir=config['data_dir'], 138 | shuffle_files=is_training(config, split), as_supervised=True) 139 | map_fn = get_map_fn(transformation_list, config, num_classes) 140 | if is_training(config, split): 141 | dataset = dataset.shuffle(config['batch_size'] *
buffer_multiplier) 142 | 143 | dataset = dataset.map(map_fn, num_parallel_calls=num_parallel_calls).\ 144 | batch(config['batch_size']).\ 145 | prefetch(tf.data.experimental.AUTOTUNE) 146 | return dataset 147 | 148 | 149 | # TODO test with autotune, else replace with a value in conf 150 | def get_dataset(config, transformation_list: List[str], num_classes: int, split, 151 | num_parallel_calls=tf.data.experimental.AUTOTUNE, buffer_multiplier=15): 152 | get_function = get_dataset_from_tfds if config["name"] in TENSORFLOW_DATASET_NAMES else get_dataset_from_custom_tfrecord 153 | return get_function(config, transformation_list, num_classes, split, num_parallel_calls, buffer_multiplier) 154 | -------------------------------------------------------------------------------- /src/inference_benchmark.py: -------------------------------------------------------------------------------- 1 | """This script should be called by ../inference_benchmark.py inside a docker container. 2 | It has very few dependencies, so it should run in any environment with TensorFlow installed. 3 | Please only import official Python packages or TensorFlow dependencies. 4 | """ 5 | import argparse 6 | import json 7 | import os 8 | import shutil 9 | import time 10 | from collections import defaultdict 11 | 12 | import numpy as np 13 | import tensorflow as tf 14 | from tensorflow.python.framework import convert_to_constants 15 | from tensorflow.python.saved_model import tag_constants 16 | 17 | from models import model_name_to_class 18 | 19 | import sys 20 | sys.path.append(os.path.join(os.path.abspath(os.path.dirname(sys.argv[0])), '../submodules/global_dl/training')) 21 | from trt_convert import convert_to_tensorrt 22 | 23 | 24 | # This method was working with previous versions of Tensorflow, but doesn't seem to work with TF 2.4. 25 | # I didn't find anything in the doc to reimplement it with newer methods of tensorflow. But maybe it will be 26 | # possible with a future version of TF 27 | # def get_gpu_info() -> defaultdict: 28 | # """ 29 | # Returns: e.g.
30 | # defaultdict(<class 'list'>, 31 | # { 32 | # 'device': ['0', '1'], 33 | # 'name': ['TITAN V', 'Quadro RTX 8000'], 34 | # 'pci bus id': ['0000:41:00.0', '0000:07:00.0'], 35 | # 'compute capability': ['7.0', '7.5'] 36 | # }) 37 | # """ 38 | # get_info = defaultdict(list) 39 | # for i in tf.config.list_physical_devices(): 40 | # if "compute capability: " in i.physical_device_desc: 41 | # for j in i.physical_device_desc.split(", "): 42 | # get_info[j.split(": ")[0]].append(j.split(": ")[1]) 43 | # # TODO handle the else part 44 | # return get_info 45 | 46 | 47 | def str2bool(v): 48 | """custom argparse type to be able to parse booleans 49 | see: 50 | https://stackoverflow.com/questions/15008758/parsing-boolean-values-with-argparse 51 | """ 52 | if isinstance(v, bool): 53 | return v 54 | if v.lower() in ('yes', 'true', 't', 'y', '1'): 55 | return True 56 | elif v.lower() in ('no', 'false', 'f', 'n', '0'): 57 | return False 58 | else: 59 | raise argparse.ArgumentTypeError('Boolean value expected.') 60 | 61 | 62 | def parse_args(): 63 | """Parse the command line and return the configuration to run the benchmark 64 | 65 | Returns: 66 | dict: the parsed benchmark configuration 67 | """ 68 | desc = "Inference benchmark" 69 | parser = argparse.ArgumentParser(description=desc) 70 | # Set of parameters needed for every benchmark 71 | parser.add_argument('--batch_size', type=int, default=1, help='batch_size') 72 | parser.add_argument('--engine', type=str, default="tensorflow", help='Specify the engine: tensorflow or upstride_X') 73 | parser.add_argument('--factor', type=float, default=1., help='factor to divide the number of channels') 74 | parser.add_argument('--model_name', type=str, default='VGG16', help='Specify the name of the model') 75 | parser.add_argument('--n_steps', type=int, default=20, help='number of iterations to benchmark speed') 76 | parser.add_argument('--profiler_path', type=str, default="/tmp/prof", help='path where the tensorboard profiler will be saved') 77 | parser.add_argument('--xla', type=str2bool, nargs='?', const=True, default=False, help='if specified then run XLA compilation') 78 | 79 | # set of parameters specific to TensorRT 80 | parser.add_argument("--export_tensorrt", type=str2bool, nargs='?', const=True, default=False, help="specify if model requires tensorrt conversion") 81 | parser.add_argument('--tensorrt_precision', type=str, default='FP32', help='Provide precision FP32 or FP16 for optimizing tensorrt') 82 | 83 | # If we need to load a trained model 84 | parser.add_argument('--model_path', type=str, default=None, help='Specify the model path') 85 | 86 | args = parser.parse_args() 87 | return vars(args) 88 | 89 | 90 | def random_data_iterator(shape, n, min, max): 91 | """Simulate a dataset by generating random normalized images 92 | 93 | Args: 94 | shape (List): shape of the image.
In most cases, (224, 224, 3) 95 | n (int): number of images to generate 96 | min (float): min of the random distribution 97 | max (float): max of the random distribution 98 | 99 | Yields: 100 | np.ndarray: generated images 101 | """ 102 | for _ in range(n): 103 | data = np.random.random(shape) # between [0, 1) 104 | data = data * (max-min) + min 105 | yield tf.constant(data.astype(np.float32)) 106 | 107 | 108 | def model_load_serve(path): 109 | saved_model = tf.saved_model.load(path, tags=[tag_constants.SERVING]) 110 | model = saved_model.signatures['serving_default'] 111 | return convert_to_constants.convert_variables_to_constants_v2(model) 112 | 113 | 114 | def benchmark(config): 115 | # 1 Tensorflow configuration 116 | # GPU should be configured to have progressive memory growth, else some specific configurations may crash (TF 2.0 on RTX2000 for instance) 117 | physical_devices = tf.config.list_physical_devices('GPU') 118 | for physical_device in physical_devices: 119 | tf.config.experimental.set_memory_growth(physical_device, True) 120 | if config['xla']: 121 | tf.config.optimizer.set_jit(True) 122 | 123 | # 2 Model preparation 124 | if config['engine'] == 'tensorflow': 125 | upstride_type = -1 126 | else: 127 | upstride_type = int(config['engine'][-1]) 128 | 129 | kwargs = { 130 | 'input_size': [224, 224, 3], 131 | 'num_classes': 10, 132 | 'factor': config['factor'], 133 | 'upstride_type': upstride_type, 134 | 'changing_ids': [] 135 | } 136 | 137 | model = model_name_to_class[config['model_name']](**kwargs).build() 138 | n_params = model.count_params() 139 | 140 | # 3 Maybe convert to TensorRT. 141 | # To do this, we save the model, remove it from memory then reload it using TensorRT 142 | tmp_dir = "/tmp/temp_dir" 143 | if config['export_tensorrt']: 144 | # save the model 145 | tf.saved_model.save(model, tmp_dir) 146 | 147 | # Remove it 148 | del model 149 | 150 | # Reload using tensorRT 151 | trt_path = convert_to_tensorrt( 152 | tmp_dir, 153 | image_size=[224, 224, 3], # for now it's hard-coded.
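# note: ideally this would mirror the input_size used to build the model above (also hard-coded to [224, 224, 3] in this script)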
154 | batch_size=config['batch_size'], 155 | precision=config['tensorrt_precision'] 156 | ) 157 | print(f'loading TensorRT model from path {trt_path}') 158 | model = model_load_serve(trt_path) 159 | 160 | # 4 prepare the environment 161 | os.makedirs(config['profiler_path'], exist_ok=True) 162 | 163 | # A few iterations to init the model 164 | print("first iteration") 165 | for data in random_data_iterator((config['batch_size'], 224, 224, 3), 1, -1, 1): 166 | model(data) 167 | print("first iteration done") 168 | 169 | os.makedirs(config['profiler_path'], exist_ok=True) 170 | tf.profiler.experimental.start(os.path.join(config['profiler_path'], 'logs_{}'.format(config['engine']))) 171 | for data in random_data_iterator((config['batch_size'], 224, 224, 3), 5, -1, 1): 172 | model(data) 173 | tf.profiler.experimental.stop() 174 | 175 | # Benchmark 176 | start_time = time.time() 177 | for data in random_data_iterator((config['batch_size'], 224, 224, 3), config['n_steps'], -1, 1): 178 | model(data) 179 | end_time = time.time() 180 | 181 | # TODO reactivate this part as soon as we know how to do it with a modern version of TF 182 | # try: 183 | # gpu = get_gpu_info().get('name')[0] 184 | # except TypeError: 185 | # gpu = 'cpu only' 186 | 187 | output = { 188 | 'total_time': end_time - start_time, 189 | 'n_iterations': config['n_steps'], 190 | 'n_params': n_params, 191 | 'tensorrt': config['export_tensorrt'], 192 | # 'gpu': gpu 193 | } 194 | print(json.dumps(output)) 195 | 196 | # clean up 197 | if os.path.exists(tmp_dir): 198 | shutil.rmtree(tmp_dir) 199 | 200 | 201 | if __name__ == "__main__": 202 | config = parse_args() 203 | benchmark(config) 204 | -------------------------------------------------------------------------------- /src/test_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import tempfile 4 | import unittest 5 | 6 | import cv2 7 | import numpy as np 8 | import tensorflow as tf 9 | 10 | from .data import dataloader 11 | from .data.test_dataloader import create_fake_dataset 12 | from .models.generic_model import GenericModel 13 | from .utils import (init_custom_checkpoint_callbacks, copy_and_resize, get_imagenet_data, get_partial_paths, 14 | get_paths, get_synset, get_val_label_dict) 15 | 16 | 17 | class Model1(GenericModel): 18 | def model(self): 19 | self.x = self.layers().Flatten()(self.x) 20 | self.x = self.layers().Dense(10)(self.x) 21 | 22 | 23 | class TestUtils(unittest.TestCase): 24 | def test_copy_and_resize(self): 25 | source = create_fake_dataset() 26 | dest = tempfile.mkdtemp() 27 | copy_and_resize(source, dest, 256) 28 | self.assertEqual(sorted(os.listdir(dest)), ['cat', 'dog']) 29 | self.assertEqual(sorted(os.listdir(os.path.join(dest, 'cat'))), ['0.jpg', '1.jpg']) 30 | self.assertEqual(sorted(os.listdir(os.path.join(dest, 'dog'))), ['0.jpg', '1.jpg']) 31 | self.assertEqual(cv2.imread(os.path.join(dest, 'dog', '1.jpg')).shape[0], 256) 32 | shutil.rmtree(source) 33 | shutil.rmtree(dest) 34 | 35 | def test_copy_and_resize_final_slash(self): 36 | source = create_fake_dataset() 37 | source += '/' 38 | dest = tempfile.mkdtemp() 39 | copy_and_resize(source, dest, 256) 40 | self.assertEqual(sorted(os.listdir(dest)), ['cat', 'dog']) 41 | self.assertEqual(sorted(os.listdir(os.path.join(dest, 'cat'))), ['0.jpg', '1.jpg']) 42 | self.assertEqual(sorted(os.listdir(os.path.join(dest, 'dog'))), ['0.jpg', '1.jpg']) 43 | self.assertEqual(cv2.imread(os.path.join(dest, 'dog', '1.jpg')).shape[0], 256) 44 |
shutil.rmtree(source) 45 | shutil.rmtree(dest) 46 | 47 | def test_get_synset(self): 48 | synset = get_synset("ressources/testing/fake_LOC_synset_mapping.txt") 49 | self.assertEqual(synset['n01496331'], 5) 50 | 51 | def test_get_paths(self): 52 | training_dataset = create_fake_training_data() 53 | val_dataset = create_fake_val_data() 54 | training_images = get_paths(training_dataset) 55 | val_images = get_paths(val_dataset) 56 | self.assertEqual(len(training_images), 20) 57 | self.assertEqual(len(val_images), 2) 58 | 59 | def test_get_val_label_dict(self): 60 | val_dict = get_val_label_dict("ressources/testing/fake_LOC_val_solution.csv") 61 | self.assertEqual(val_dict['ILSVRC2012_val_0'], 'n01484850') 62 | 63 | def test_get_partial_paths(self): 64 | training = create_fake_training_data() 65 | paths = get_partial_paths(training, 50) 66 | self.assertEqual(len(paths), 10) 67 | paths = get_partial_paths(training, 100) 68 | self.assertEqual(len(paths), 20) 69 | paths = get_partial_paths(training, 40) 70 | self.assertEqual(len(paths), 0) 71 | 72 | def test_get_imagenet_data(self): 73 | train_dir = create_fake_training_data() 74 | val_dir = create_fake_val_data() 75 | synset_path = "ressources/testing/fake_LOC_synset_mapping.txt" 76 | training_percentage = 100 77 | val_gt_path = "ressources/testing/fake_LOC_val_solution.csv" 78 | imagenet_data = get_imagenet_data({'synset_path': synset_path, 79 | 'train_dir': train_dir, 80 | 'train_data_percentage': training_percentage, 81 | 'val_dir': val_dir, 82 | 'val_gt_path': val_gt_path}) 83 | train_paths, train_labels, val_paths, val_labels = imagenet_data 84 | self.assertEqual(type(train_paths[0]), str) 85 | self.assertEqual(type(val_paths[0]), str) 86 | self.assertEqual(type(train_labels[0]), int) 87 | self.assertEqual(type(val_labels[0]), int) 88 | self.assertEqual(len(train_paths), 20) 89 | self.assertEqual(len(train_labels), 20) 90 | self.assertEqual(len(val_paths), 2) 91 | self.assertEqual(len(val_labels), 2) 92 | 93 | def test_data_pipeline_imagenet_data(self): 94 | train_dir = create_fake_training_data() 95 | val_dir = create_fake_val_data() 96 | synset_path = "ressources/testing/fake_LOC_synset_mapping.txt" 97 | training_percentage = 100 98 | val_gt_path = "ressources/testing/fake_LOC_val_solution.csv" 99 | imagenet_data = get_imagenet_data({'synset_path': synset_path, 100 | 'train_dir': train_dir, 101 | 'train_data_percentage': training_percentage, 102 | 'val_dir': val_dir, 103 | 'val_gt_path': val_gt_path}) 104 | train_paths, train_labels, val_paths, val_labels = imagenet_data 105 | dataset = dataloader.get_dataset(train_paths, train_labels, n_classes=10, batch_size=2) 106 | i = 0 107 | for image, label in dataset: 108 | self.assertEqual(label.numpy().shape, (2, 10)) 109 | self.assertTrue(label.numpy()[0, 0] in [0, 1]) 110 | self.assertTrue(label.numpy()[1, 1] in [0, 1]) 111 | self.assertEqual(image.numpy().shape, (2, 224, 224, 3)) 112 | i += 1 113 | 114 | self.assertEqual(i, 10) 115 | 116 | shutil.rmtree(train_dir) 117 | shutil.rmtree(val_dir) 118 | 119 | def test_init_custom_checkpoint_callbacks(self): 120 | model = Model1('tensorflow', factor=1).model 121 | optimizer = tf.keras.optimizers.Adam(lr=0.001) 122 | model.compile(optimizer=optimizer, loss='categorical_crossentropy') 123 | ckpt_dir = tempfile.mkdtemp() 124 | callback, latest_epoch = init_custom_checkpoint_callbacks(model, optimizer, ckpt_dir, max_ckpt=5, save_frequency=1) 125 | self.assertEqual(os.listdir(ckpt_dir), []) 126 | self.assertEqual(latest_epoch, 0) 127 | 128 | # train for 
one epoch 129 | train_dir = create_fake_training_data() 130 | val_dir = create_fake_val_data() 131 | synset_path = "ressources/testing/fake_LOC_synset_mapping.txt" 132 | training_percentage = 100 133 | val_gt_path = "ressources/testing/fake_LOC_val_solution.csv" 134 | imagenet_data = get_imagenet_data({'synset_path': synset_path, 135 | 'train_dir': train_dir, 136 | 'train_data_percentage': training_percentage, 137 | 'val_dir': val_dir, 138 | 'val_gt_path': val_gt_path}) 139 | train_paths, train_labels, val_paths, val_labels = imagenet_data 140 | train_dataset = dataloader.get_dataset(train_paths, train_labels, n_classes=10, batch_size=2) 141 | 142 | model.fit(x=train_dataset, 143 | epochs=1, 144 | callbacks=[callback], 145 | max_queue_size=16, 146 | workers=8, 147 | ) 148 | 149 | # check that ckpts were written 150 | files = os.listdir(ckpt_dir) 151 | self.assertTrue('checkpoint' in files) 152 | self.assertTrue('ckpt-1.index' in files) 153 | self.assertTrue('ckpt-1.data-00000-of-00002' in files) 154 | self.assertTrue('ckpt-1.data-00001-of-00002' in files) 155 | 156 | # train for 5 more epochs 157 | model.fit(x=train_dataset, 158 | epochs=5, 159 | callbacks=[callback], 160 | max_queue_size=16, 161 | workers=8, 162 | ) 163 | 164 | # check that ckpt-1 was removed and ckpts 2,3,4,5,6 were added 165 | files = os.listdir(ckpt_dir) 166 | self.assertTrue('checkpoint' in files) 167 | self.assertFalse('ckpt-1.index' in files) 168 | self.assertTrue('ckpt-2.index' in files) 169 | self.assertTrue('ckpt-3.index' in files) 170 | self.assertTrue('ckpt-4.index' in files) 171 | self.assertTrue('ckpt-5.index' in files) 172 | self.assertTrue('ckpt-6.index' in files) 173 | 174 | # check that we can load the last checkpoint 175 | del model 176 | del optimizer 177 | del callback 178 | model = Model1('tensorflow', factor=1).model 179 | optimizer = tf.keras.optimizers.Adam(lr=0.001) 180 | model.compile(optimizer=optimizer, loss='categorical_crossentropy') 181 | callback, latest_epoch = init_custom_checkpoint_callbacks(model, optimizer, ckpt_dir, max_ckpt=5, save_frequency=1) 182 | self.assertEqual(latest_epoch, 6) 183 | 184 | shutil.rmtree(train_dir) 185 | shutil.rmtree(val_dir) 186 | shutil.rmtree(ckpt_dir) 187 | 188 | 189 | def create_fake_training_data(): 190 | dataset_dir = tempfile.mkdtemp() 191 | dirs = ['n01440764', 'n01443537', 'n01484850', 'n01491361', 'n01494475', 'n01496331', 192 | 'n01498041', 'n01514668', 'n01514859', 'n01518878'] 193 | for d in dirs: 194 | os.makedirs(os.path.join(dataset_dir, d)) 195 | for i in range(2): 196 | cv2.imwrite(os.path.join(dataset_dir, d, '{}_{}.JPEG'.format(d, i)), np.ones((640, 480, 3), dtype=np.uint8) * 255) 197 | return dataset_dir 198 | 199 | 200 | def create_fake_val_data(): 201 | dataset_dir = tempfile.mkdtemp() 202 | for i in range(2): 203 | cv2.imwrite(os.path.join(dataset_dir, 'ILSVRC2012_val_{}.JPEG'.format(i)), np.ones((640, 480, 3), dtype=np.uint8) * 255) 204 | return dataset_dir 205 | -------------------------------------------------------------------------------- /src/models/mobilenet.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from .generic_model import GenericModelBuilder 3 | 4 | BATCHNORM_MOMENTUM = 0.9 5 | 6 | # This function is taken from the original tf repo.
7 | # It ensures that all layers have a channel number that is divisible by 8 8 | # It can be seen here: 9 | # https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py 10 | 11 | 12 | def _make_divisible(v, divisor, min_value=None): 13 | if min_value is None: 14 | min_value = divisor 15 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 16 | # Make sure that round down does not go down by more than 10%. 17 | if new_v < 0.9 * v: 18 | new_v += divisor 19 | return new_v 20 | 21 | 22 | def correct_pad(inputs, kernel_size, is_channels_first): 23 | """Returns a tuple for zero-padding for 2D convolution with downsampling. 24 | Args: 25 | inputs: input tensor (or list of tensors); the spatial size is read from its shape. 26 | kernel_size: An integer or tuple/list of 2 integers. 27 | Returns: 28 | A tuple. 29 | """ 30 | if type(inputs) == list: 31 | inputs = inputs[0] 32 | input_size = inputs.shape[2:4] if is_channels_first else inputs.shape[1:3] 33 | if isinstance(kernel_size, int): 34 | kernel_size = (kernel_size, kernel_size) 35 | adjust = (1, 1) if input_size[0] is None else (1 - input_size[0] % 2, 1 - input_size[1] % 2) 36 | correct = (kernel_size[0] // 2, kernel_size[1] // 2) 37 | return ((correct[0] - adjust[0], correct[0]), (correct[1] - adjust[1], correct[1])) 38 | 39 | 40 | class _MobileNetV2(GenericModelBuilder): 41 | def __init__(self, *args, **kwargs): 42 | super().__init__(*args, **kwargs) 43 | self.last_block_output_shape = 3 44 | 45 | def _inverted_res_block(self, x, expansion, stride, alpha, filters, block_id): 46 | layers = self.layers # we don't want to switch between tf and upstride in this block 47 | in_channels = self.last_block_output_shape 48 | 49 | pointwise_conv_filters = int(filters * alpha) 50 | pointwise_filters = _make_divisible(pointwise_conv_filters, 8) 51 | inputs = x 52 | prefix = 'block_{}_'.format(block_id) 53 | 54 | if block_id: 55 | # Expand 56 | x = layers.Conv2D((expansion * in_channels), kernel_size=1, padding='same', use_bias=False, name=prefix + 'expand', kernel_regularizer=self.weight_regularizer)(x) 57 | x = layers.BatchNormalization(axis=self.channel_axis, epsilon=1e-3, momentum=BATCHNORM_MOMENTUM, name=prefix + 'expand_BN')(x) 58 | x = layers.ReLU(6., name=prefix + 'expand_relu')(x) 59 | else: 60 | prefix = 'expanded_conv_' 61 | 62 | # Depthwise 63 | if stride == 2: 64 | x = layers.ZeroPadding2D(padding=correct_pad(x, 3, self.is_channels_first), name=prefix + 'pad')(x) 65 | x = layers.DepthwiseConv2D(kernel_size=3, strides=stride, activation=None, use_bias=False, padding='same' if stride == 1 else 'valid', 66 | name=prefix + 'depthwise', depthwise_regularizer=self.weight_regularizer)(x) 67 | x = layers.BatchNormalization(axis=self.channel_axis, epsilon=1e-3, momentum=BATCHNORM_MOMENTUM, name=prefix + 'depthwise_BN')(x) 68 | 69 | x = layers.ReLU(6., name=prefix + 'depthwise_relu')(x) 70 | 71 | # Project 72 | x = layers.Conv2D(pointwise_filters, kernel_size=1, padding='same', use_bias=False, activation=None, 73 | name=prefix + 'project', kernel_regularizer=self.weight_regularizer)(x) 74 | x = layers.BatchNormalization(axis=self.channel_axis, epsilon=1e-3, momentum=BATCHNORM_MOMENTUM, name=prefix + 'project_BN')(x) 75 | 76 | if in_channels == pointwise_filters and stride == 1: 77 | x = layers.Add(name=prefix + 'add')([inputs, x]) 78 | self.last_block_output_shape = pointwise_filters 79 | return x 80 | 81 | def model(self, x, alpha=1.0): 82 | """Instantiates the MobileNetV2 architecture.
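Reference: "MobileNetV2: Inverted Residuals and Linear Bottlenecks", https://arxiv.org/abs/1801.04381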
83 | Args: 84 | alpha: controls the width of the network. This is known as the 85 | width multiplier in the MobileNetV2 paper, but the name is kept for 86 | consistency with MobileNetV1 in Keras. 87 | - If `alpha` < 1.0, proportionally decreases the number 88 | of filters in each layer. 89 | - If `alpha` > 1.0, proportionally increases the number 90 | of filters in each layer. 91 | - If `alpha` = 1, default number of filters from the paper 92 | are used at each layer. 93 | """ 94 | weight_regularizer = self.weight_regularizer 95 | 96 | first_block_filters = _make_divisible(32 * alpha // self.factor, 8) 97 | x = self.layers.ZeroPadding2D(padding=correct_pad(x, 3, self.is_channels_first), name='Conv1_pad')(x) 98 | x = self.layers.Conv2D(first_block_filters, kernel_size=3, strides=self.first_conv_stride, padding='valid', 99 | use_bias=False, name='Conv1', kernel_regularizer=self.weight_regularizer)(x) 100 | x = self.layers.BatchNormalization(axis=self.channel_axis, epsilon=1e-3, momentum=BATCHNORM_MOMENTUM, name='bn_Conv1')(x) 101 | x = self.layers.ReLU(6., name='Conv1_relu')(x) 102 | 103 | self.last_block_output_shape = first_block_filters 104 | 105 | block_id = 0 106 | for configuration in self.configurations: 107 | for i in range(configuration[1]): 108 | stride = configuration[2] if i == 0 else 1 109 | x = self._inverted_res_block(x, filters=configuration[0]//self.factor, alpha=alpha, stride=stride, expansion=configuration[3], block_id=block_id) 110 | block_id += 1 111 | 112 | # no alpha applied to last conv as stated in the paper: 113 | # if the width multiplier is greater than 1 we 114 | # increase the number of output channels 115 | if alpha > 1.0: 116 | last_block_filters = _make_divisible(1280 * alpha, 8) 117 | else: 118 | last_block_filters = 1280 119 | last_block_filters = last_block_filters // self.factor 120 | 121 | x = self.layers.Conv2D(last_block_filters, kernel_size=1, use_bias=False, name='Conv_1', kernel_regularizer=self.weight_regularizer)(x) 122 | x = self.layers.BatchNormalization(axis=self.channel_axis, epsilon=1e-3, momentum=BATCHNORM_MOMENTUM, name='Conv_1_bn')(x) 123 | x = self.layers.ReLU(6., name='out_relu')(x) 124 | 125 | x = self.layers.GlobalAveragePooling2D()(x) 126 | return x 127 | 128 | 129 | class MobileNetV2(_MobileNetV2): 130 | def __init__(self, *args, **kwargs): 131 | # (channels, num_blocks, stride, expansion) 132 | self.first_conv_stride = 2 133 | self.configurations = [(16, 1, 1, 1), 134 | (24, 2, 2, 6), 135 | (32, 3, 2, 6), 136 | (64, 4, 2, 6), 137 | (96, 3, 1, 6), 138 | (160, 3, 2, 6), 139 | (320, 1, 1, 6)] 140 | super().__init__(*args, **kwargs) 141 | 142 | 143 | class MobileNetV2Cifar10(_MobileNetV2): 144 | def __init__(self, *args, **kwargs): 145 | # (channels, num_blocks, stride, expansion) 146 | self.first_conv_stride = 1 147 | self.configurations = [(16, 1, 1, 1), 148 | (24, 2, 1, 6), 149 | (32, 3, 2, 6), 150 | (64, 4, 2, 6), 151 | (96, 3, 1, 6), 152 | (160, 3, 2, 6), 153 | (320, 1, 1, 6)] 154 | super().__init__(*args, **kwargs) 155 | 156 | class MobileNetV2Cifar10_2(_MobileNetV2): 157 | def __init__(self, *args, **kwargs): 158 | # (channels, num_blocks, stride, expansion) 159 | self.first_conv_stride = 1 160 | self.configurations = [(16, 1, 1, 1), 161 | (24, 2, 1, 6), 162 | (32, 3, 1, 6), 163 | (64, 4, 2, 6), 164 | (96, 3, 1, 6), 165 | (160, 3, 2, 6), 166 | (320, 1, 1, 6)] 167 | super().__init__(*args, **kwargs) 168 | 169 | 170 | class MobileNetV2Cifar10Hyper(_MobileNetV2): 171 | def __init__(self, *args, **kwargs): 172 | 
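# hyper-model variant: the number of blocks per stage is sampled in model() below through keras-tuner's `self.hp`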
super().__init__(*args, **kwargs) 173 | 174 | def model(self, x): 175 | # (channels, num_blocks, stride, expansion) 176 | self.first_conv_stride = 1 177 | 178 | # define 11 MobileNetV2 versions 179 | blocks_family = [ 180 | [1, 1, 1, 1, 1, 1, 1], 181 | [1, 1, 1, 2, 1, 1, 1], 182 | [1, 1, 1, 2, 2, 1, 1], 183 | [1, 1, 2, 2, 2, 1, 1], 184 | [1, 1, 2, 2, 2, 2, 1], 185 | [1, 1, 2, 3, 2, 2, 1], 186 | [1, 2, 2, 3, 2, 2, 1], 187 | [1, 2, 3, 3, 2, 2, 1], 188 | [1, 2, 3, 3, 3, 2, 1], 189 | [1, 2, 3, 3, 3, 3, 1], 190 | [1, 2, 3, 4, 3, 3, 1] # Default config 191 | ] 192 | 193 | self.mobilenet_version = self.hp.Int('depth', min_value=0, max_value=10, step=1) 194 | block = blocks_family[self.mobilenet_version] 195 | self.configurations = [(16, block[0], 1, 1), 196 | (24, block[1], 1, 6), 197 | (32, block[2], 1, 6), 198 | (64, block[3], 2, 6), 199 | (96, block[4], 1, 6), 200 | (160, block[5], 2, 6), 201 | (320, block[6], 1, 6)] 202 | return super().model(x) 203 | -------------------------------------------------------------------------------- /src/models/alexnet.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from .generic_model import GenericModelBuilder 3 | 4 | 5 | class AlexNet(GenericModelBuilder): 6 | def model(self, x): 7 | # note regarding batch norm: in the official implementation, there are 2 batchnorms. 8 | # However, they seem to hurt training when used with upstride, so for now they are commented out. 9 | # Maybe this will change some day, which is why they are commented and not removed 10 | x = self.layers.Conv2D(96//self.factor, (11, 11), 4, padding='same', 11 | kernel_initializer=tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=42), 12 | bias_initializer=tf.keras.initializers.zeros(), 13 | use_bias=False, 14 | name='conv_1')(x) 15 | #x = tf.keras.layers.BatchNormalization()(x) 16 | x = self.layers.Activation('relu')(x) 17 | x = self.layers.MaxPooling2D((3, 3), strides=(2, 2))(x) 18 | # Layer 2 - Conv 19 | x = self.layers.Conv2D(256//self.factor, (5, 5), padding='same', 20 | kernel_initializer=tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=42), 21 | bias_initializer=tf.keras.initializers.ones(), 22 | use_bias=False, 23 | name='conv_2')(x) 24 | #x = tf.keras.layers.BatchNormalization()(x) 25 | x = self.layers.Activation('relu')(x) 26 | x = self.layers.MaxPooling2D((3, 3), strides=(2, 2))(x) 27 | # Layer 3 - Conv 28 | x = self.layers.Conv2D(384//self.factor, (3, 3), padding='same', 29 | kernel_initializer=tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=42), 30 | bias_initializer=tf.keras.initializers.zeros(), 31 | use_bias=False, 32 | name='conv_3')(x) 33 | x = self.layers.Activation('relu')(x) 34 | # Layer 4 - Conv 35 | x = self.layers.Conv2D(384//self.factor, (3, 3), padding='same', 36 | kernel_initializer=tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=42), 37 | bias_initializer=tf.keras.initializers.ones(), 38 | use_bias=False, 39 | name='conv_4')(x) 40 | x = self.layers.Activation('relu')(x) 41 | # Layer 5 - Conv 42 | x = self.layers.Conv2D(256//self.factor, (3, 3), padding='same', 43 | kernel_initializer=tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=42), 44 | bias_initializer=tf.keras.initializers.ones(), 45 | use_bias=False, 46 | name='conv_5')(x) 47 | x = self.layers.Activation('relu')(x) 48 | x = self.layers.MaxPooling2D((3, 3), strides=(2, 2))(x) 49 | # Layer 6 - Fully connected 50 | x = self.layers.Flatten()(x) 51 | x = self.layers.Dense(4096//self.factor, 52 |
kernel_initializer=tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=42), 53 | bias_initializer=tf.keras.initializers.ones(), 54 | use_bias=False, 55 | name='dense_1')(x) 56 | x = self.layers.Activation('relu')(x) 57 | x = self.layers.Dropout(0.5, seed=42)(x) 58 | # Layer 7 - Fully connected 59 | x = self.layers.Dense(4096//self.factor, 60 | kernel_initializer=tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=42), 61 | bias_initializer=tf.keras.initializers.ones(), 62 | use_bias=False, 63 | name='dense_2')(x) 64 | x = self.layers.Activation('relu')(x) 65 | x = self.layers.Dropout(0.5, seed=42)(x) 66 | return x 67 | 68 | 69 | class AlexNetQ(GenericModelBuilder): 70 | def model(self, x): 71 | x = self.layers.Conv2D(96//self.factor, (11, 11), 4, padding='same', 72 | bias_initializer=tf.keras.initializers.zeros(), 73 | use_bias=False, 74 | name='conv_1')(x) 75 | x = self.layers.BatchNormalization(axis=self.channel_axis)(x) 76 | x = self.layers.Activation('relu')(x) 77 | x = self.layers.MaxPooling2D((3, 3), strides=(2, 2))(x) 78 | # Layer 2 - Conv 79 | x = self.layers.Conv2D(256//self.factor, (5, 5), padding='same', 80 | bias_initializer=tf.keras.initializers.ones(), 81 | use_bias=False, 82 | name='conv_2')(x) 83 | x = self.layers.BatchNormalization(axis=self.channel_axis)(x) 84 | x = self.layers.Activation('relu')(x) 85 | x = self.layers.MaxPooling2D((3, 3), strides=(2, 2))(x) 86 | # Layer 3 - Conv 87 | x = self.layers.Conv2D(384//self.factor, (3, 3), padding='same', 88 | bias_initializer=tf.keras.initializers.zeros(), 89 | use_bias=False, 90 | name='conv_3')(x) 91 | x = self.layers.BatchNormalization(axis=self.channel_axis)(x) 92 | x = self.layers.Activation('relu')(x) 93 | # Layer 4 - Conv 94 | x = self.layers.Conv2D(384//self.factor, (3, 3), padding='same', 95 | bias_initializer=tf.keras.initializers.ones(), 96 | use_bias=False, 97 | name='conv_4')(x) 98 | x = self.layers.BatchNormalization(axis=self.channel_axis)(x) 99 | x = self.layers.Activation('relu')(x) 100 | # Layer 5 - Conv 101 | x = self.layers.Conv2D(256//self.factor, (3, 3), padding='same', 102 | bias_initializer=tf.keras.initializers.ones(), 103 | use_bias=False, 104 | name='conv_5')(x) 105 | x = self.layers.BatchNormalization(axis=self.channel_axis)(x) 106 | x = self.layers.Activation('relu')(x) 107 | x = self.layers.MaxPooling2D((3, 3), strides=(2, 2))(x) 108 | # Layer 6 - Fully connected 109 | x = self.layers.Flatten()(x) 110 | x = self.layers.Dense(4096//self.factor, 111 | bias_initializer=tf.keras.initializers.ones(), 112 | use_bias=False, 113 | name='dense_1')(x) 114 | x = self.layers.BatchNormalization(axis=self.channel_axis)(x) 115 | x = self.layers.Activation('relu')(x) 116 | x = self.layers.Dropout(0.5, seed=42)(x) 117 | # Layer 7 - Fully connected 118 | x = self.layers.Dense(4096//self.factor, 119 | bias_initializer=tf.keras.initializers.ones(), 120 | use_bias=False, 121 | name='dense_2')(x) 122 | x = self.layers.Activation('relu')(x) 123 | x = self.layers.Dropout(0.5, seed=42)(x) 124 | return x 125 | 126 | class AlexNetToy(GenericModelBuilder): 127 | def model(self, x): 128 | # This model is a mini version of the AlexNet 129 | x = self.layers.Conv2D(96//self.factor, (11, 11), 4, padding='valid', 130 | bias_initializer=tf.keras.initializers.zeros(), 131 | use_bias=False, 132 | name='conv_1')(x) 133 | x = self.layers.BatchNormalization(axis=self.channel_axis)(x) 134 | x = self.layers.Activation('relu')(x) 135 | x = self.layers.MaxPooling2D((3, 3), strides=(2, 2))(x) 136 | # Layer 2 - Conv 137 | x = 
self.layers.Conv2D(128//self.factor, (5, 5), padding='valid', 138 | bias_initializer=tf.keras.initializers.ones(), 139 | use_bias=False, 140 | name='conv_2')(x) 141 | x = self.layers.BatchNormalization(axis=self.channel_axis)(x) 142 | x = self.layers.Activation('relu')(x) 143 | x = self.layers.MaxPooling2D((3, 3), strides=(2, 2))(x) 144 | # Layer 3 - Conv 145 | x = self.layers.Conv2D(192//self.factor, (3, 3), padding='valid', 146 | bias_initializer=tf.keras.initializers.zeros(), 147 | use_bias=False, 148 | name='conv_3')(x) 149 | x = self.layers.BatchNormalization(axis=self.channel_axis)(x) 150 | x = self.layers.Activation('relu')(x) 151 | # Layer 4 - Conv 152 | x = self.layers.Conv2D(128//self.factor, (3, 3), padding='valid', 153 | bias_initializer=tf.keras.initializers.ones(), 154 | use_bias=False, 155 | name='conv_5')(x) 156 | x = self.layers.BatchNormalization(axis=self.channel_axis)(x) 157 | x = self.layers.Activation('relu')(x) 158 | x = self.layers.MaxPooling2D((3, 3), strides=(2, 2))(x) 159 | # Layer 5 - Fully connected 160 | x = self.layers.Flatten()(x) 161 | x = self.layers.Dense(2048//self.factor, 162 | bias_initializer=tf.keras.initializers.ones(), 163 | use_bias=False, 164 | name='dense_2')(x) 165 | x = self.layers.Activation('relu')(x) 166 | x = self.layers.Dropout(0.5, seed=42)(x) 167 | return x 168 | -------------------------------------------------------------------------------- /documentation/doc.md: -------------------------------------------------------------------------------- 1 | # Technical documentation 2 | 3 | Before training, you need a dataset. If you're working with a research dataset supported by [TFDS](https://www.tensorflow.org/datasets/catalog/overview#image_classification) then you can skip the first part, else you need to 4 | convert your dataset to TFRecord format. 5 | 6 | ### Converting the dataset to TFRecord format 7 | 8 | Go to the `scripts` directory and use the `tfrecord_writer.py` script. To get a full list of parameters for this script run `python tfrecord_writer.py --help`. 9 | 10 | The parameters you need to provide are: 11 | 12 | - `tfrecord_dir_path`: directory where to store tfrecords 13 | - `name`: name of the dataset 14 | - `data.images_dir_path`: directory path for the images 15 | 16 | Default behavior is to save the images in the tfrecord without any processing. 17 | If you want to add some processing you can: 18 | - crop the biggest possible square in the middle of the image and then scale it to a fixed size 19 | - add a margin to transform the image to a square and then scale it to a fixed size 20 | 21 | To do this, set the `preprocessing` parameter to "CENTER_CROP_THEN_SCALE" or "SQUARE_MARGIN_THEN_SCALE" and the `image_size` parameter to the desired size. 22 | 23 | The last thing to define is how to split the dataset. You may want to only create a training or validation set, but you also 24 | may need to split the images into different sets in a well balanced fashion. This can be achieved using the `data.split_names` and 25 | `data.split_percentages` parameters. By default `data.split_names` is a list of 3 elements, `['train', 'validation', 'test']`, and 26 | `data.split_percentages` a list of 3 floats, `[0.8, 0.1, 0.1]`, so 80% of the images will go into the train set, and 10% each into the validation and test sets (with 2,000 images, for instance, that is 1,600 for train and 200 each for validation and test). You can choose as many splits as you want.
27 | 28 | For example, this command prepares a dataset by center cropping then scaling to 256x256, with 70% in the training set and 30% in the validation set: 29 | 30 | ```bash 31 | python3 tfrecord_writer.py \ 32 | --tfrecord_dir_path /path/to/tfrecord \ 33 | --name example_dataset \ 34 | --preprocessing CENTER_CROP_THEN_SCALE \ 35 | --image_size 256 256 \ 36 | --data.images_dir_path /path/to/image \ 37 | --data.split_names train validation \ 38 | --data.split_percentages 0.7 0.3 39 | ``` 40 | 41 | ### Start the training 42 | 43 | At this step, you should have a tfrecord ready for training. Now, let's go into the details of the training configuration. 44 | 45 | #### Setup the Engine 46 | 47 | This code supports training with Tensorflow 2.3 and UpStride 1.0. To select the framework use the `framework` parameter. Possible values are: 48 | 49 | - tensorflow: well, you probably know this one 50 | - upstride_real: should be used when working with the same mathematics as Tensorflow 51 | - upstride_type1: should be used to deal with 2D data like points, lines, polygons or grayscale images. 52 | - upstride_type2: should be used to deal with simple 3D data like 3D point cloud, 3D lines, polyhedrons or colored images 53 | - upstride_type3: should be used to deal with more complex 3D data like 3D point cloud, 3D lines, polyhedrons or colored images 54 | 55 | Please note that when using the upstride engine, we highly recommend reducing the number of channels in the neural network. This can be done using the `factor` parameter. 56 | Factor is the division factor to scale the number of channels: factor=2 means the model will have half the number of channels compared to the default implementation. 57 | 58 | When using upstride_type2, we recommend using factor=2 or factor=4. 59 | 60 | Of course, if you're working with TensorFlow but with a small dataset, you may also want to change the factor. 61 | 62 | #### Setup the neural network 63 | 64 | Currently we support: 65 | 66 | - `AlexNet` 67 | - EfficientNet family, from `EfficientNetB0` to `EfficientNetB7` 68 | - Resnet family: `ResNet18`, `ResNet34`, `ResNet50`, `ResNet101`, `ResNet152`, `ResNet20CIFAR`, `ResNet32CIFAR`, `ResNet44CIFAR`, `ResNet56CIFAR`, `ResNet18NCHW`, `ResNet34NCHW`, `ResNet50NCHW`, `ResNet101NCHW`, `ResNet152NCHW` 69 | - MobileNet family: `MobileNetV2`, `MobileNetV2NCHW` 70 | - NasNet family: `NASNetCIFAR`, `NASNetLarge`, `NASNetMobile` 71 | - `SqueezeNet` 72 | - `TinyDarknet` 73 | - `VGG16` 74 | 75 | (of course this list will increase as time goes on) 76 | 77 | You can select the model you want to train using the `model_name` parameter. Then you may want to tune the size of the input of the network (parameter `input_size`, by default 224x224x3) 78 | and the size of the output of the neural network (parameter `num_classes`). 79 | 80 | 81 | #### Setup the dataloader and preprocessing 82 | 83 | First we need to give the training script the path to the tfrecord files. If you're working with a research dataset, the name of the dataset is enough: the code will download it automatically. For instance, to work with 84 | cifar10, `dataloader.name` can be set to `cifar10`. 85 | For custom datasets, you need to provide the name and the path of the tfrecord files (parameter `dataloader.data_dir`). 86 | 87 | Now we need to set up the data augmentation.
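As a quick illustration, here is a minimal sketch of the dataloader part of a training command for a custom tfrecord dataset (the dataset name and directory are hypothetical placeholders; the augmentation operations passed to the two lists are detailed just below):

```bash
python3 train.py \
    --dataloader.name my_custom_dataset \
    --dataloader.data_dir /path/to/tfrecords \
    --dataloader.batch_size 64 \
    --dataloader.train_list RandomCropThenResize RandomHorizontalFlip Normalize \
    --dataloader.val_list Resize CentralCrop Normalize
```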
The operations we support are: 88 | 89 | - CentralCrop 90 | - ColorJitter 91 | - Normalize 92 | - RandomHorizontalFlip 93 | - RandomRotate 94 | - RandomRotate90 95 | - RandomVerticalFlip 96 | - RandomCrop 97 | - RandomCropThenResize 98 | - Resize 99 | - ResizeThenRandomCrop 100 | - Translate 101 | 102 | Using the parameters `dataloader.train_list` and `dataloader.val_list` you can list the data augmentation operations to run in the data pipeline. Operations will be executed in the order of the list. 103 | Each of these operations has special parameters. Please check the output of `python3 train.py --help` for more details. 104 | 105 | #### Setup the optimizer 106 | 107 | The last thing to set up in the training pipeline is of course the optimizer. Currently we support: 108 | 109 | - adadelta 110 | - adagrad 111 | - adam 112 | - adam_amsgrad 113 | - sgd 114 | - sgd_momentum 115 | - sgd_nesterov 116 | - nadam 117 | - rmsprop 118 | 119 | You can select the one you want using the `optimizer.name` parameter. You may also want to tune the momentum of the optimizer using the `optimizer.momentum` parameter. 120 | 121 | The initial learning rate can be selected using `optimizer.lr`. We also support a list of learning rate decay strategies: 122 | 123 | - exponential_decay 124 | - step_decay 125 | - step_decay_schedule 126 | - polynomial_decay 127 | - inverse_time_decay 128 | - cosine_decay 129 | - lr_reduce_on_plateau 130 | 131 | You can choose to turn on the learning rate decay strategy using the option `optimizer.lr_decay_strategy.activate` and then select the one you prefer using `optimizer.lr_decay_strategy.lr_params.strategy` (default is lr_reduce_on_plateau). 132 | There is a list of parameters to set up for every one of these learning rate decays; please see `python3 train.py --help` or the file `submodules/global_dl/training/optimizers.py` for more details. 133 | 134 | #### Other parameters 135 | 136 | Now, most of the work is done. The only remaining options are: 137 | - `num_epochs`: the number of epochs to run 138 | - `checkpoint_dir`, `export_dir`, `log_dir`: paths to write checkpoints, exported model and training logs 139 | - `configuration.with_mixed_precision`, `configuration.mirrored`, `configuration.profiler`: configuration for mixed precision training, mirrored strategy and the tensorboard profiler 140 | 141 | 142 | #### training example 143 | 144 | To train a mobilenet for 20 epochs on cifar10, a training command can look like this: 145 | 146 | ```bash 147 | python train.py \ 148 | --model.name MobileNetV2 \ 149 | --model.num_classes 10 \ 150 | --model.input_size 32 32 3 \ 151 | --num_epochs 20 \ 152 | --checkpoint_dir /tmp/checkpoint \ 153 | --log_dir /tmp/log \ 154 | --export.dir /tmp \ 155 | --dataloader.name cifar10 \ 156 | --dataloader.train_list RandomHorizontalFlip Normalize \ 157 | --dataloader.val_list Normalize \ 158 | --dataloader.val_split_id validation \ 159 | --dataloader.train_split_id train \ 160 | --dataloader.batch_size 64 \ 161 | --optimizer.lr 0.0001 162 | ``` 163 | 164 | ## More regarding the parameters 165 | 166 | When training a neural network, you can use a bash command, but it is also possible to use a yaml file.
164 | ## More regarding the parameters
165 |
166 | When training a neural network, you can pass every parameter on the command line, but it is also possible to use a YAML file. For instance, the previous example can also be run with the command:
167 |
168 | ```bash
169 | python train.py --yaml_config conf.yml
170 | ```
171 |
172 | and the file `conf.yml`:
173 |
174 | ```yaml
175 | model:
176 |   name: MobileNetV2
177 |   num_classes: 10
178 |   input_size: [32, 32, 3]
179 | num_epochs: 20
180 | checkpoint_dir: /tmp/checkpoint
181 | log_dir: /tmp/log
182 | export:
183 |   dir: /tmp
184 | dataloader:
185 |   name: cifar10
186 |   train_list: [RandomHorizontalFlip, Normalize]
187 |   val_list: [Normalize]
188 |   val_split_id: validation
189 |   train_split_id: train
190 |   batch_size: 64
191 | optimizer:
192 |   lr: 0.0001
193 | ```
194 |
195 | You can also mix a configuration file and the command line. If a parameter is defined in both, the command line will prevail.
196 | For more information on the argument parser, please visit https://github.com/UpStride/betterargparse
197 |
198 |
199 | ## Keras-Tuner
200 |
201 | It is also possible to perform hyper-parameter tuning using Keras-Tuner and the Hyperband algorithm. The two default exploration axes are:
202 |
203 | - depth: the number of blocks in the network
204 | - factor: the division ratio for the number of channels
205 |
206 | Please note that Keras-Tuner only works with MobileNet and ResNet for now.
207 |
208 | The final results will look like this (figures are validation accuracy percentages):
209 |
210 | ![Keras-Tuner](../ressources/keras_tuner.png)
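These axes are expressed through Keras-Tuner's hyper-parameter API; `ResNetHyper` in `src/models/resnet.py`, for instance, draws its depth with `hp.Int('depth', min_value=5, max_value=17, step=1)`. Below is a minimal, self-contained sketch of a Hyperband search over a toy hypermodel; the model body and the `factor` value set are illustrative placeholders, not the repository's actual search space:

```python
import tensorflow as tf
import kerastuner as kt  # pip package keras-tuner (imported as keras_tuner in newer versions)


def build_model(hp):
  """Toy hypermodel exposing the two default exploration axes."""
  depth = 2 * hp.Int('depth', min_value=5, max_value=17, step=1)  # mirrors ResNetHyper
  factor = hp.Choice('factor', values=[1, 2, 4])  # hypothetical value set
  # in the real hypermodels, `depth` selects the number of residual blocks;
  # here it is only declared so that Hyperband explores it
  inputs = tf.keras.Input((32, 32, 3))
  x = tf.keras.layers.Conv2D(64 // factor, 3, activation='relu')(inputs)
  x = tf.keras.layers.GlobalAveragePooling2D()(x)
  outputs = tf.keras.layers.Dense(10, activation='softmax')(x)
  model = tf.keras.Model(inputs, outputs)
  model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
  return model


tuner = kt.Hyperband(build_model, objective='val_accuracy', max_epochs=20)
# tuner.search(train_dataset, validation_data=val_dataset)
```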
211 | -------------------------------------------------------------------------------- /src/models/test_fbnetv2.py: -------------------------------------------------------------------------------- 1 | import json 2 | import unittest 3 | import os 4 | import shutil 5 | import tempfile 6 | import yaml 7 | import tensorflow as tf 8 | import numpy as np 9 | from . import fbnetv2 10 | 11 | 12 | class TestBinaryVector(unittest.TestCase): 13 | def test_create_binary_vector(self): 14 | binary_vectors = fbnetv2.create_binary_vector(channel_sizes=[2, 7, 10], dtype=tf.float32) 15 | # check that we have 3 vectors 16 | self.assertEqual(len(binary_vectors), 3) 17 | # check the types of vectors 18 | for i in range(3): 19 | self.assertEqual(binary_vectors[i].dtype, tf.float32) 20 | # check the vector content 21 | self.assertTrue(np.array_equal(binary_vectors[0].numpy(), [1., 1., 0., 0., 0., 0., 0., 0., 0., 0.])) 22 | self.assertTrue(np.array_equal(binary_vectors[1].numpy(), [1., 1., 1., 1., 1., 1., 1., 0., 0., 0.])) 23 | self.assertTrue(np.array_equal(binary_vectors[2].numpy(), [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])) 24 | 25 | 26 | class TestGetMask(unittest.TestCase): 27 | def test_get_mask(self): 28 | binary_vectors = fbnetv2.create_binary_vector(channel_sizes=[1, 2, 4], dtype=tf.float32) 29 | g = tf.convert_to_tensor([2., 3., 5.]) 30 | mask = fbnetv2.get_mask(binary_vectors, g) 31 | self.assertEqual(mask.dtype, tf.float32) 32 | self.assertTrue(np.array_equal(mask, [10., 8., 5., 5.])) 33 | 34 | 35 | class TestChannelMasking(unittest.TestCase): 36 | def test_init(self): 37 | cm = fbnetv2.ChannelMasking(1, 5, 2, 'toto') 38 | self.assertEqual(cm.channel_sizes, [1, 3, 5]) 39 | 40 | def test_build_manual(self): 41 | cm = fbnetv2.ChannelMasking(1, 5, 2, 'toto') 42 | cm.build((15, 15, 3)) 43 | self.assertTrue(np.array_equal(cm.alpha.numpy(), [1., 1., 1.])) 44 | self.assertTrue(np.array_equal(cm.binary_vectors[0].numpy(), [1., 0., 0., 0., 0.])) 45 | self.assertTrue(np.array_equal(cm.binary_vectors[1].numpy(), [1., 1., 1., 0., 0.])) 46 | self.assertTrue(np.array_equal(cm.binary_vectors[2].numpy(), [1., 1., 1., 1., 1.])) 47 | 48 | def test_build_keras(self): 49 | cm = fbnetv2.ChannelMasking(1, 5, 2, 'toto', gumble_noise=False) 50 | model = tf.keras.Sequential([tf.keras.layers.Conv2D(5, (3, 3), padding='same', use_bias=False), cm]) 51 | model(tf.zeros((1, 24, 24, 3), dtype=tf.float32)) # build is called here 52 | self.assertTrue(np.array_equal(cm.alpha.numpy(), [1., 1., 1.])) 53 | self.assertTrue(np.array_equal(cm.binary_vectors[0].numpy(), [1., 0., 0., 0., 0.])) 54 | self.assertTrue(np.array_equal(cm.binary_vectors[1].numpy(), [1., 1., 1., 0., 0.])) 55 | self.assertTrue(np.array_equal(cm.binary_vectors[2].numpy(), [1., 1., 1., 1., 1.])) 56 | 57 | def test_call(self): 58 | cm = fbnetv2.ChannelMasking(1, 5, 2, 'toto', gumble_noise=False) 59 | model = tf.keras.Sequential([cm]) 60 | out = model(tf.ones((1, 3, 3, 5), dtype=tf.float32)) # build is called here 61 | # check g parameter 62 | for e in cm.g.numpy(): 63 | self.assertAlmostEqual(e, 1/3) 64 | # check output of the model 65 | self.assertEqual(out.shape, (1, 3, 3, 5)) 66 | components = out[0, 0, 0] 67 | self.assertAlmostEqual(components.numpy()[0], 1) 68 | self.assertAlmostEqual(components.numpy()[1], 2/3) 69 | self.assertAlmostEqual(components.numpy()[2], 2/3) 70 | self.assertAlmostEqual(components.numpy()[3], 1/3) 71 | self.assertAlmostEqual(components.numpy()[4], 1/3) 72 | 73 | 74 | class TestExponentialDecay(unittest.TestCase): 75 | def non_increasing(self, decay): 76 | """Checks that the provided decay function is non-increasing over a range 77 | 78 | Args: 79 | decay (instance): Instance of the decay to be tested 80 | 81 | Returns: 82 | bool: True if every element of value_list is >= the next one 83 | (i.e. the decay is non-increasing), else False 84 | """ 85 | value_list = [decay(i) for i in range(1, 100)] 86 | return
all([i >= j for i, j in zip(value_list, value_list[1:])]) 87 | 88 | def test_exponential_decay(self): 89 | decay = fbnetv2.exponential_decay(5, 1, 0.956) 90 | 91 | self.assertEqual(decay(0), 5) 92 | self.assertAlmostEqual(decay(10), 3.188, places=3) 93 | 94 | # test that the function is non-increasing over the number of epochs 95 | self.assertTrue(self.non_increasing(decay)) 96 | 97 | # negative test to ensure a decay rate greater than 1 is increasing 98 | decay = fbnetv2.exponential_decay(5, 1, 1.1) 99 | self.assertFalse(self.non_increasing(decay)) 100 | 101 | 102 | class TestPostTrainingAnalysis(unittest.TestCase): 103 | def test_post_training_analysis(self): 104 | cm1 = fbnetv2.ChannelMasking(1, 5, 2, 'toto_1_savable', gumble_noise=False) 105 | cm2 = fbnetv2.ChannelMasking(8, 16, 4, 'toto_2_savable', gumble_noise=False) 106 | model = tf.keras.Sequential( 107 | [tf.keras.layers.Conv2D(5, (3, 3), padding='same', use_bias=False), 108 | cm1, 109 | tf.keras.layers.Conv2D(16, (3, 3), padding='same', use_bias=False), 110 | cm2, 111 | ]) 112 | model(tf.zeros((1, 24, 24, 3), dtype=tf.float32)) # build is called here 113 | tmpdir = tempfile.mkdtemp() 114 | tmpfile = os.path.join(tmpdir, "test.yaml") 115 | fbnetv2.post_training_analysis(model, tmpfile) 116 | with open(tmpfile, 'r') as f: 117 | read = yaml.safe_load(f) 118 | self.assertDictEqual({"toto_1": 1, "toto_2": 8}, read) 119 | shutil.rmtree(tmpdir) 120 | 121 | 122 | class TestSplitTrainableWeights(unittest.TestCase): 123 | def test_split_trainable_weights(self): 124 | layer0 = tf.keras.layers.Input((32, 32, 3)) 125 | layer1 = tf.keras.layers.Conv2D(8, kernel_size=3, strides=1, padding='same') 126 | layer2 = fbnetv2.ChannelMasking(2, 8, 2, 'abc', gumble_noise=False) 127 | model = tf.keras.Sequential([layer0, layer1, layer2]) 128 | 129 | weights, arch_params = fbnetv2.split_trainable_weights(model) 130 | 131 | true_total_weight_param = 3*3*3*8+8 132 | true_total_arch_param = len(range(2, 8+1, 2)) 133 | 134 | # calculate number of weight params returned by the function 135 | total_weight_params = 0 136 | for w in weights: 137 | total_weight_params += np.prod(w.shape.as_list()) 138 | 139 | # calculate number of architecture params returned by the function 140 | total_arch_params = 0 141 | for p in arch_params: 142 | total_arch_params += np.prod(p.shape.as_list()) 143 | 144 | self.assertEqual(total_arch_params, true_total_arch_param) 145 | self.assertEqual(total_weight_params, true_total_weight_param) 146 | self.assertEqual(total_weight_params+total_arch_params, true_total_weight_param+true_total_arch_param) 147 | 148 | def test_not_arch_params(self): 149 | layer0 = tf.keras.layers.Input((32, 32, 3)) 150 | layer1 = tf.keras.layers.Conv2D(8, kernel_size=3, strides=1, padding='same') 151 | model = tf.keras.Sequential([layer0, layer1]) 152 | 153 | # check that it raises an error when there are no architectural parameters named 'alpha' 154 | self.assertRaises(ValueError, fbnetv2.split_trainable_weights, model, arch_params_name='alpha') 155 | 156 | 157 | class TestGumbelSoftmax(unittest.TestCase): 158 | def testSampling(self): 159 | fbnetv2.define_temperature(5.)
160 | noise = 0.0001 161 | logits = tf.constant([-1., 0.5, 1.]) 162 | 163 | g = fbnetv2.gumbel_softmax(logits, gumble_noise=False) 164 | 165 | self.assertEqual(logits.shape.as_list(), g.shape.as_list()) 166 | self.assertAlmostEqual(g.numpy().sum(), 1.0, 6) 167 | self.assertEqual(g.numpy().tolist(), tf.math.softmax((logits+noise)/5.).numpy().tolist()) 168 | 169 | def testUniformLikeDist(self): 170 | # set the temperature very high to see a uniform-like distribution 171 | fbnetv2.define_temperature(500000.0) 172 | logits = tf.constant([-2., 2., -2.5, -2.]) 173 | 174 | g = fbnetv2.gumbel_softmax(logits, gumble_noise=False) 175 | 176 | for i in range(4): 177 | self.assertAlmostEqual(float(g[i].numpy()), 0.25, 5) 178 | 179 | previous_g = g 180 | for t in range(100, 1, -1): 181 | fbnetv2.define_temperature(t) 182 | g = fbnetv2.gumbel_softmax(logits, gumble_noise=False) 183 | for i in [0, 2, 3]: 184 | self.assertLess(g[i], previous_g[i]) 185 | self.assertLess(previous_g[1], g[1]) 186 | previous_g = g 187 | fbnetv2.define_temperature(5.0) 188 | 189 | def testOnehotLikeDist(self): 190 | # set the temperature very low to see a one-hot-like distribution 191 | fbnetv2.define_temperature(0.00001) 192 | logits = tf.constant([-2., 2., -2.5, -2.]) 193 | 194 | g = fbnetv2.gumbel_softmax(logits, gumble_noise=False) 195 | 196 | self.assertAlmostEqual(g[0], 0.) 197 | self.assertAlmostEqual(g[1], 1.) 198 | self.assertAlmostEqual(g[2], 0.) 199 | self.assertAlmostEqual(g[3], 0.) 200 | fbnetv2.define_temperature(5.0) 201 | 202 | 203 | class TestSaveArchParams(unittest.TestCase): 204 | def test_save_arch_params(self): 205 | cm1 = fbnetv2.ChannelMasking(1, 5, 2, 'toto_1_savable', gumble_noise=False) 206 | cm2 = fbnetv2.ChannelMasking(8, 16, 4, 'toto_2_savable', gumble_noise=False) 207 | model = tf.keras.Sequential( 208 | [tf.keras.layers.Conv2D(5, (3, 3), padding='same', use_bias=False), 209 | cm1, 210 | tf.keras.layers.Conv2D(16, (3, 3), padding='same', use_bias=False), 211 | cm2, 212 | ]) 213 | model(tf.zeros((1, 24, 24, 3), dtype=tf.float32)) # build is called here 214 | 215 | tmpdir = tempfile.mkdtemp() 216 | fbnetv2.save_arch_params(model, epoch=0, log_dir=tmpdir) 217 | 218 | # check that the file exists and check its content 219 | self.assertTrue(os.path.exists(os.path.join(tmpdir, "alpha.json"))) 220 | with open(os.path.join(tmpdir, "alpha.json")) as f: 221 | a = json.load(f) 222 | self.assertTrue('0' in a) 223 | self.assertTrue('toto_1_savable' in a['0']) 224 | self.assertTrue('toto_2_savable' in a['0']) 225 | 226 | # simulate next epoch 227 | cm1.alpha = tf.convert_to_tensor([0.5, 1, 0.5], dtype=tf.float32) 228 | fbnetv2.save_arch_params(model, epoch=1, log_dir=tmpdir) 229 | with open(os.path.join(tmpdir, "alpha.json")) as f: 230 | a = json.load(f) 231 | for i in range(2): # check both saved epochs 232 | self.assertTrue(str(i) in a) 233 | self.assertTrue('toto_1_savable' in a[str(i)]) 234 | self.assertTrue('toto_2_savable' in a[str(i)]) 235 | 236 | shutil.rmtree(tmpdir) 237 | -------------------------------------------------------------------------------- /src/models/resnet.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from .generic_model import GenericModelBuilder 3 | 4 | 5 | weight_init = tf.keras.initializers.VarianceScaling() 6 | 7 | 8 | class ResNet(GenericModelBuilder): 9 | def __init__(self, res_n, *args, **kwargs): 10 | super(ResNet, self).__init__(*args, **kwargs) 11 | self.res_n = res_n 12 | 13 | def get_residual_layer(self): 14 | n_to_residual = { 15 | 10:
[1, 1, 1, 1], 16 | 12: [1, 1, 2, 1], 17 | 14: [1, 2, 2, 1], 18 | 16: [2, 2, 2, 1], 19 | 18: [2, 2, 2, 2], 20 | 20: [2, 2, 3, 2], 21 | 22: [2, 3, 3, 2], 22 | 24: [2, 3, 4, 2], 23 | 26: [2, 3, 5, 2], 24 | 28: [2, 3, 6, 2], 25 | 30: [2, 4, 6, 2], 26 | 32: [3, 4, 6, 2], 27 | 34: [3, 4, 6, 3], 28 | 50: [3, 4, 6, 3], 29 | 101: [3, 4, 23, 3], 30 | 152: [3, 8, 36, 3], 31 | } 32 | return n_to_residual[self.res_n] 33 | 34 | def model(self, x): 35 | if self.res_n < 50: 36 | residual_block = self.resblock 37 | else: 38 | residual_block = self.bottle_resblock 39 | residual_list = self.get_residual_layer() 40 | ch = 64 41 | weight_regularizer = self.weight_regularizer 42 | x = self.layers.Conv2D(int(ch/self.factor), 7, kernel_initializer=weight_init, kernel_regularizer=weight_regularizer, padding="same", name='conv')(x) 43 | x = self.layers.MaxPooling2D(pool_size=3, strides=2)(x) 44 | for i in range(residual_list[0]): 45 | x = residual_block(x, channels=int(ch/self.factor), downsample=False, block_name='resblock0_' + str(i)) 46 | # block 1 47 | x = residual_block(x, channels=int(ch/self.factor) * 2, downsample=True, block_name='resblock1_0') 48 | for i in range(1, residual_list[1]): 49 | x = residual_block(x, channels=int(ch/self.factor) * 2, downsample=False, block_name='resblock1_' + str(i)) 50 | # block 2 51 | x = residual_block(x, channels=int(ch/self.factor) * 4, downsample=True, block_name='resblock2_0') 52 | for i in range(1, residual_list[2]): 53 | x = residual_block(x, channels=int(ch/self.factor) * 4, downsample=False, block_name='resblock2_' + str(i)) 54 | # block 3 55 | x = residual_block(x, channels=int(ch/self.factor) * 8, downsample=True, block_name='resblock_3_0') 56 | for i in range(1, residual_list[3]): 57 | x = residual_block(x, channels=int(ch/self.factor) * 8, downsample=False, block_name='resblock_3_' + str(i)) 58 | # block 4 59 | x = self.layers.BatchNormalization(axis=self.channel_axis, name='batch_norm_last')(x) 60 | x = self.layers.Activation('relu', name='relu_last')(x) 61 | x = self.layers.GlobalAveragePooling2D()(x) 62 | return x 63 | 64 | def resblock(self, x, channels, use_bias=True, downsample=False, block_name='resblock'): 65 | layers = self.layers 66 | weight_regularizer = self.weight_regularizer 67 | x_init = x 68 | x = layers.BatchNormalization(axis=self.channel_axis, name=block_name + '/batch_norm_0')(x) 69 | x = layers.Activation('relu', name=block_name + '/relu_0')(x) 70 | if downsample: 71 | x = layers.Conv2D(channels, 3, 2, kernel_initializer=weight_init, kernel_regularizer=weight_regularizer, 72 | use_bias=use_bias, padding='same', name=block_name + '/conv_0')(x) 73 | x_init = layers.Conv2D(channels, 1, 2, kernel_initializer=weight_init, kernel_regularizer=weight_regularizer, 74 | use_bias=False, padding='same', name=block_name + '/conv_init')(x_init) 75 | else: 76 | x = layers.Conv2D(channels, 3, 1, kernel_initializer=weight_init, kernel_regularizer=weight_regularizer, 77 | use_bias=False, padding='same', name=block_name + '/conv_0')(x) 78 | x = layers.BatchNormalization(axis=self.channel_axis, name=block_name + '/batch_norm_1')(x) 79 | x = layers.Activation('relu', name=block_name + '/relu_1')(x) 80 | x = layers.Conv2D(channels, 3, 1, kernel_initializer=weight_init, kernel_regularizer=weight_regularizer, 81 | use_bias=False, padding='same', name=block_name + '/conv_1')(x) 82 | x = layers.Add()([x, x_init]) 83 | return x 84 | 85 | def bottle_resblock(self, x, channels, use_bias=True, downsample=False, block_name='bottle_resblock'): 86 | layers = self.layers 87 
| weight_regularizer = self.weight_regularizer 88 | x = layers.BatchNormalization(axis=self.channel_axis, name=block_name + '/batch_norm_1x1_front')(x) 89 | shortcut = layers.Activation('relu', name=block_name + '/relu_1x1_front')(x) 90 | x = layers.Conv2D(channels, 1, 1, 'same', kernel_initializer=weight_init, kernel_regularizer=weight_regularizer, 91 | use_bias=False, name=block_name + '/conv_1x1_front')(shortcut) 92 | x = layers.BatchNormalization(axis=self.channel_axis, name=block_name + '/batch_norm_3x3')(x) 93 | x = layers.Activation('relu', name=block_name + '/relu_3x3')(x) 94 | if downsample: 95 | x = layers.Conv2D(channels, 3, 2, 'same', kernel_initializer=weight_init, 96 | kernel_regularizer=weight_regularizer, use_bias=use_bias, name=block_name + '/conv_0')(x) 97 | shortcut = layers.Conv2D(channels * 4, 1, 2, 'same', kernel_initializer=weight_init, kernel_regularizer=weight_regularizer, 98 | use_bias=False, name=block_name + '/conv_init')(shortcut) 99 | else: 100 | x = layers.Conv2D(channels, 3, 1, 'same', kernel_initializer=weight_init, 101 | kernel_regularizer=weight_regularizer, use_bias=False, name=block_name + '/conv_0')(x) 102 | shortcut = layers.Conv2D(channels * 4, 1, 1, 'same', kernel_initializer=weight_init, kernel_regularizer=weight_regularizer, 103 | use_bias=False, name=block_name + '/conv_init')(shortcut) 104 | x = layers.BatchNormalization(axis=self.channel_axis, name=block_name + '/batch_norm_1x1_back')(x) 105 | x = layers.Activation('relu', name=block_name + '/relu_1x1_back')(x) 106 | x = layers.Conv2D(channels * 4, 1, 1, 'same', kernel_initializer=weight_init, kernel_regularizer=weight_regularizer, 107 | use_bias=False, name=block_name + '/conv_1x1_back')(x) 108 | x = layers.Add()([x, shortcut]) 109 | return x 110 | 111 | 112 | class ResNetHyper(ResNet): 113 | def __init__(self, *args, **kwargs): 114 | super().__init__(2 * self.hp.Int('depth', min_value=5, max_value=17, step=1), *args, **kwargs) 115 | 116 | 117 | class ResNet50(ResNet): 118 | def __init__(self, *args, **kwargs): 119 | super().__init__(50, *args, **kwargs) 120 | 121 | 122 | class ResNet101(ResNet): 123 | def __init__(self, *args, **kwargs): 124 | super().__init__(101, *args, **kwargs) 125 | 126 | 127 | class ResNet152(ResNet): 128 | def __init__(self, *args, **kwargs): 129 | super().__init__(152, *args, **kwargs) 130 | 131 | 132 | class ResNet34(ResNet): 133 | def __init__(self, *args, **kwargs): 134 | super().__init__(34, *args, **kwargs) 135 | 136 | 137 | class ResNet18(ResNet): 138 | def __init__(self, *args, **kwargs): 139 | super().__init__(18, *args, **kwargs) 140 | 141 | 142 | class ResNetCIFAR(GenericModelBuilder): 143 | def __init__(self, res_n, *args, **kwargs): 144 | super(ResNetCIFAR, self).__init__(*args, **kwargs) 145 | self.res_n = res_n 146 | 147 | def get_residual_layer(self): 148 | n_to_residual = { 149 | 20: [3], 150 | 32: [5], 151 | 44: [7], 152 | 56: [9], 153 | } 154 | return n_to_residual[self.res_n] * 3 155 | 156 | def model(self, x): 157 | residual_list = self.get_residual_layer() 158 | weight_regularizer = self.weight_regularizer 159 | ch = 16 160 | x = self.layers.Conv2D(int(ch/self.factor), 3, 1, kernel_initializer=weight_init, kernel_regularizer=weight_regularizer, 161 | padding="same", name='conv')(x) 162 | 163 | # block 1 164 | for i in range(residual_list[0]): 165 | x = self.resblock_cifar(x, channels=int(ch/self.factor), stride=1, downsample=False, block_name='resblock0_' + str(i)) 166 | # block 2 167 | x = self.resblock_cifar(x, channels=int(ch/self.factor) * 2, 
stride=2, downsample=True, block_name='resblock1_0') 168 | for i in range(1, residual_list[1]): 169 | x = self.resblock_cifar(x, channels=int(ch/self.factor) * 2, stride=1, downsample=False, block_name='resblock1_' + str(i)) 170 | # block 3 171 | x = self.resblock_cifar(x, channels=int(ch/self.factor) * 4, stride=2, downsample=True, block_name='resblock2_0') 172 | for i in range(1, residual_list[2]): 173 | x = self.resblock_cifar(x, channels=int(ch/self.factor) * 4, stride=1, downsample=False, block_name='resblock2_' + str(i)) 174 | # block 4 175 | x = self.layers.BatchNormalization(axis=self.channel_axis, name='batch_norm_last')(x) 176 | x = self.layers.Activation('relu', name='relu_last')(x) 177 | x = self.layers.GlobalAveragePooling2D()(x) 178 | return x 179 | 180 | def resblock_cifar(self, x, channels, use_bias=True, stride=1, downsample=False, block_name='resblock'): 181 | layers = self.layers 182 | weight_regularizer = self.weight_regularizer 183 | x_init = x 184 | x = layers.BatchNormalization(axis=self.channel_axis, name=block_name + '/batch_norm_0')(x) 185 | x = layers.Activation('relu', name=block_name + '/relu_0')(x) 186 | if downsample: 187 | x_init = layers.Conv2D(channels, 3, 2, kernel_initializer=weight_init, kernel_regularizer=weight_regularizer, 188 | use_bias=use_bias, padding='same', name=block_name + '/conv_init')(x_init) 189 | x = layers.Conv2D(channels, 3, strides=stride, kernel_initializer=weight_init, kernel_regularizer=weight_regularizer, 190 | use_bias=False, padding='same', name=block_name + '/conv_0')(x) 191 | x = layers.BatchNormalization(axis=self.channel_axis, name=block_name + '/batch_norm_1')(x) 192 | x = layers.Activation('relu', name=block_name + '/relu_1')(x) 193 | x = layers.Conv2D(channels, 3, 1, kernel_initializer=weight_init, kernel_regularizer=weight_regularizer, 194 | use_bias=False, padding='same', name=block_name + '/conv_1')(x) 195 | x = layers.Add()([x, x_init]) 196 | return x 197 | 198 | 199 | class ResNet20CIFAR(ResNetCIFAR): 200 | def __init__(self, *args, **kwargs): 201 | super().__init__(20, *args, **kwargs) 202 | 203 | 204 | class ResNet32CIFAR(ResNetCIFAR): 205 | def __init__(self, *args, **kwargs): 206 | super().__init__(32, *args, **kwargs) 207 | 208 | 209 | class ResNet44CIFAR(ResNetCIFAR): 210 | def __init__(self, *args, **kwargs): 211 | super().__init__(44, *args, **kwargs) 212 | 213 | 214 | class ResNet56CIFAR(ResNetCIFAR): 215 | def __init__(self, *args, **kwargs): 216 | super().__init__(56, *args, **kwargs) 217 | -------------------------------------------------------------------------------- /src/models/fbnet_mobilenet.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | import yaml 4 | 5 | from .generic_model import GenericModelBuilder 6 | from .fbnetv2 import ChannelMasking 7 | from .mobilenet import correct_pad 8 | 9 | BATCHNORM_MOMENTUM = 0.9 10 | arch_param_regularizer = tf.keras.regularizers.l2(l=0.0005) 11 | 12 | class _FBNet_MobileNetV2(GenericModelBuilder): 13 | def __init__(self, *args, load_searched_arch: str = None, **kwargs): 14 | """the official implementation uses tf.keras.backend.int_shape(x)[-1] to compute in_channels. 15 | But with upstride we can't, because when working with the cpp engine, the shape of the tensor is multiplied by the length of the multivector.
16 | To bypass this issue, we can remember the output shape of the last block and use it to define in_channels. 17 | At the beginning of the model building, the shape should be 3 (number of channels) 18 | """ 19 | self.last_block_output_shape = 3 20 | self.load_searched_arch = load_searched_arch 21 | 22 | if self.load_searched_arch: 23 | if tf.io.gfile.exists(self.load_searched_arch): 24 | with open(self.load_searched_arch, 'r') as f: 25 | self.model_def = yaml.safe_load(f) 26 | else: 27 | raise FileNotFoundError(f"{self.load_searched_arch} incorrect, check the path") 28 | assert all([k1 == k2 for k1, k2 in zip(self.model_def.keys(), self.mapping.keys())]), "keys are not the same" 29 | for (k1, v1), (k2, v2) in zip(self.model_def.items(), self.mapping.items()): 30 | if k1 == k2: 31 | self.mapping[k2][0] = v1 32 | 33 | # the init of the super class needs to be called at the end of this init because it calls model(), so everything needs to be ready before 34 | super().__init__(*args, **kwargs) 35 | 36 | def _inverted_res_block(self, x, filters, stride, expansion, name): 37 | """This block performs the Conv(expansion) -> DepthWiseConv -> Conv(Projection) 38 | 39 | Args: 40 | expansion (integer): Integer value to increase the channels from the previous layer 41 | stride (Int): Strides to be applied in the convolution 42 | filters (Int) or tuple(Int): total feature maps to be obtained at the end of the block or range (min, max, step) during arch search 43 | name (str): Indicates the block number and controls expansion or just depthwise separable convolution 44 | """ 45 | layers = self.layers 46 | weight_regularizer = self.weight_regularizer 47 | in_channels = self.last_block_output_shape 48 | # If model definition file is not passed 49 | if not self.load_searched_arch: 50 | # get the max possible number of channels 51 | pointwise_conv_filters = filters[1] 52 | else: 53 | # get the number of channels defined in the file 54 | pointwise_conv_filters = filters 55 | 56 | # TODO Enable this to see if we get speed up (multiples of 8 are required for activating tensor cores for mixed precision training) 57 | # pointwise_filters = _make_divisible(pointwise_conv_filters, 8) 58 | inputs = x 59 | prefix = name 60 | 61 | 62 | # Expand 63 | x = layers.Conv2D((expansion * in_channels), kernel_size=1, padding='same', use_bias=False, name=prefix + 'expand', kernel_regularizer=weight_regularizer)(x) 64 | # if not self.load_searched_arch: 65 | # new_filter_range = [i * expansion for i in filters] 66 | # x = ChannelMasking(*new_filter_range, name=prefix + '_cm1', regularizer=arch_param_regularizer)(x) 67 | x = layers.BatchNormalization(epsilon=1e-3, momentum=BATCHNORM_MOMENTUM, name=prefix + 'expand_BN')(x) 68 | x = layers.ReLU(6., name=prefix + 'expand_relu')(x) 69 | 70 | # Depthwise 71 | if stride == 2: 72 | x = layers.ZeroPadding2D(padding=correct_pad(x, 3, self.is_channels_first), name=prefix + 'pad')(x) 73 | x = layers.DepthwiseConv2D(kernel_size=3, strides=stride, activation=None, use_bias=False, padding='same' if stride == 1 else 'valid', 74 | name=prefix + 'depthwise', depthwise_regularizer=weight_regularizer)(x) 75 | x = layers.BatchNormalization(epsilon=1e-3, momentum=BATCHNORM_MOMENTUM, name=prefix + 'depthwise_BN')(x) 76 | x = layers.ReLU(6., name=prefix + 'depthwise_relu')(x) 77 | 78 | # Project 79 | x = layers.Conv2D(pointwise_conv_filters, kernel_size=1, padding='same', use_bias=False, activation=None, name=prefix + 'project', kernel_regularizer=weight_regularizer)(x) 80 | if not self.load_searched_arch: 81 | x
= ChannelMasking(*filters, name=prefix + '_savable', regularizer=arch_param_regularizer)(x) 82 | x = layers.BatchNormalization(epsilon=1e-3, momentum=BATCHNORM_MOMENTUM, name=prefix + 'project_BN')(x) 83 | 84 | if in_channels == pointwise_conv_filters and stride == 1: 85 | x = layers.Add(name=prefix + 'add')([inputs, x]) 86 | self.last_block_output_shape = pointwise_conv_filters 87 | 88 | return x 89 | 90 | def model(self, x, alpha=1.0): 91 | """Instantiates the MobileNetV2 architecture. 92 | Args: 93 | alpha: controls the width of the network. This is known as the 94 | width multiplier in the MobileNetV2 paper, but the name is kept for 95 | consistency with MobileNetV1 in Keras. 96 | - If `alpha` < 1.0, proportionally decreases the number 97 | of filters in each layer. 98 | - If `alpha` > 1.0, proportionally increases the number 99 | of filters in each layer. 100 | - If `alpha` = 1, default number of filters from the paper 101 | are used at each layer. 102 | """ 103 | weight_regularizer = self.weight_regularizer 104 | 105 | # first_block_filters = _make_divisible(16, 8) 106 | x = self.layers.ZeroPadding2D(padding=correct_pad(x, 3, self.is_channels_first), name='conv1_pad')(x) 107 | 108 | first_block_filters = self.mapping['conv2d_01'] 109 | if not self.load_searched_arch: 110 | x = self.layers.Conv2D(first_block_filters[0][1], kernel_size=3, strides=2, padding='valid', use_bias=False, name='conv2d_01', kernel_regularizer=weight_regularizer)(x) 111 | x = ChannelMasking(*first_block_filters[0], name='conv2d_01_savable', regularizer=arch_param_regularizer)(x) 112 | self.last_block_output_shape = first_block_filters[0][1] 113 | else: 114 | x = self.layers.Conv2D(first_block_filters[0], kernel_size=3, strides=2, padding='valid', use_bias=False, name='conv2d_01', kernel_regularizer=weight_regularizer)(x) 115 | self.last_block_output_shape = first_block_filters[0] 116 | x = self.layers.BatchNormalization(epsilon=1e-3, momentum=BATCHNORM_MOMENTUM)(x) 117 | x = self.layers.ReLU(6.)(x) 118 | 119 | 120 | # Inverted residuals 121 | for k, v in self.mapping.items(): 122 | if k.split('_')[0] == 'irb': # ignore conv2d for now 123 | x = self._inverted_res_block(x, filters=v[0], stride=v[1], expansion=v[2], name=k) 124 | 125 | # no alpha applied to last conv as stated in the paper: 126 | # if the width multiplier is greater than 1 we 127 | # increase the number of output channels 128 | last_block_filters = 1984 # TODO try with 1280 129 | 130 | # TODO move this into the _conv_block once we planned to use the channel masking for the below 131 | x = self.layers.Conv2D(last_block_filters, kernel_size=1, use_bias=False, kernel_regularizer=weight_regularizer)(x) 132 | # if not self.load_searched_arch: 133 | # x = layers.ChannelMasking(, 1984, )(x) # TODO test 134 | x = self.layers.BatchNormalization(epsilon=1e-3, momentum=BATCHNORM_MOMENTUM)(x) 135 | x = self.layers.ReLU(6., name='out_relu')(x) 136 | 137 | x = self.layers.GlobalAveragePooling2D()(x) 138 | # x = self.layers.Dense(self.label_dim, use_bias=True, name='Logits', kernel_regularizer=weight_regularizer)(x) 139 | return x 140 | 141 | 142 | 143 | class FBNet_MobileNetV2CIFAR(_FBNet_MobileNetV2): 144 | def __init__(self, *args, **kwargs): 145 | self.mapping = { 146 | # filter_range, Stride, expansion 147 | 'conv2d_01': [(8, 16, 4), 1, 1], 148 | 'irb_01': [(12, 16, 4), 1, 1], 149 | 'irb_02': [(16, 24, 4), 1, 6], 150 | 'irb_03': [(16, 24, 4), 1, 6], 151 | 'irb_04': [(16, 24, 4), 1, 6], 152 | 'irb_05': [(16, 40, 8), 1, 6], 153 | 'irb_06': [(16, 40, 8), 1, 
6], 154 | 'irb_07': [(16, 40, 8), 1, 6], 155 | 'irb_08': [(48, 80, 8), 1, 6], 156 | 'irb_09': [(48, 80, 8), 1, 6], 157 | 'irb_10': [(48, 80, 8), 1, 6], 158 | 'irb_11': [(72, 112, 8), 1, 6], 159 | 'irb_12': [(72, 112, 8), 1, 6], 160 | 'irb_13': [(72, 112, 8), 1, 6], 161 | 'irb_14': [(112, 184, 8), 2, 6], 162 | 'irb_15': [(112, 184, 8), 1, 6], 163 | 'irb_16': [(112, 184, 8), 1, 6], 164 | 'irb_17': [(112, 184, 8), 1, 6], 165 | # 'conv2d_2': [1984, 1, 1], 166 | } 167 | super().__init__(*args, **kwargs) 168 | 169 | 170 | class FBNet_MobileNetV2CIFARUP(_FBNet_MobileNetV2): 171 | def __init__(self, *args, **kwargs): 172 | self.mapping = { 173 | # filter_range, Stride, expansion 174 | 'conv2d_01': [(4, 16, 4), 1, 1], 175 | 'irb_01': [(4, 8, 4), 1, 1], 176 | 'irb_02': [(4, 12, 4), 1, 6], 177 | 'irb_03': [(4, 12, 4), 1, 6], 178 | 'irb_04': [(4, 16, 4), 2, 6], 179 | 'irb_05': [(4, 16, 4), 1, 6], 180 | 'irb_06': [(4, 16, 4), 1, 6], 181 | 'irb_07': [(8, 32, 4), 2, 6], 182 | 'irb_08': [(8, 32, 4), 1, 6], 183 | 'irb_09': [(8, 32, 4), 1, 6], 184 | 'irb_10': [(8, 32, 4), 1, 6], 185 | 'irb_11': [(12, 48, 4), 1, 6], 186 | 'irb_12': [(12, 48, 4), 1, 6], 187 | 'irb_13': [(12, 48, 4), 1, 6], 188 | 'irb_14': [(24, 80, 8), 2, 6], 189 | 'irb_15': [(24, 80, 8), 1, 6], 190 | 'irb_16': [(24, 80, 8), 1, 6], 191 | 'irb_17': [(40, 160, 8), 1, 6], 192 | # 'conv2d_2': [1984, 1, 1], 193 | } 194 | super().__init__(*args, **kwargs) 195 | 196 | class FBNet_MobileNetV2Imagenet(_FBNet_MobileNetV2): 197 | def __init__(self, *args, **kwargs): 198 | self.mapping = { 199 | # filter_range, Stride, expansion 200 | 'conv2d_01': [(8, 16, 4), 2, 1], 201 | 'irb_01': [(12, 16, 4), 1, 1], 202 | 'irb_02': [(16, 24, 4), 2, 6], 203 | 'irb_03': [(16, 24, 4), 1, 6], 204 | 'irb_04': [(16, 24, 4), 1, 6], 205 | 'irb_05': [(16, 40, 8), 2, 6], 206 | 'irb_06': [(16, 40, 8), 1, 6], 207 | 'irb_07': [(16, 40, 8), 1, 6], 208 | 'irb_08': [(48, 80, 8), 2, 6], 209 | 'irb_09': [(48, 80, 8), 1, 6], 210 | 'irb_10': [(48, 80, 8), 1, 6], 211 | 'irb_11': [(72, 112, 8), 1, 6], 212 | 'irb_12': [(72, 112, 8), 1, 6], 213 | 'irb_13': [(72, 112, 8), 1, 6], 214 | 'irb_14': [(112, 184, 8), 2, 6], 215 | 'irb_15': [(112, 184, 8), 1, 6], 216 | 'irb_16': [(112, 184, 8), 1, 6], 217 | 'irb_17': [(112, 184, 8), 1, 6], 218 | # 'conv2d_2': [1984, 1, 1], 219 | } 220 | super().__init__(*args, **kwargs) -------------------------------------------------------------------------------- /train_arch_search.py: -------------------------------------------------------------------------------- 1 | import tqdm 2 | import yaml 3 | from src.models.fbnetv2 import ChannelMasking, define_temperature 4 | from submodules.global_dl.global_conf import config_tf2 5 | import math 6 | import os 7 | import tensorflow as tf 8 | import upstride_argparse as argparse 9 | from src.argument_parser import training_arguments_das 10 | from src.data import dataloader 11 | from src import losses 12 | from src.models import model_name_to_class 13 | from src.models.generic_model import framework_list 14 | from src.utils import check_folder, get_imagenet_data, model_dir 15 | from submodules.global_dl import global_conf 16 | from submodules.global_dl.training.training import create_env_directories, setup_mp, define_model_in_strategy, get_callbacks, init_custom_checkpoint_callbacks 17 | from submodules.global_dl.training import training 18 | from submodules.global_dl.training import alchemy_api 19 | from submodules.global_dl.training import export 20 | from submodules.global_dl.training.optimizers import 
get_lr_scheduler, get_optimizer, StepDecaySchedule, CosineDecay 21 | from submodules.global_dl.training import optimizers 22 | from src.models import fbnetv2 23 | 24 | arguments = [ 25 | ['namespace', 'dataloader', dataloader.arguments], 26 | ['namespace', 'server', alchemy_api.arguments], 27 | ['namespace', 'optimizer', optimizers.arguments], 28 | ['namespace', 'export', export.arguments], 29 | ['namespace', 'arch_search', training_arguments_das], 30 | [int, "factor", 1, 'division factor to scale the number of channels. factor=2 means the model will have half the number of channels compared to the default implementation'], 31 | [int, 'n_layers_before_tf', 0, 'when using a mix framework, number of layers defined using upstride', lambda x: x >= 0], 32 | [str, 'load_searched_arch', '', 'model definition file containing the searched architecture'], 33 | [bool, 'log_arch', False, 'if true then save the values of the alpha parameters after every epoch in a csv file in the log directory'], 34 | [str, "model_name", '', 'Specify the name of the model', lambda x: x in model_name_to_class], 35 | [str, 'framework', 'tensorflow', 'Framework to use to define the model', lambda x: x in framework_list], 36 | ] + global_conf.arguments + training.arguments 37 | 38 | 39 | def main(): 40 | """ function called when starting the code via command-line 41 | """ 42 | args = argparse.parse_cmd(arguments) 43 | args['server'] = alchemy_api.start_training(args['server']) 44 | train(args) 45 | 46 | 47 | def get_experiment_name(args): 48 | experiment_dir = f"{args['model_name']}_{args['framework']}" 49 | if 'mix' in args['framework']: 50 | experiment_dir += "_mix_{}".format(args['n_layers_before_tf']) 51 | if args['configuration']['with_mixed_precision']: 52 | experiment_dir += "_mp" 53 | return experiment_dir 54 | 55 | 56 | def get_train_step_function(model, weights, weight_opt, metrics): 57 | train_accuracy_metric = metrics['accuracy'] 58 | train_cross_entropy_loss_metric = metrics['cross_entropy_loss'] 59 | train_total_loss_metric = metrics['total_loss'] 60 | 61 | @tf.function 62 | def train_step(x_batch, y_batch): 63 | with tf.GradientTape() as tape: 64 | y_hat = model(x_batch, training=True) 65 | cross_entropy_loss = tf.reduce_mean(tf.keras.losses.categorical_crossentropy(y_batch, y_hat)) 66 | weight_reg_loss = tf.reduce_sum(model.losses) 67 | total_loss = cross_entropy_loss + weight_reg_loss 68 | train_accuracy_metric.update_state(y_batch, y_hat) 69 | train_cross_entropy_loss_metric.update_state(cross_entropy_loss) 70 | train_total_loss_metric.update_state(total_loss) 71 | # Update the weights 72 | grads = tape.gradient(total_loss, weights) 73 | weight_opt.apply_gradients(zip(grads, weights)) 74 | return train_step 75 | 76 | 77 | def get_train_step_arch_function(model, arch_params, arch_opt, train_metrics, arch_metrics): 78 | latency_reg_loss_metric = arch_metrics['latency_reg_loss'] 79 | train_accuracy_metric = train_metrics['accuracy'] 80 | train_cross_entropy_loss_metric = train_metrics['cross_entropy_loss'] 81 | total_loss_metric = train_metrics['total_loss'] 82 | 83 | @tf.function 84 | def train_step_arch(x_batch, y_batch): 85 | with tf.GradientTape() as tape: 86 | y_hat = model(x_batch, training=False) 87 | cross_entropy_loss = tf.reduce_mean(tf.keras.losses.categorical_crossentropy(y_batch, y_hat)) 88 | weight_reg_loss = tf.reduce_sum(model.losses) 89 | latency_reg_loss = losses.parameters_loss(model) / 1.0e6 90 | total_loss = cross_entropy_loss + weight_reg_loss # + latency_reg_loss 91 |
latency_reg_loss_metric.update_state(latency_reg_loss) 92 | train_accuracy_metric.update_state(y_batch, y_hat) 93 | train_cross_entropy_loss_metric.update_state(cross_entropy_loss) 94 | total_loss_metric.update_state(total_loss) 95 | # Update the architecture parameters 96 | grads = tape.gradient(total_loss, arch_params) 97 | arch_opt.apply_gradients(zip(grads, arch_params)) 98 | return train_step_arch 99 | 100 | 101 | def get_eval_step_function(model, metrics): 102 | val_accuracy_metric = metrics['accuracy'] 103 | val_cross_entropy_loss_metric = metrics['cross_entropy_loss'] 104 | 105 | @tf.function 106 | def evaluation_step(x_batch, y_batch): 107 | y_hat = model(x_batch, training=False) 108 | loss = tf.reduce_mean(tf.keras.losses.categorical_crossentropy(y_batch, y_hat)) 109 | val_accuracy_metric.update_state(y_batch, y_hat) 110 | val_cross_entropy_loss_metric.update_state(loss) 111 | return evaluation_step 112 | 113 | 114 | def metrics_processing(metrics, summary_writers, keys, template, epoch, postfix=''): 115 | for key in keys: 116 | with summary_writers[key].as_default(): 117 | for sub_key in metrics[key]: 118 | value = float(metrics[key][sub_key].result()) # save metric value 119 | metrics[key][sub_key].reset_states() # reset the metric 120 | template += f", {key}_{sub_key}: {value}" 121 | tf.summary.scalar(sub_key+postfix, value, step=epoch) 122 | return template 123 | 124 | 125 | def train(args): 126 | # config_tf2(args['configuration']['xla']) 127 | # Create log, checkpoint and export directories 128 | checkpoint_dir, log_dir, export_dir = create_env_directories(args, get_experiment_name(args)) 129 | train_log_dir = os.path.join(log_dir, 'train') 130 | val_log_dir = os.path.join(log_dir, 'validation') 131 | arch_log_dir = os.path.join(log_dir, 'arch') 132 | summary_writers = { 133 | 'train': tf.summary.create_file_writer(train_log_dir), 134 | 'val': tf.summary.create_file_writer(val_log_dir), 135 | 'arch': tf.summary.create_file_writer(arch_log_dir) 136 | } 137 | 138 | # Prepare the 3 datasets 139 | train_weight_dataset = dataloader.get_dataset(args['dataloader'], transformation_list=args['dataloader']['train_list'], 140 | num_classes=args["num_classes"], split='train_weights') 141 | train_arch_dataset = dataloader.get_dataset(args['dataloader'], transformation_list=args['dataloader']['train_list'], 142 | num_classes=args["num_classes"], split='train_arch') 143 | val_dataset = dataloader.get_dataset(args['dataloader'], transformation_list=args['dataloader']['val_list'], 144 | num_classes=args["num_classes"], split='test') 145 | 146 | # define model, optimizer and checkpoint callback 147 | setup_mp(args) 148 | model = model_name_to_class[args['model_name']](args['framework'], 149 | input_shape=args['input_size'], 150 | label_dim=args['num_classes']).model 151 | model.summary() 152 | 153 | alchemy_api.send_model_info(model, args['server']) 154 | weights, arch_params = fbnetv2.split_trainable_weights(model) 155 | weight_opt = get_optimizer(args['optimizer']) 156 | arch_opt = get_optimizer(args['arch_search']['optimizer']) 157 | model_checkpoint_cb, latest_epoch = init_custom_checkpoint_callbacks({'model': model}, checkpoint_dir, args['max_checkpoints'], args['checkpoint_freq']) 158 | callbacks = [ 159 | model_checkpoint_cb 160 | ] 161 | 162 | temperature_decay_fn = fbnetv2.exponential_decay(args['arch_search']['temperature']['init_value'], 163 | args['arch_search']['temperature']['decay_steps'], 164 | args['arch_search']['temperature']['decay_rate']) 165 | 166 | lr_decay_fn =
CosineDecay(args['optimizer']['lr'], 167 | alpha=args["optimizer"]["lr_decay_strategy"]["lr_params"]["alpha"], 168 | total_epochs=args['num_epochs']) 169 | 170 | lr_decay_fn_arch = CosineDecay(args['arch_search']['optimizer']['lr'], 171 | alpha=0.000001, 172 | total_epochs=args['num_epochs']) 173 | 174 | metrics = { 175 | 'arch': { 176 | 'latency_reg_loss': tf.keras.metrics.Mean() 177 | }, 178 | 'train': { 179 | 'total_loss': tf.keras.metrics.Mean(), 180 | 'accuracy': tf.keras.metrics.CategoricalAccuracy(), 181 | 'cross_entropy_loss': tf.keras.metrics.Mean(), 182 | }, 183 | 'val': { 184 | 'accuracy': tf.keras.metrics.CategoricalAccuracy(), 185 | 'cross_entropy_loss': tf.keras.metrics.Mean(), 186 | } 187 | } 188 | 189 | train_step = get_train_step_function(model, weights, weight_opt, metrics['train']) 190 | train_step_arch = get_train_step_arch_function(model, arch_params, arch_opt, metrics['train'], metrics['arch']) 191 | evaluation_step = get_eval_step_function(model, metrics['val']) 192 | 193 | for epoch in range(latest_epoch, args['num_epochs']): 194 | print(f'Epoch: {epoch}/{args["num_epochs"]}') 195 | # Update both LR 196 | weight_opt.learning_rate = lr_decay_fn(epoch) 197 | arch_opt.learning_rate = lr_decay_fn_arch(epoch) 198 | # Updating the weight parameters using a subset of the training data 199 | for step, (x_batch, y_batch) in tqdm.tqdm(enumerate(train_weight_dataset, start=1)): 200 | train_step(x_batch, y_batch) 201 | # Evaluate the model on validation subset 202 | for x_batch, y_batch in val_dataset: 203 | evaluation_step(x_batch, y_batch) 204 | # Handle metrics 205 | template = f"Weights updated, Epoch {epoch}" 206 | template = metrics_processing(metrics, summary_writers, ['train', 'val'], template, epoch) 207 | template += f", lr: {float(weight_opt.learning_rate)}" 208 | print(template) 209 | 210 | new_temperature = temperature_decay_fn(epoch) 211 | with summary_writers['train'].as_default(): 212 | tf.summary.scalar('temperature', new_temperature, step=epoch) 213 | define_temperature(new_temperature) 214 | 215 | if epoch >= args['arch_search']['num_warmup']: 216 | # Updating the architectural parameters on another subset 217 | for step, (x_batch, y_batch) in tqdm.tqdm(enumerate(train_arch_dataset, start=1)): 218 | train_step_arch(x_batch, y_batch) 219 | # Evaluate the model on validation subset 220 | for x_batch, y_batch in val_dataset: 221 | evaluation_step(x_batch, y_batch) 222 | # Handle metrics 223 | template = f'Architecture updated, Epoch {epoch}' 224 | template = metrics_processing(metrics, summary_writers, ['train', 'val', 'arch'], template, epoch, postfix='_arch') 225 | template += f", lr: {float(arch_opt.learning_rate)}" 226 | print(template) 227 | # the save is kept outside of the condition so we save starting from the beginning 228 | fbnetv2.save_arch_params(model, epoch, log_dir) 229 | 230 | # manually call the callbacks 231 | for callback in callbacks: 232 | callback.on_epoch_end(epoch, logs=None) 233 | 234 | print("Training Completed!!") 235 | 236 | print("Architecture params: ") 237 | print(arch_params) 238 | fbnetv2.post_training_analysis(model, args['arch_search']['exported_architecture']) 239 | 240 | 241 | if __name__ == '__main__': 242 | main() 243 | --------------------------------------------------------------------------------