├── capsnetcifr100.log ├── data ├── file.txt~ ├── classnames └── classnames.csv ├── README.md ├── histogram.png ├── preprocess.png ├── preprocess_1.png ├── test_images ├── boy.jpeg ├── sea.jpeg ├── clock.jpeg ├── maple.jpeg ├── shrew.jpeg ├── snail.jpeg ├── tank.jpeg ├── tulips.jpg ├── dolphin.jpg ├── leopard.jpeg ├── lizard.jpeg ├── plates.jpeg ├── possum.jpeg ├── wardrobe.jpeg ├── chimpanzee.jpeg ├── mushrooms.jpeg ├── skyscraper.jpeg ├── caterpillar.jpeg └── pickup_truck.jpeg ├── capslayer ├── __init__.py ├── losses.py ├── utils.py ├── ops.py └── layers.py ├── __init__.py ├── hyperparameter └── parameters.json ├── classnames ├── data.py ├── logger └── logger.py ├── utils └── utils.py ├── preprocess_data.py ├── data_visualization.py ├── load_data.py ├── capsnet.py ├── CIFR10+_Keras.py ├── base_model.py ├── model_object_detection.py └── train.py /capsnetcifr100.log: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/file.txt~: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Capsule-Networks-Towards-Object-Detection 2 | -------------------------------------------------------------------------------- /histogram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekfarmer/Capsule-Networks-Towards-Object-Detection/HEAD/histogram.png -------------------------------------------------------------------------------- /preprocess.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekfarmer/Capsule-Networks-Towards-Object-Detection/HEAD/preprocess.png -------------------------------------------------------------------------------- /preprocess_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekfarmer/Capsule-Networks-Towards-Object-Detection/HEAD/preprocess_1.png -------------------------------------------------------------------------------- /test_images/boy.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekfarmer/Capsule-Networks-Towards-Object-Detection/HEAD/test_images/boy.jpeg -------------------------------------------------------------------------------- /test_images/sea.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekfarmer/Capsule-Networks-Towards-Object-Detection/HEAD/test_images/sea.jpeg -------------------------------------------------------------------------------- /test_images/clock.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekfarmer/Capsule-Networks-Towards-Object-Detection/HEAD/test_images/clock.jpeg -------------------------------------------------------------------------------- /test_images/maple.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekfarmer/Capsule-Networks-Towards-Object-Detection/HEAD/test_images/maple.jpeg -------------------------------------------------------------------------------- /test_images/shrew.jpeg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekfarmer/Capsule-Networks-Towards-Object-Detection/HEAD/test_images/shrew.jpeg -------------------------------------------------------------------------------- /test_images/snail.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekfarmer/Capsule-Networks-Towards-Object-Detection/HEAD/test_images/snail.jpeg -------------------------------------------------------------------------------- /test_images/tank.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekfarmer/Capsule-Networks-Towards-Object-Detection/HEAD/test_images/tank.jpeg -------------------------------------------------------------------------------- /test_images/tulips.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekfarmer/Capsule-Networks-Towards-Object-Detection/HEAD/test_images/tulips.jpg -------------------------------------------------------------------------------- /test_images/dolphin.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekfarmer/Capsule-Networks-Towards-Object-Detection/HEAD/test_images/dolphin.jpg -------------------------------------------------------------------------------- /test_images/leopard.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekfarmer/Capsule-Networks-Towards-Object-Detection/HEAD/test_images/leopard.jpeg -------------------------------------------------------------------------------- /test_images/lizard.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekfarmer/Capsule-Networks-Towards-Object-Detection/HEAD/test_images/lizard.jpeg -------------------------------------------------------------------------------- /test_images/plates.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekfarmer/Capsule-Networks-Towards-Object-Detection/HEAD/test_images/plates.jpeg -------------------------------------------------------------------------------- /test_images/possum.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekfarmer/Capsule-Networks-Towards-Object-Detection/HEAD/test_images/possum.jpeg -------------------------------------------------------------------------------- /test_images/wardrobe.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekfarmer/Capsule-Networks-Towards-Object-Detection/HEAD/test_images/wardrobe.jpeg -------------------------------------------------------------------------------- /test_images/chimpanzee.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekfarmer/Capsule-Networks-Towards-Object-Detection/HEAD/test_images/chimpanzee.jpeg -------------------------------------------------------------------------------- /test_images/mushrooms.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekfarmer/Capsule-Networks-Towards-Object-Detection/HEAD/test_images/mushrooms.jpeg 
-------------------------------------------------------------------------------- /test_images/skyscraper.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekfarmer/Capsule-Networks-Towards-Object-Detection/HEAD/test_images/skyscraper.jpeg -------------------------------------------------------------------------------- /test_images/caterpillar.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekfarmer/Capsule-Networks-Towards-Object-Detection/HEAD/test_images/caterpillar.jpeg -------------------------------------------------------------------------------- /test_images/pickup_truck.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geekfarmer/Capsule-Networks-Towards-Object-Detection/HEAD/test_images/pickup_truck.jpeg -------------------------------------------------------------------------------- /capslayer/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from . import layers 4 | from . import ops 5 | from . import utils 6 | from . import losses 7 | 8 | __version__ = "0.1.0" 9 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Fri Feb 23 00:50:59 2018 5 | 6 | @author: optnio 7 | """ 8 | from model_object_detection import Model_Object_Detection 9 | 10 | model = Model_Object_Detection("Object Detection", output_folder="output") 11 | model.init() -------------------------------------------------------------------------------- /hyperparameter/parameters.json: -------------------------------------------------------------------------------- 1 | { 2 | "conv_1_size": 9, 3 | "conv_1_nb": 256, 4 | "conv_2_size": 6, 5 | "conv_2_nb": 64, 6 | "conv_2_dropout": 0.7, 7 | "caps_1_vec_len": 16, 8 | "caps_1_size": 5, 9 | "caps_1_nb_filter": 16, 10 | "caps_2_vec_len": 32, 11 | "learning_rate": 0.001, 12 | "routing_steps": 2 13 | } 14 | -------------------------------------------------------------------------------- /classnames: -------------------------------------------------------------------------------- 1 | beaver, dolphin, otter, seal, whale 2 | aquarium fish, flatfish, ray, shark, trout 3 | orchids, poppies, roses, sunflowers, tulips 4 | bottles, bowls, cans, cups, plates 5 | apples, mushrooms, oranges, pears, sweet peppers 6 | clock, computer keyboard, lamp, telephone, television 7 | bed, chair, couch, table, wardrobe 8 | bee, beetle, butterfly, caterpillar, cockroach 9 | bear, leopard, lion, tiger, wolf 10 | bridge, castle, house, road, skyscraper 11 | cloud, forest, mountain, plain, sea 12 | camel, cattle, chimpanzee, elephant, kangaroo 13 | fox, porcupine, possum, raccoon, skunk 14 | crab, lobster, snail, spider, worm 15 | baby, boy, girl, man, woman 16 | crocodile, dinosaur, lizard, snake, turtle 17 | hamster, mouse, rabbit, shrew, squirrel 18 | maple, oak, palm, pine, willow 19 | bicycle, bus, motorcycle, pickup truck, train 20 | lawn-mower, rocket, streetcar, tank, tractor 21 | -------------------------------------------------------------------------------- /data/classnames: -------------------------------------------------------------------------------- 1 | beaver, dolphin, otter, seal, 
whale
2 | aquarium fish, flatfish, ray, shark, trout
3 | orchids, poppies, roses, sunflowers, tulips
4 | bottles, bowls, cans, cups, plates
5 | apples, mushrooms, oranges, pears, sweet peppers
6 | clock, computer keyboard, lamp, telephone, television
7 | bed, chair, couch, table, wardrobe
8 | bee, beetle, butterfly, caterpillar, cockroach
9 | bear, leopard, lion, tiger, wolf
10 | bridge, castle, house, road, skyscraper
11 | cloud, forest, mountain, plain, sea
12 | camel, cattle, chimpanzee, elephant, kangaroo
13 | fox, porcupine, possum, raccoon, skunk
14 | crab, lobster, snail, spider, worm
15 | baby, boy, girl, man, woman
16 | crocodile, dinosaur, lizard, snake, turtle
17 | hamster, mouse, rabbit, shrew, squirrel
18 | maple, oak, palm, pine, willow
19 | bicycle, bus, motorcycle, pickup truck, train
20 | lawn-mower, rocket, streetcar, tank, tractor
21 |
--------------------------------------------------------------------------------
/data.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Fri Feb 23 21:23:02 2018
5 |
6 | @author: optnio
7 | """
8 |
9 | import os
10 | import pickle
11 |
12 | TRAIN_FILE = "train"
13 | VALID_FILE = "train"  # NOTE: no separate validation file ships with the data folder; the training file is reused here
14 | TEST_FILE = "test"
15 |
16 | def get_data(folder):
17 |
18 |     # Load the dataset
19 |     training_file = os.path.join(folder, TRAIN_FILE)
20 |     validation_file = os.path.join(folder, VALID_FILE)
21 |     testing_file = os.path.join(folder, TEST_FILE)
22 |
23 |     with open(training_file, mode='rb') as f:
24 |         train = pickle.load(f)
25 |     with open(validation_file, mode='rb') as f:
26 |         valid = pickle.load(f)
27 |     with open(testing_file, mode='rb') as f:
28 |         test = pickle.load(f)
29 |
30 |     # Retrieve all data
31 |     x_train, y_train = train['features'], train['labels']
32 |     x_valid, y_valid = valid['features'], valid['labels']
33 |     x_test, y_test = test['features'], test['labels']
34 |
35 |     return x_train, y_train, x_valid, y_valid, x_test, y_test
--------------------------------------------------------------------------------
/capslayer/losses.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 |
4 | def spread_loss(labels, logits, margin, regularizer=None):
5 |     '''
6 |     Args:
7 |         labels: [batch_size, num_label, 1].
8 |         logits: [batch_size, num_label, 1].
9 |         margin: Integer or 1-D Tensor.
10 |         regularizer: if not None, add the graph's regularization losses to the returned loss.
11 |
12 |     Returns:
13 |         loss: Spread loss.
14 |     '''
15 |     # a_target: [batch_size, 1, 1]
16 |     a_target = tf.matmul(labels, logits, transpose_a=True)
17 |     dist = tf.maximum(0., margin - (a_target - logits))
18 |     loss = tf.reduce_mean(tf.square(tf.matmul(1 - labels, dist, transpose_a=True)))
19 |     if regularizer is not None:
20 |         regularizer = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
21 |         loss += tf.reduce_mean(regularizer)
22 |     return(loss)
23 |
24 |
25 | def margin_loss():
26 |     pass
27 |
28 |
29 | def cross_entropy(labels, logits, regularizer=None):
30 |     '''
31 |     Args:
32 |         ...
33 |
34 |     Returns:
35 |         ...
36 | ''' 37 | loss = tf.losses.sparse_softmax_cross_entropy(labels, logits) 38 | if regularizer is not None: 39 | regularizer = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES) 40 | loss += tf.reduce_mean(regularizer) 41 | return(loss) 42 | -------------------------------------------------------------------------------- /logger/logger.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Feb 22 23:37:25 2018 5 | 6 | @author: optnio 7 | """ 8 | 9 | import logging 10 | from logging.handlers import RotatingFileHandler 11 | 12 | logger = logging.getLogger() 13 | logger.setLevel(logging.DEBUG) 14 | formatter = logging.Formatter('%(asctime)s:: %(levelname)s:: %(message)s') 15 | file_handler = RotatingFileHandler('capsnetcifr100.log', 'a', 1000000, 1) 16 | file_handler.setLevel(logging.INFO) 17 | file_handler.setFormatter(formatter) 18 | logger.addHandler(file_handler) 19 | stream_handler = logging.StreamHandler() 20 | stream_handler.setLevel(logging.DEBUG) 21 | logger.addHandler(stream_handler) 22 | 23 | 24 | class Logger(object): 25 | 26 | def __init__(self, label): 27 | super(Logger, self).__init__() 28 | self.label = label 29 | self.logger = logger 30 | 31 | def debug(self, string): 32 | self.logger.debug("%s::%s" % (self.label, string)) 33 | 34 | def info(self, string): 35 | self.logger.info("%s::%s" % (self.label, string)) 36 | 37 | def warning(self, string): 38 | self.logger.warning("%s::%s" % (self.label, string)) 39 | 40 | def error(self, string): 41 | self.logger.error("%s::%s" % (self.label, string)) 42 | 43 | def critical(self, string): 44 | self.logger.critical("%s::%s" % (self.label, string)) 45 | -------------------------------------------------------------------------------- /utils/utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Feb 22 23:19:02 2018 5 | 6 | @author: optnio 7 | """ 8 | 9 | import numpy as np 10 | import json 11 | import sys 12 | import os 13 | 14 | 15 | class Utils(object): 16 | """ 17 | Util class to store all common method use in this project 18 | """ 19 | 20 | def __init__(self, arg): 21 | super(Utils, self).__init__() 22 | 23 | @staticmethod 24 | def progress(count, total, suffix=''): 25 | """ 26 | Utils method to display a progress bar 27 | **input: ** 28 | *count: current progression 29 | *total: Max progress bar length 30 | """ 31 | bar_len = 60 32 | filled_len = int(round(bar_len * count / float(total))) 33 | 34 | percents = round(100.0 * count / float(total), 1) 35 | bar = '=' * filled_len + '-' * (bar_len - filled_len) 36 | 37 | sys.stdout.write('[%s] %s%s ...%s\r' % (bar, percents, '%', suffix)) 38 | sys.stdout.flush() 39 | 40 | @staticmethod 41 | def read_json_file(path): 42 | """ 43 | Utils method to open, read and return a json file content 44 | **input: ** 45 | *path: (String) Path to the json file to read 46 | """ 47 | with open(path, "r") as f: 48 | json_content = json.loads(f.read()) 49 | return json_content -------------------------------------------------------------------------------- /data/classnames.csv: -------------------------------------------------------------------------------- 1 | ClassID,ClassNames 2 | 1,beaver 3 | 2,dolphin 4 | 3,otter 5 | 4,seal 6 | 5,whale 7 | 6,aquarium fish 8 | 7,flatfish 9 | 8,ray 10 | 9,shark 11 | 10,trout 12 | 11,orchids 13 | 12,poppies 14 | 13,roses 15 | 14,sunflowers 16 
| 15,tulips 17 | 16,bottles 18 | 17,bowls 19 | 18,cans 20 | 19,cups 21 | 20,plates 22 | 21,apples 23 | 22,mushrooms 24 | 23,oranges 25 | 24,pears 26 | 25,sweet peppers 27 | 26,clock 28 | 27,computer keyboard 29 | 28,lamp 30 | 29,telephone 31 | 30,television 32 | 31,bed 33 | 32,chair 34 | 33,couch 35 | 34,table 36 | 35,wardrobe 37 | 36,bee 38 | 37,beetle 39 | 38,butterfly 40 | 39,caterpillar 41 | 40,cockroach 42 | 41,bear 43 | 42,leopard 44 | 43,lion 45 | 44,tiger 46 | 45,wolf 47 | 46,bridge 48 | 47,castle 49 | 48,house 50 | 49,road 51 | 50,skyscraper 52 | 51,cloud 53 | 52,forest 54 | 53,mountain 55 | 54,plain 56 | 55,sea 57 | 56,camel 58 | 57,cattle 59 | 58,chimpanzee 60 | 59,elephant 61 | 60,kangaroo 62 | 61,fox 63 | 62,porcupine 64 | 63,possum 65 | 64,raccoon 66 | 65,skunk 67 | 66,crab 68 | 67,lobster 69 | 68,snail 70 | 69,spider 71 | 70,worm 72 | 71,baby 73 | 72,boy 74 | 73,girl 75 | 74,man 76 | 75,woman 77 | 76,crocodile 78 | 77,dinosaur 79 | 78,lizard 80 | 79,snake 81 | 80,turtle 82 | 81,hamster 83 | 82,mouse 84 | 83,rabbit 85 | 84,shrew 86 | 85,squirrel 87 | 86,maple 88 | 87,oak 89 | 88,palm 90 | 89,pine 91 | 90,willow 92 | 91,bicycle 93 | 92,bus 94 | 93,motorcycle 95 | 94,pickup truck 96 | 95,train 97 | 96,lawn-mower 98 | 97,rocket 99 | 98,streetcar 100 | 99,tank 101 | 100,tractor 102 | -------------------------------------------------------------------------------- /preprocess_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Feb 22 15:27:23 2018 5 | 6 | @author: optnio 7 | """ 8 | 9 | import matplotlib.pyplot as plt 10 | import random 11 | from PIL import Image 12 | import numpy as np 13 | import random 14 | from PIL import Image, ImageEnhance 15 | from keras.preprocessing.image import ImageDataGenerator 16 | from load_data import x_train,y_train,x_test,y_test,n_classes 17 | 18 | 19 | #x_train = x_train / 255 20 | #x_test = x_test / 255 21 | 22 | def preprocessing_function(img): 23 | """ 24 | Custom preprocessing_function 25 | """ 26 | img = img *255 27 | img = Image.fromarray(img.astype('uint8'), 'RGB') 28 | img = ImageEnhance.Brightness(img).enhance(random.uniform(0.6, 1.5)) 29 | img = ImageEnhance.Contrast(img).enhance(random.uniform(0.6, 1.5)) 30 | 31 | return np.array(img) / 255 32 | 33 | train_datagen = ImageDataGenerator() 34 | train_datagen_augmented = ImageDataGenerator( 35 | rotation_range=20, 36 | shear_range=0.2, 37 | width_shift_range=0.2, 38 | height_shift_range=0.2, 39 | horizontal_flip=True, 40 | preprocessing_function=preprocessing_function) 41 | inference_datagen = ImageDataGenerator() 42 | train_datagen.fit(x_train) 43 | train_datagen_augmented.fit(x_train) 44 | inference_datagen.fit(x_test) 45 | 46 | fig = plt.figure() 47 | 48 | n = 0 49 | 50 | graph_size = 3 51 | 52 | for x_batch, y_batch in train_datagen_augmented.flow(x_train, y_train, batch_size=1): 53 | a=fig.add_subplot(graph_size, graph_size, n+1) 54 | imgplot = plt.imshow(x_batch[0]) 55 | n = n + 1 56 | if n > 8: 57 | break 58 | 59 | 60 | plt.show() -------------------------------------------------------------------------------- /data_visualization.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Feb 22 00:03:57 2018 5 | 6 | @author: optnio 7 | """ 8 | 9 | import matplotlib.pyplot as plt 10 | import random 11 | from PIL import Image 12 | import numpy as np 13 | import 
random
14 | from PIL import Image, ImageEnhance
15 | # Visualizations will be shown in the notebook.
16 | from load_data import x_train,y_train,x_test,y_test,n_classes
17 |
18 | # Load the class name for each class id
19 | with open("./data/classnames.csv", "r") as f:
20 |     classnames = f.read()
21 | id_to_name = { int(line.split(",")[0]):line.split(",")[1] for line in classnames.split("\n")[1:] if len(line) > 0}
22 |
23 |
24 | graph_size = 3
25 | random_index_list = [random.randint(0, x_train.shape[0] - 1) for _ in range(graph_size * graph_size)]  # randint is inclusive on both ends
26 | fig = plt.figure(figsize=(15, 15))
27 | """for i, index in enumerate(random_index_list):
28 |     a=fig.add_subplot(graph_size, graph_size, i+1)
29 |     #im = Image.fromarray(np.rollaxis(X_train[index] * 255, 0,3))
30 |     imgplot = plt.imshow(x_train[index])
31 |     # Plot some images
32 |     a.set_title('%s' % id_to_name[y_train[index]])
33 |
34 | #plt.show()
35 |
36 | """
37 |
38 | fig, ax = plt.subplots()
39 | # the histogram of the data (raw counts, so the values printed below are per-class frequencies)
40 | values, bins, patches = ax.hist(y_train, n_classes)
41 |
42 | # Label the plot
43 | ax.set_xlabel('Class ID')
44 | ax.set_title('Histogram of classes')
45 |
46 | # Tweak spacing to prevent clipping of ylabel
47 | fig.tight_layout()
48 |
49 | print("Most common indexes")
50 | most_common_index = sorted(range(len(values)), key=lambda k: values[k], reverse=True)
51 | for index in most_common_index[:30]:
52 |     print("index: %s => %s = %s" % (index, id_to_name[index], values[index]))
53 |
54 |
--------------------------------------------------------------------------------
/load_data.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Spyder Editor
4 |
5 | This is a temporary script file.
6 | """
7 |
8 | import random
9 | import cv2
10 | from keras.datasets import cifar10
11 | from keras.utils import to_categorical
12 | from keras.models import Model
13 | from keras.layers import Dense, GlobalAveragePooling2D
14 | from keras.optimizers import SGD
15 | #from keras.applications.vgg19 import VGG19
16 | from keras.applications.vgg16 import VGG16
17 | import numpy as np
18 |
19 | # read data
20 | (x_train, y_train), (x_test, y_test) = cifar10.load_data()
21 |
22 | # limit the amount of the data (sampling the full 50,000/10,000 here only shuffles it)
23 | # train data
24 | ind_train = random.sample(list(range(x_train.shape[0])), 50000)
25 | x_train = x_train[ind_train]
26 | y_train = y_train[ind_train]
27 |
28 | # test data
29 | ind_test = random.sample(list(range(x_test.shape[0])), 10000)
30 | x_test = x_test[ind_test]
31 | y_test = y_test[ind_test]
32 | """
33 | def resize_data(data):
34 |     data_upscaled = np.zeros((data.shape[0], 28, 28, 1))
35 |     for i, img in enumerate(data):
36 |         large_img = cv2.resize(img, dsize=(28, 28), interpolation=cv2.INTER_CUBIC)
37 |         data_upscaled[i] = large_img
38 |
39 |     return data_upscaled
40 |
41 |
42 | # resize train and test data
43 | x_train_resized = resize_data(x_train)
44 | x_test_resized = resize_data(x_test)
45 |
46 | # make explained variable hot-encoded
47 | y_train_hot_encoded = to_categorical(y_train)
48 | y_test_hot_encoded = to_categorical(y_test)
49 |
50 | """
51 | # TODO: Number of training examples
52 | n_train = x_train.shape[0]
53 |
54 | # TODO: Number of testing examples.
55 | n_test = x_test.shape[0]
56 |
57 | # TODO: What's the shape of an input image?
58 | image_shape = x_train.shape[1:]
59 |
60 | # TODO: How many unique classes/labels are there in the dataset.
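# Illustrative sketch (an assumption, not part of the original code): the log
# name "capsnetcifr100" and data/classnames.csv point at CIFAR-100, while
# cifar10 is loaded above. If CIFAR-100 were intended, the switch would
# presumably look like:
#   from keras.datasets import cifar100
#   (x_train, y_train), (x_test, y_test) = cifar100.load_data(label_mode='fine')
#   n_classes = len(np.unique(y_train))  # 100 fine labels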
61 | #n_classes = len(set(y_train))
62 | n_classes = 100  # NOTE: cifar10.load_data() above yields 10 classes; 100 matches the CIFAR-100 class list in data/classnames.csv
63 | """print(x_train.shape)
64 | print(y_train.shape)
65 |
66 |
67 |
68 | print("Number of training examples =", n_train)
69 | print("Number of testing examples =", n_test)
70 | print("Image data shape =", image_shape)
71 | print("Number of classes =", n_classes)
72 |
73 | """
74 |
75 |
--------------------------------------------------------------------------------
/capslayer/utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | import scipy
3 | import numpy as np
4 | import tensorflow as tf
5 |
6 |
7 | def reduce_sum(input_tensor, axis=None, keepdims=False, name=None):
8 |     try:
9 |         return tf.reduce_sum(input_tensor, axis=axis, keepdims=keepdims, name=name)
10 |     except TypeError:  # older TF versions use keep_dims
11 |         return tf.reduce_sum(input_tensor, axis=axis, keep_dims=keepdims, name=name)
12 |
13 |
14 | def softmax(logits, axis=None, name=None):
15 |     try:
16 |         return tf.nn.softmax(logits, axis=axis, name=name)
17 |     except TypeError:  # older TF versions use dim
18 |         return tf.nn.softmax(logits, dim=axis, name=name)
19 |
20 |
21 | def euclidean_norm(input, axis=2, keepdims=True, epsilon=True):
22 |     if epsilon:
23 |         norm = tf.sqrt(reduce_sum(tf.square(input), axis=axis, keepdims=keepdims) + 1e-9)
24 |     else:
25 |         norm = tf.sqrt(reduce_sum(tf.square(input), axis=axis, keepdims=keepdims))
26 |
27 |     return(norm)
28 |
29 |
30 | def load_mnist(batch_size, is_training=True):
31 |     path = os.path.join('models', 'data', 'mnist')
32 |     if is_training:
33 |         fd = open(os.path.join(path, 'train-images-idx3-ubyte'), 'rb')
34 |         loaded = np.fromfile(file=fd, dtype=np.uint8)
35 |         trainX = loaded[16:].reshape((60000, 784)).astype(np.float32)
36 |
37 |         fd = open(os.path.join(path, 'train-labels-idx1-ubyte'), 'rb')
38 |         loaded = np.fromfile(file=fd, dtype=np.uint8)
39 |         trainY = loaded[8:].reshape((60000)).astype(np.int32)
40 |
41 |         trX = trainX[:55000] / 255.
42 |         trY = trainY[:55000]
43 |
44 |         valX = trainX[55000:, ] / 255.
45 |         valY = trainY[55000:]
46 |
47 |         num_tr_batch = 55000 // batch_size
48 |         num_val_batch = 5000 // batch_size
49 |
50 |         return trX, trY, num_tr_batch, valX, valY, num_val_batch
51 |     else:
52 |         fd = open(os.path.join(path, 't10k-images-idx3-ubyte'), 'rb')
53 |         loaded = np.fromfile(file=fd, dtype=np.uint8)
54 |         teX = loaded[16:].reshape((10000, 784)).astype(np.float32)
55 |
56 |         fd = open(os.path.join(path, 't10k-labels-idx1-ubyte'), 'rb')
57 |         loaded = np.fromfile(file=fd, dtype=np.uint8)
58 |         teY = loaded[8:].reshape((10000)).astype(np.int32)
59 |
60 |         num_te_batch = 10000 // batch_size
61 |         return teX / 255., teY, num_te_batch
62 |
63 |
64 | def load_fashion_mnist(batch_size, is_training=True):
65 |     path = os.path.join('models', 'data', 'fashion-mnist')
66 |     if is_training:
67 |         fd = open(os.path.join(path, 'train-images-idx3-ubyte'), 'rb')
68 |         loaded = np.fromfile(file=fd, dtype=np.uint8)
69 |         trainX = loaded[16:].reshape((60000, 784)).astype(np.float32)
70 |
71 |         fd = open(os.path.join(path, 'train-labels-idx1-ubyte'), 'rb')
72 |         loaded = np.fromfile(file=fd, dtype=np.uint8)
73 |         trainY = loaded[8:].reshape((60000)).astype(np.int32)
74 |
75 |         trX = trainX[:55000] / 255.
76 |         trY = trainY[:55000]
77 |
78 |         valX = trainX[55000:, ] / 255.
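        # Hold out the last 5,000 training images for validation, mirroring load_mnist above.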
79 |         valY = trainY[55000:]
80 |
81 |         num_tr_batch = 55000 // batch_size
82 |         num_val_batch = 5000 // batch_size
83 |
84 |         return trX, trY, num_tr_batch, valX, valY, num_val_batch
85 |     else:
86 |         fd = open(os.path.join(path, 't10k-images-idx3-ubyte'), 'rb')
87 |         loaded = np.fromfile(file=fd, dtype=np.uint8)
88 |         teX = loaded[16:].reshape((10000, 784)).astype(np.float32)
89 |
90 |         fd = open(os.path.join(path, 't10k-labels-idx1-ubyte'), 'rb')
91 |         loaded = np.fromfile(file=fd, dtype=np.uint8)
92 |         teY = loaded[8:].reshape((10000)).astype(np.int32)
93 |
94 |         num_te_batch = 10000 // batch_size
95 |         return teX / 255., teY, num_te_batch
96 |
97 |
98 | def load_smallNORB(batch_size, is_training=True):
99 |     pass
100 |
101 |
102 | def load_data(dataset, batch_size, is_training=True, one_hot=False):
103 |     if dataset == 'mnist':
104 |         return load_mnist(batch_size, is_training)
105 |     elif dataset == 'fashion-mnist':
106 |         return load_fashion_mnist(batch_size, is_training)
107 |     elif dataset == 'smallNORB':
108 |         return load_smallNORB(batch_size, is_training)
109 |     else:
110 |         raise Exception('Invalid dataset, please check the name of dataset:', dataset)
111 |
112 |
113 | def get_batch_data(dataset, batch_size, num_threads):
114 |     if dataset == 'mnist':
115 |         trX, trY, num_tr_batch, valX, valY, num_val_batch = load_mnist(batch_size, is_training=True)
116 |     elif dataset == 'fashion-mnist':
117 |         trX, trY, num_tr_batch, valX, valY, num_val_batch = load_fashion_mnist(batch_size, is_training=True)
118 |     elif dataset == 'smallNORB':
119 |         trX, trY, num_tr_batch, valX, valY, num_val_batch = load_smallNORB(batch_size, is_training=True)
120 |     data_queues = tf.train.slice_input_producer([trX, trY])
121 |     X, Y = tf.train.shuffle_batch(data_queues, num_threads=num_threads,
122 |                                   batch_size=batch_size,
123 |                                   capacity=batch_size * 64,
124 |                                   min_after_dequeue=batch_size * 32,
125 |                                   allow_smaller_final_batch=False)
126 |
127 |     return(X, Y)
128 |
129 |
130 | def save_images(imgs, size, path):
131 |     '''
132 |     Args:
133 |         imgs: [batch_size, image_height, image_width, channels]
134 |         size: a list with two int elements, the [rows, cols] of the merged image grid
135 |         path: the path to save images
136 |     '''
137 |     imgs = (imgs + 1.) / 2  # inverse_transform
138 |     return(scipy.misc.imsave(path, mergeImgs(imgs, size)))  # scipy.misc.imsave requires scipy < 1.2
139 |
140 |
141 | def mergeImgs(images, size):
142 |     h, w = images.shape[1], images.shape[2]
143 |     imgs = np.zeros((h * size[0], w * size[1], 3))
144 |     for idx, image in enumerate(images):
145 |         i = idx % size[1]
146 |         j = idx // size[1]
147 |         imgs[j * h:j * h + h, i * w:i * w + w, :] = image
148 |
149 |     return imgs
150 |
151 |
152 | def get_transformation_matrix_shape(in_pose_shape, out_pose_shape):
153 |     return([out_pose_shape[0], in_pose_shape[0]])
154 |
--------------------------------------------------------------------------------
/capslayer/ops.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 |
4 | from capslayer.utils import reduce_sum
5 | from capslayer.utils import softmax
6 |
7 | epsilon = 1e-9
8 |
9 | def squash(vector):
10 |     '''Squashing function
11 |     Args:
12 |         vector: A tensor with shape [batch_size, 1, num_caps, vec_len, 1] or [batch_size, num_caps, vec_len, 1]
13 |     Returns:
14 |         A tensor with the same shape as vector but squashed in 'vec_len' dimension.
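        This implements Eq. 1 of Sabour et al. (2017): v_j = (||s_j||^2 / (1 + ||s_j||^2)) * s_j / ||s_j||, computed below with a small epsilon under the square root for numerical stability.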
15 | ''' 16 | squared_norm = reduce_sum(tf.square(vector), axis=-2, keepdims=True) 17 | scalar_factor = squared_norm / (1 + squared_norm) / tf.sqrt(squared_norm + epsilon) 18 | return(scalar_factor * vector) 19 | 20 | 21 | def routing(vote, 22 | activation=None, 23 | num_outputs=32, 24 | out_caps_shape=[4, 4], 25 | method='EMRouting', 26 | num_iter=3, 27 | regularizer=None): 28 | ''' Routing-by-agreement algorithm. 29 | Args: 30 | alias H = out_caps_shape[0]*out_caps_shape[1]. 31 | 32 | vote: [batch_size, num_inputs, num_outputs, H]. 33 | activation: [batch_size, num_inputs, 1, 1]. 34 | num_outputs: ... 35 | out_caps_shape: ... 36 | method: method for updating coupling coefficients between vote and pose['EMRouting', 'DynamicRouting']. 37 | num_iter: the number of routing iteration. 38 | regularizer: A (Tensor -> Tensor or None) function; the result of applying it on a newly created variable 39 | will be added to the collection tf.GraphKeys.REGULARIZATION_LOSSES and can be used for regularization. 40 | 41 | Returns: 42 | pose: [batch_size, 1, 1, num_outputs] + out_caps_shape. 43 | activation: [batch_size, 1, 1, num_outputs]. 44 | ''' 45 | vote_stopped = tf.stop_gradient(vote, name="stop_gradient") 46 | batch_size = vote.shape[0].value 47 | if method == 'EMRouting': 48 | shape = vote.get_shape().as_list()[:3] + [1] 49 | # R: [batch_size, num_inputs, num_outputs, 1] 50 | R = tf.constant(np.ones(shape, dtype=np.float32) / num_outputs) 51 | for t_iter in range(num_iter): 52 | with tf.variable_scope('M-STEP') as scope: 53 | if t_iter > 0: 54 | scope.reuse_variables() 55 | # It's no need to do the `E-STEP` in the last iteration 56 | if t_iter == num_iter - 1: 57 | pose, stddev, activation_prime = M_step(R, activation, vote) 58 | break 59 | else: 60 | pose, stddev, activation_prime = M_step(R, activation, vote_stopped) 61 | with tf.variable_scope('E-STEP'): 62 | R = E_step(pose, stddev, activation_prime, vote_stopped) 63 | pose = tf.reshape(pose, shape=[batch_size, 1, 1, num_outputs] + out_caps_shape) 64 | activation = tf.reshape(activation_prime, shape=[batch_size, 1, 1, -1]) 65 | return(pose, activation) 66 | elif method == 'DynamicRouting': 67 | B = tf.constant(np.zeros([batch_size, vote.shape[1].value, num_outputs, 1, 1], dtype=np.float32)) 68 | for r_iter in range(num_iter): 69 | with tf.variable_scope('iter_' + str(r_iter)): 70 | coef = softmax(B, axis=2) 71 | if r_iter == num_iter - 1: 72 | s = reduce_sum(tf.multiply(coef, vote), axis=1, keepdims=True) 73 | pose = squash(s) 74 | else: 75 | s = reduce_sum(tf.multiply(coef, vote_stopped), axis=1, keepdims=True) 76 | pose = squash(s) 77 | shape = [batch_size, vote.shape[1].value, num_outputs] + out_caps_shape 78 | pose = tf.multiply(pose, tf.constant(1., shape=shape)) 79 | B += tf.matmul(vote_stopped, pose, transpose_a=True) 80 | return(pose, activation) 81 | 82 | else: 83 | raise Exception('Invalid routing method!', method) 84 | 85 | 86 | def M_step(R, activation, vote, lambda_val=0.9, regularizer=None): 87 | ''' 88 | Args: 89 | alias H = out_caps_shape[0]*out_caps_shape[1] 90 | 91 | vote: [batch_size, num_inputs, num_outputs, H] 92 | activation: [batch_size, num_inputs, 1, 1] 93 | R: [batch_size, num_inputs, num_outputs, 1] 94 | lambda_val: ... 
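        regularizer: applied to the beta_v / beta_a variables created below. (lambda_val is the inverse-temperature constant from the EM-routing paper, kept fixed at 0.9 here rather than scheduled.)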
95 | 96 | Returns: 97 | pose & stddev: [batch_size, 1, num_outputs, H] 98 | activation: [batch_size, 1, num_outputs, 1] 99 | ''' 100 | batch_size = vote.shape[0].value 101 | # line 2 102 | R = tf.multiply(R, activation) 103 | R_sum_i = tf.reduce_sum(R, axis=1, keepdims=True) + epsilon 104 | 105 | # line 3 106 | # mean: [batch_size, 1, num_outputs, H] 107 | pose = tf.reduce_sum(R * vote, axis=1, keepdims=True) / R_sum_i 108 | 109 | # line 4 110 | stddev = tf.sqrt(tf.reduce_sum(R * tf.square(vote - pose), axis=1, keepdims=True) / R_sum_i + epsilon) 111 | 112 | # line 5, cost: [batch_size, 1, num_outputs, H] 113 | H = vote.shape[-1].value 114 | beta_v = tf.get_variable('beta_v', shape=[batch_size, 1, pose.shape[2].value, H], regularizer=regularizer) 115 | cost = (beta_v + tf.log(stddev)) * R_sum_i 116 | 117 | # line 6 118 | beta_a = tf.get_variable('beta_a', shape=[batch_size, 1, pose.shape[2], 1], regularizer=regularizer) 119 | activation = tf.nn.sigmoid(lambda_val * (beta_a - tf.reduce_sum(cost, axis=3, keepdims=True))) 120 | 121 | return(pose, stddev, activation) 122 | 123 | 124 | def E_step(pose, stddev, activation, vote): 125 | ''' 126 | Args: 127 | alias H = out_caps_shape[0]*out_caps_shape[1] 128 | 129 | pose & stddev: [batch_size, 1, num_outputs, H] 130 | activation: [batch_size, 1, num_outputs, 1] 131 | vote: [batch_size, num_inputs, num_outputs, H] 132 | 133 | Returns: 134 | pose & var: [batch_size, 1, num_outputs, H] 135 | activation: [batch_size, 1, num_outputs, 1] 136 | ''' 137 | # line 2 138 | var = tf.square(stddev) 139 | x = tf.reduce_sum(tf.square(vote - pose) / (2 * var), axis=-1, keepdims=True) 140 | peak_height = 1 / (tf.reduce_prod(tf.sqrt(2 * np.pi * var + epsilon), axis=-1, keepdims=True) + epsilon) 141 | P = peak_height * tf.exp(-x) 142 | 143 | # line 3 144 | R = tf.nn.softmax(activation * P, axis=2) 145 | return(R) 146 | -------------------------------------------------------------------------------- /capsnet.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Feb 22 16:10:31 2018 5 | 6 | @author: optnio 7 | """ 8 | 9 | import numpy as np 10 | import tensorflow as tf 11 | import numpy as np 12 | 13 | 14 | def conv_caps_layer(input_layer, capsules_size, nb_filters, kernel, stride=2): 15 | """ 16 | Capsule layer for the convolutional inputs 17 | **input: 18 | *input_layer: (Tensor) 19 | *capsule_numbers: (Integer) the number of capsule in this layer. 20 | *kernel_size: (Integer) Size of the kernel for each filter. 21 | *stride: (Integer) 2 by default 22 | """ 23 | # "In convolutional capsule layers each unit in a capsule is a convolutional unit. 24 | # Therefore, each capsule will output a grid of vectors rather than a single vector output." 
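    # Illustrative shape walk-through (hypothetical numbers, not from this repo):
    # a 9x9 input with kernel=5, stride=2, padding=VALID gives a 3x3 grid; with
    # nb_filters=16 and capsules_size=8 the conv below outputs [?, 3, 3, 128],
    # which the reshape then splits into 3*3*16 = 144 capsules of size 8.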
25 |     capsules = tf.contrib.layers.conv2d(
26 |         input_layer, nb_filters * capsules_size, kernel, stride, padding="VALID")
27 |     # conv shape: [?, kernel, kernel, nb_filters]
28 |     shape = capsules.get_shape().as_list()
29 |     capsules = tf.reshape(capsules, shape=(-1, np.prod(shape[1:3]) * nb_filters, capsules_size, 1))
30 |     # capsules shape: [?, nb_capsules, capsule_size, 1]
31 |     return squash(capsules)
32 |
33 | def routing(u_hat, b_ij, nb_capsules, nb_capsules_p, iterations=4):
34 |     """
35 |     Routing algorithm
36 |
37 |     **input:
38 |         *u_hat: predictions u_hat(j|i) from the previous layer's capsules (their outputs multiplied by the weight matrix)
39 |         *b_ij: the log prior probabilities that capsule i should be coupled to capsule j
40 |         *nb_capsules_p: Number of capsules in the previous layer
41 |         *nb_capsules: Number of capsules in this layer
42 |     """
43 |     # Start the routing algorithm
44 |     for it in range(iterations):
45 |         with tf.variable_scope('routing_' + str(it)):
46 |             # Line 4 of algo
47 |             # probabilities that capsule i should be coupled to capsule j.
48 |             # c_ij: [nb_capsules_p, nb_capsules, 1, 1]
49 |             c_ij = tf.nn.softmax(b_ij, dim=2)
50 |
51 |             # Line 5 of algo
52 |             # c_ij:  [nb_capsules_p, nb_capsules, 1, 1]
53 |             # u_hat: [?, nb_capsules_p, nb_capsules, len_v_j, 1]
54 |             s_j = tf.multiply(c_ij, u_hat)
55 |             # s_j: [?, nb_capsules_p, nb_capsules, len_v_j, 1]
56 |             s_j = tf.reduce_sum(s_j, axis=1, keep_dims=True)
57 |             # s_j: [?, 1, nb_capsules, len_v_j, 1]
58 |
59 |             # line 6:
60 |             # squash using Eq.1,
61 |             v_j = squash(s_j)
62 |             # v_j: [?, 1, nb_capsules, len_v_j, 1]
63 |
64 |             # line 7:
65 |             # First reshape & tile v_j
66 |             # [?, 1, nb_capsules, len_v_j, 1] ->
67 |             # [?, nb_capsules_p, nb_capsules, len_v_j, 1]
68 |             v_j_tiled = tf.tile(v_j, [1, nb_capsules_p, 1, 1, 1])
69 |             # u_hat:     [?, nb_capsules_p, nb_capsules, len_v_j, 1]
70 |             # v_j_tiled: [?, nb_capsules_p, nb_capsules, len_v_j, 1]
71 |             u_dot_v = tf.matmul(u_hat, v_j_tiled, transpose_a=True)
72 |             # u_dot_v: [?, nb_capsules_p, nb_capsules, 1, 1]
73 |             b_ij += tf.reduce_sum(u_dot_v, axis=0, keep_dims=True)
74 |             # b_ij: [1, nb_capsules_p, nb_capsules, 1, 1]
75 |
76 |     return tf.squeeze(v_j, axis=1)
77 |
78 | def fully_connected_caps_layer(input_layer, capsules_size, nb_capsules, iterations=4):
79 |     """
80 |     Second layer receiving inputs from all capsules of the layer below
81 |     **input:
82 |         *input_layer: (Tensor)
83 |         *capsules_size: (Integer) Size of each capsule
84 |         *nb_capsules: (Integer) Number of capsules
85 |         *iterations: (Integer) Number of iterations for the routing algorithm
86 |
87 |     i refers to the layer below.
88 |     j refers to the layer above (the current layer).
89 |     """
90 |     shape = input_layer.get_shape().as_list()
91 |     # Get the size of each capsule in the previous layer and the current layer.
92 |     len_u_i = np.prod(shape[2])
93 |     len_v_j = capsules_size
94 |     # Get the number of capsules in the layer below.
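    # (input_layer arrives as [batch, nb_capsules_p, len_u_i, 1] from the
    # previous capsule layer, so shape[1] holds the capsule count and shape[2]
    # the capsule length of the layer below.)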
95 |     nb_capsules_p = np.prod(shape[1])
96 |
97 |     # w_ij: Used to compute u_hat by multiplying the output ui of a capsule in the layer below
98 |     # with this matrix
99 |     # [nb_capsules_p, nb_capsules, len_v_j, len_u_i]
100 |     _init = tf.random_normal_initializer(stddev=0.01, seed=0)
101 |     _shape = (nb_capsules_p, nb_capsules, len_v_j, len_u_i)
102 |     w_ij = tf.get_variable('weight', shape=_shape, dtype=tf.float32, initializer=_init)
103 |
104 |     # Adding one dimension to the input [batch_size, nb_capsules_p, length(u_i), 1] ->
105 |     # [batch_size, nb_capsules_p, 1, length(u_i), 1]
106 |     # To allow the next dot product
107 |     input_layer = tf.reshape(input_layer, shape=(-1, nb_capsules_p, 1, len_u_i, 1))
108 |     input_layer = tf.tile(input_layer, [1, 1, nb_capsules, 1, 1])
109 |
110 |     # Eq.2, calc u_hat
111 |     # Prediction uj|i made by capsule i
112 |     # w_ij:  [nb_capsules_p, nb_capsules, len_v_j, len_u_i]
113 |     # input: [batch_size, nb_capsules_p, nb_capsules, len_u_i, 1]
114 |     # u_hat: [batch_size, nb_capsules_p, nb_capsules, len_v_j, 1]
115 |     # Each capsule of the previous layer is associated with a capsule of this layer
116 |     u_hat = tf.einsum('abdc,iabcf->iabdf', w_ij, input_layer)
117 |
118 |     # b_ij are the log prior probabilities that capsule i should be coupled to capsule j
119 |     # [nb_capsules_p, nb_capsules, 1, 1]
120 |     b_ij = tf.zeros(shape=[nb_capsules_p, nb_capsules, 1, 1], dtype=np.float32)
121 |
122 |     return routing(u_hat, b_ij, nb_capsules, nb_capsules_p, iterations=iterations)
123 |
124 | def squash(vector):
125 |     """
126 |     Squashing function corresponding to Eq. 1
127 |     **input: **
128 |         *vector
129 |     """
130 |     vector += 0.00001  # small epsilon so the norm (and its gradient) is never exactly zero
131 |     vec_squared_norm = tf.reduce_sum(tf.square(vector), -2, keep_dims=True)
132 |     scalar_factor = vec_squared_norm / (1 + vec_squared_norm) / tf.sqrt(vec_squared_norm)
133 |     vec_squashed = scalar_factor * vector  # element-wise
134 |     return(vec_squashed)
135 |
--------------------------------------------------------------------------------
/CIFR10+_Keras.py:
--------------------------------------------------------------------------------
1 |
2 | # coding: utf-8
3 |
4 | # In[ ]:
5 |
6 |
7 | from __future__ import print_function
8 | from keras import backend as K
9 | from keras.engine.topology import Layer
10 | from keras import activations
11 | from keras import utils
12 | from keras.datasets import cifar10
13 | from keras.models import Model
14 | from keras.layers import *
15 | from keras.preprocessing.image import ImageDataGenerator
16 |
17 |
18 | # the squashing function.
19 | # we use 0.5 instead of the 1 in Hinton's paper.
20 | # if 1, the norm of the vector will be zoomed out.
21 | # if 0.5, the norm will be zoomed in while the original norm is less than 0.5
22 | # and be zoomed out while the original norm is greater than 0.5.
23 | def squash(x, axis=-1):
24 |     s_squared_norm = K.sum(K.square(x), axis, keepdims=True) + K.epsilon()
25 |     scale = K.sqrt(s_squared_norm) / (0.5 + s_squared_norm)
26 |     return scale * x
27 |
28 |
29 | # define our own softmax function instead of K.softmax
30 | # because K.softmax cannot specify the axis.
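# (Routing in Capsule.call below softmaxes the coupling logits over axis 1,
# the output-capsule axis after the permute, which is why an explicit axis
# argument is needed here.)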
31 | def softmax(x, axis=-1):
32 |     ex = K.exp(x - K.max(x, axis=axis, keepdims=True))
33 |     return ex / K.sum(ex, axis=axis, keepdims=True)
34 |
35 |
36 | # define the margin loss like hinge loss
37 | def margin_loss(y_true, y_pred):
38 |     lamb, margin = 0.5, 0.1
39 |     return y_true * K.square(K.relu(1 - margin - y_pred)) + lamb * (
40 |         1 - y_true) * K.square(K.relu(y_pred - margin))
41 |
42 |
43 | class Capsule(Layer):
44 |     """A Capsule implementation in pure Keras.
45 |     There are two versions of Capsule.
46 |     One is like a dense layer (for fixed-shape input),
47 |     and the other is like a time-distributed dense layer (for variable-length input).
48 |     The input shape of Capsule must be (batch_size,
49 |                                         input_num_capsule,
50 |                                         input_dim_capsule
51 |                                         )
52 |     and the output shape is (batch_size,
53 |                              num_capsule,
54 |                              dim_capsule
55 |                              )
56 |     Capsule implementation is from https://github.com/bojone/Capsule/
57 |     Capsule Paper: https://arxiv.org/abs/1710.09829
58 |     """
59 |
60 |     def __init__(self,
61 |                  num_capsule,
62 |                  dim_capsule,
63 |                  routings=3,
64 |                  share_weights=True,
65 |                  activation='squash',
66 |                  **kwargs):
67 |         super(Capsule, self).__init__(**kwargs)
68 |         self.num_capsule = num_capsule
69 |         self.dim_capsule = dim_capsule
70 |         self.routings = routings
71 |         self.share_weights = share_weights
72 |         if activation == 'squash':
73 |             self.activation = squash
74 |         else:
75 |             self.activation = activations.get(activation)
76 |
77 |     def build(self, input_shape):
78 |         input_dim_capsule = input_shape[-1]
79 |         if self.share_weights:
80 |             self.kernel = self.add_weight(
81 |                 name='capsule_kernel',
82 |                 shape=(1, input_dim_capsule,
83 |                        self.num_capsule * self.dim_capsule),
84 |                 initializer='glorot_uniform',
85 |                 trainable=True)
86 |         else:
87 |             input_num_capsule = input_shape[-2]
88 |             self.kernel = self.add_weight(
89 |                 name='capsule_kernel',
90 |                 shape=(input_num_capsule, input_dim_capsule,
91 |                        self.num_capsule * self.dim_capsule),
92 |                 initializer='glorot_uniform',
93 |                 trainable=True)
94 |
95 |     def call(self, inputs):
96 |         """Following the routing algorithm from Hinton's paper,
97 |         but replace b = b + <u,v> with b = <u,v>.
98 |         This change can improve the feature representation of Capsule.
99 |         However, you can replace
100 |             b = K.batch_dot(outputs, hat_inputs, [2, 3])
101 |         with
102 |             b += K.batch_dot(outputs, hat_inputs, [2, 3])
103 |         to realize a standard routing.
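        The loop below follows Procedure 1 (the routing algorithm) of Sabour et al., 2017, with K.batch_dot computing the agreement between predictions and outputs.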
104 | """ 105 | 106 | if self.share_weights: 107 | hat_inputs = K.conv1d(inputs, self.kernel) 108 | else: 109 | hat_inputs = K.local_conv1d(inputs, self.kernel, [1], [1]) 110 | 111 | batch_size = K.shape(inputs)[0] 112 | input_num_capsule = K.shape(inputs)[1] 113 | hat_inputs = K.reshape(hat_inputs, 114 | (batch_size, input_num_capsule, 115 | self.num_capsule, self.dim_capsule)) 116 | hat_inputs = K.permute_dimensions(hat_inputs, (0, 2, 1, 3)) 117 | 118 | b = K.zeros_like(hat_inputs[:, :, :, 0]) 119 | for i in range(self.routings): 120 | c = softmax(b, 1) 121 | if K.backend() == 'theano': 122 | o = K.sum(o, axis=1) 123 | o = self.activation(K.batch_dot(c, hat_inputs, [2, 2])) 124 | if i < self.routings - 1: 125 | b = K.batch_dot(o, hat_inputs, [2, 3]) 126 | if K.backend() == 'theano': 127 | o = K.sum(o, axis=1) 128 | 129 | return o 130 | 131 | def compute_output_shape(self, input_shape): 132 | return (None, self.num_capsule, self.dim_capsule) 133 | 134 | 135 | batch_size = 128 136 | num_classes = 10 137 | epochs = 100 138 | (x_train, y_train), (x_test, y_test) = cifar10.load_data() 139 | 140 | x_train = x_train.astype('float32') 141 | x_test = x_test.astype('float32') 142 | x_train /= 255 143 | x_test /= 255 144 | y_train = utils.to_categorical(y_train, num_classes) 145 | y_test = utils.to_categorical(y_test, num_classes) 146 | 147 | # A common Conv2D model 148 | input_image = Input(shape=(None, None, 3)) 149 | x = Conv2D(64, (3, 3), activation='relu')(input_image) 150 | x = Conv2D(64, (3, 3), activation='relu')(x) 151 | x = AveragePooling2D((2, 2))(x) 152 | x = Conv2D(128, (3, 3), activation='relu')(x) 153 | x = Conv2D(128, (3, 3), activation='relu')(x) 154 | 155 | 156 | """now we reshape it as (batch_size, input_num_capsule, input_dim_capsule) 157 | then connect a Capsule layer. 158 | the output of final model is the lengths of 10 Capsule, whose dim=16. 159 | the length of Capsule is the proba, 160 | so the problem becomes a 10 two-classification problem. 
161 | """ 162 | 163 | x = Reshape((-1, 128))(x) 164 | capsule = Capsule(10, 16, 3, True)(x) 165 | output = Lambda(lambda z: K.sqrt(K.sum(K.square(z), 2)))(capsule) 166 | model = Model(inputs=input_image, outputs=output) 167 | 168 | # we use a margin loss 169 | model.compile(loss=margin_loss, optimizer='adam', metrics=['accuracy']) 170 | model.summary() 171 | 172 | # we can compare the performance with or without data augmentation 173 | data_augmentation = True 174 | 175 | if not data_augmentation: 176 | print('Not using data augmentation.') 177 | model.fit( 178 | x_train, 179 | y_train, 180 | batch_size=batch_size, 181 | epochs=epochs, 182 | validation_data=(x_test, y_test), 183 | shuffle=True) 184 | else: 185 | print('Using real-time data augmentation.') 186 | # This will do preprocessing and realtime data augmentation: 187 | datagen = ImageDataGenerator( 188 | featurewise_center=False, # set input mean to 0 over the dataset 189 | samplewise_center=False, # set each sample mean to 0 190 | featurewise_std_normalization=False, # divide inputs by dataset std 191 | samplewise_std_normalization=False, # divide each input by its std 192 | zca_whitening=False, # apply ZCA whitening 193 | rotation_range=0, # randomly rotate images in 0 to 180 degrees 194 | width_shift_range=0.1, # randomly shift images horizontally 195 | height_shift_range=0.1, # randomly shift images vertically 196 | horizontal_flip=True, # randomly flip images 197 | vertical_flip=False) # randomly flip images 198 | 199 | # Compute quantities required for feature-wise normalization 200 | # (std, mean, and principal components if ZCA whitening is applied). 201 | datagen.fit(x_train) 202 | 203 | # Fit the model on the batches generated by datagen.flow(). 204 | model.fit_generator( 205 | datagen.flow(x_train, y_train, batch_size=batch_size), 206 | epochs=epochs, 207 | validation_data=(x_test, y_test), 208 | workers=4) 209 | 210 | -------------------------------------------------------------------------------- /capslayer/layers.py: -------------------------------------------------------------------------------- 1 | ''' 2 | This module provides a set of high-level neural networks layers. 3 | ''' 4 | 5 | import tensorflow as tf 6 | from functools import reduce 7 | 8 | from capslayer.utils import get_transformation_matrix_shape 9 | from capslayer.utils import euclidean_norm 10 | from capslayer.ops import routing 11 | 12 | 13 | def fully_connected(inputs, activation, 14 | num_outputs, 15 | out_caps_shape, 16 | routing_method='EMRouting', 17 | reuse=None): 18 | '''A capsule fully connected layer. 19 | Args: 20 | inputs: A tensor with shape [batch_size, num_inputs] + in_caps_shape. 21 | activation: [batch_size, num_inputs] 22 | num_outputs: Integer, the number of output capsules in the layer. 23 | out_caps_shape: A list with two elements, pose shape of output capsules. 
24 | Returns: 25 | pose: [batch_size, num_outputs] + out_caps_shape 26 | activation: [batch_size, num_outputs] 27 | ''' 28 | in_pose_shape = inputs.get_shape().as_list() 29 | num_inputs = in_pose_shape[1] 30 | batch_size = in_pose_shape[0] 31 | T_size = get_transformation_matrix_shape(in_pose_shape[-2:], out_caps_shape) 32 | T_shape = [1, num_inputs, num_outputs] + T_size 33 | T_matrix = tf.get_variable("transformation_matrix", shape=T_shape) 34 | T_matrix = tf.tile(T_matrix, [batch_size, 1, 1, 1, 1]) 35 | inputs = tf.tile(tf.expand_dims(inputs, axis=2), [1, 1, num_outputs, 1, 1]) 36 | with tf.variable_scope('transformation'): 37 | # vote: [batch_size, num_inputs, num_outputs] + out_caps_shape 38 | vote = tf.matmul(T_matrix, inputs) 39 | with tf.variable_scope('routing'): 40 | if routing_method == 'EMRouting': 41 | activation = tf.reshape(activation, shape=activation.get_shape().as_list() + [1, 1]) 42 | vote = tf.reshape(vote, shape=[batch_size, num_inputs, num_outputs, -1]) 43 | pose, activation = routing(vote, activation, num_outputs, out_caps_shape, routing_method) 44 | pose = tf.reshape(pose, shape=[batch_size, num_outputs] + out_caps_shape) 45 | activation = tf.reshape(activation, shape=[batch_size, -1]) 46 | elif routing_method == 'DynamicRouting': 47 | pose, _ = routing(vote, activation, num_outputs=num_outputs, out_caps_shape=out_caps_shape, method=routing_method) 48 | pose = tf.squeeze(pose, axis=1) 49 | activation = tf.squeeze(euclidean_norm(pose)) 50 | return(pose, activation) 51 | 52 | 53 | def primaryCaps(input, filters, 54 | kernel_size, 55 | strides, 56 | out_caps_shape, 57 | method=None, 58 | regularizer=None): 59 | '''PrimaryCaps layer 60 | Args: 61 | input: [batch_size, in_height, in_width, in_channels]. 62 | filters: Integer, the dimensionality of the output space. 63 | kernel_size: ... 64 | strides: ... 65 | out_caps_shape: ... 66 | method: the method of calculating probability of entity existence(logistic, norm, None) 67 | Returns: 68 | pose: [batch_size, out_height, out_width, filters] + out_caps_shape 69 | activation: [batch_size, out_height, out_width, filters] 70 | ''' 71 | # pose matrix 72 | pose_size = reduce(lambda x, y: x * y, out_caps_shape) 73 | pose = tf.layers.conv2d(input, filters * pose_size, 74 | kernel_size=kernel_size, 75 | strides=strides, activation=None, 76 | activity_regularizer=regularizer) 77 | pose_shape = pose.get_shape().as_list()[:3] + [filters] + out_caps_shape 78 | pose = tf.reshape(pose, shape=pose_shape) 79 | 80 | if method == 'logistic': 81 | # logistic activation unit 82 | activation = tf.layers.conv2d(input, filters, 83 | kernel_size=kernel_size, 84 | strides=strides, 85 | activation=tf.nn.sigmoid, 86 | activity_regularizer=regularizer) 87 | elif method == 'norm': 88 | activation = euclidean_norm(pose) 89 | else: 90 | activation = None 91 | 92 | return(pose, activation) 93 | 94 | 95 | def conv2d(in_pose, 96 | activation, 97 | filters, 98 | out_caps_shape, 99 | kernel_size, 100 | strides=(1, 1), 101 | coordinate_addition=False, 102 | regularizer=None, 103 | reuse=None): 104 | '''A capsule convolutional layer. 105 | Args: 106 | in_pose: A tensor with shape [batch_size, in_height, in_width, in_channels] + in_caps_shape. 107 | activation: A tensor with shape [batch_size, in_height, in_width, in_channels] 108 | filters: ... 109 | out_caps_shape: ... 110 | kernel_size: ... 111 | strides: ... 112 | coordinate_addition: ... 
113 | regularizer: apply regularization on a newly created variable and add the variable to the collection tf.GraphKeys.REGULARIZATION_LOSSES. 114 | reuse: ... 115 | Returns: 116 | out_pose: A tensor with shape [batch_size, out_height, out_height, out_channals] + out_caps_shape, 117 | out_activation: A tensor with shape [batch_size, out_height, out_height, out_channels] 118 | ''' 119 | # do some preparation stuff 120 | in_pose_shape = in_pose.get_shape().as_list() 121 | in_caps_shape = in_pose_shape[-2:] 122 | batch_size = in_pose_shape[0] 123 | in_channels = in_pose_shape[3] 124 | 125 | T_size = get_transformation_matrix_shape(in_caps_shape, out_caps_shape) 126 | if isinstance(kernel_size, int): 127 | h_kernel_size = kernel_size 128 | w_kernel_size = kernel_size 129 | elif isinstance(kernel_size, (list, tuple)) and len(kernel_size) == 2: 130 | h_kernel_size = kernel_size[0] 131 | w_kernel_size = kernel_size[1] 132 | if isinstance(strides, int): 133 | h_stride = strides 134 | w_stride = strides 135 | elif isinstance(strides, (list, tuple)) and len(strides) == 2: 136 | h_stride = strides[0] 137 | w_stride = strides[1] 138 | num_inputs = h_kernel_size * w_kernel_size * in_channels 139 | batch_shape = [batch_size, h_kernel_size, w_kernel_size, in_channels] 140 | T_shape = (1, num_inputs, filters) + tuple(T_size) 141 | 142 | T_matrix = tf.get_variable("transformation_matrix", shape=T_shape, regularizer=regularizer) 143 | T_matrix_batched = tf.tile(T_matrix, [batch_size, 1, 1, 1, 1]) 144 | 145 | h_step = int((in_pose_shape[1] - h_kernel_size) / h_stride + 1) 146 | w_step = int((in_pose_shape[2] - w_kernel_size) / w_stride + 1) 147 | out_pose = [] 148 | out_activation = [] 149 | # start to do capsule convolution. 150 | # Note: there should be another way more computationally efficient to do this 151 | for i in range(h_step): 152 | col_pose = [] 153 | col_prob = [] 154 | h_s = i * h_stride 155 | h_e = h_s + h_kernel_size 156 | for j in range(w_step): 157 | with tf.variable_scope("transformation"): 158 | begin = [0, i * h_stride, j * w_stride, 0, 0, 0] 159 | size = batch_shape + in_caps_shape 160 | w_s = j * w_stride 161 | pose_sliced = in_pose[:, h_s:h_e, w_s:(w_s + w_kernel_size), :, :, :] 162 | pose_reshaped = tf.reshape(pose_sliced, shape=[batch_size, num_inputs, 1] + in_caps_shape) 163 | shape = [batch_size, num_inputs, filters] + in_caps_shape 164 | batch_pose = tf.multiply(pose_reshaped, tf.constant(1., shape=shape)) 165 | vote = tf.reshape(tf.matmul(T_matrix_batched, batch_pose), shape=[batch_size, num_inputs, filters, -1]) 166 | # do Coordinate Addition. 
Note: not yet completed 167 | if coordinate_addition: 168 | x = j / w_step 169 | y = i / h_step 170 | 171 | with tf.variable_scope("routing") as scope: 172 | if i > 0 or j > 0: 173 | scope.reuse_variables() 174 | begin = [0, i * h_stride, j * w_stride, 0] 175 | size = [batch_size, h_kernel_size, w_kernel_size, in_channels] 176 | prob = tf.slice(activation, begin, size) 177 | prob = tf.reshape(prob, shape=[batch_size, -1, 1, 1]) 178 | pose, prob = routing(vote, prob, filters, out_caps_shape, method="EMRouting", regularizer=regularizer) 179 | col_pose.append(pose) 180 | col_prob.append(prob) 181 | col_pose = tf.concat(col_pose, axis=2) 182 | col_prob = tf.concat(col_prob, axis=2) 183 | out_pose.append(col_pose) 184 | out_activation.append(col_prob) 185 | out_pose = tf.concat(out_pose, axis=1) 186 | out_activation = tf.concat(out_activation, axis=1) 187 | 188 | return(out_pose, out_activation) 189 | -------------------------------------------------------------------------------- /base_model.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Feb 22 23:16:18 2018 5 | 6 | @author: optnio 7 | """ 8 | 9 | import tensorflow as tf 10 | from collections import Counter 11 | from utils.utils import Utils as U 12 | import json 13 | import numpy as np 14 | from logger.logger import Logger 15 | import time 16 | import pickle 17 | import os 18 | 19 | log = Logger("ModelBase") 20 | 21 | class Hyperparameters(object): 22 | """ 23 | Simple class used to store Hyperparameters 24 | """ 25 | def __init__(self): 26 | super(Hyperparameters, self).__init__() 27 | # List used to store list of hyperparameters name 28 | self.hyp_list = [] 29 | 30 | def set_hyp(self, hyp): 31 | """ 32 | Method used to store hyperparameters inside this class 33 | **input: ** 34 | *hyp (Dict) Dictionary storing all hyperparameters values 35 | """ 36 | for key in hyp: 37 | self.hyp_list.append(key) 38 | setattr(self, key, hyp[key]) 39 | 40 | class ModelBase(object): 41 | """ 42 | Base Model Class 43 | """ 44 | 45 | # Hyp : Hyperparameters 46 | DEFAULT_OUTPUT = "outputs" 47 | DEFAULT_CHECKPOINT_FOLDER = "checkpoints" 48 | 49 | def __init__(self, model_name, hyperparameters_name=None, hyperparameters_content=None, output_folder=None): 50 | """ 51 | **input: 52 | *hyperparameters_name: [Optional] (String|None) Path to the hyperparameters file 53 | By default: hyperparameters.json 54 | *model_name: (Integer) Name of this model 55 | """ 56 | super(ModelBase, self).__init__() 57 | 58 | self.current_dir = os.path.dirname(os.path.realpath(__file__)) 59 | # Output folder 60 | if output_folder is None: 61 | self.output_folder = os.path.join( 62 | os.path.dirname(os.path.abspath(__file__)), self.DEFAULT_OUTPUT) 63 | else: 64 | self.output_folder = output_folder 65 | 66 | hyp_folder = "hyperparameter" 67 | hyp_filename = "parameters.json" 68 | hyp_path = os.path.join(self.current_dir, os.path.join(hyp_folder, hyp_filename)) 69 | self.checkpoints_folder = os.path.join(self.output_folder, self.DEFAULT_CHECKPOINT_FOLDER) 70 | 71 | # Set hyperparameters path 72 | if hyperparameters_name is not None: 73 | hyp_path = os.path.join( 74 | self.current_dir, os.path.join(hyp_folder, hyperparameters_name)) 75 | hyp_path = hyp_path if hyperparameters_name is None else hyp_path 76 | # Load hyperparameters content 77 | if hyperparameters_content is None: 78 | hyp_content = U.read_json_file(hyp_path) 79 | else: 80 | hyp_content = 
hyperparameters_content 81 | # Set hyperparameters 82 | self.h = Hyperparameters() 83 | self.h.set_hyp(hyp_content) 84 | # Set model names 85 | self.name = model_name 86 | self.model_name = model_name 87 | self._set_hyperparameters_name() 88 | # Since hyperparameters had changed, we need to set again each name 89 | self._set_names() 90 | 91 | def _create_conv(self, prev, shape, padding='VALID', strides=[1, 1, 1, 1], relu=False, 92 | max_pooling=False, mp_ksize=[1, 2, 2, 1], mp_strides=[1, 2, 2, 1]): 93 | """ 94 | Create a convolutional layer with relu and/mor max pooling(Optional) 95 | """ 96 | conv_w = tf.Variable(tf.truncated_normal(shape=shape, mean = 0, stddev = 0.1, seed=0)) 97 | conv_b = tf.Variable(tf.zeros(shape[-1])) 98 | conv = tf.nn.conv2d(prev, conv_w, strides=strides, padding=padding) + conv_b 99 | 100 | if relu: 101 | conv = tf.nn.relu(conv) 102 | 103 | if max_pooling: 104 | conv = tf.nn.max_pool(conv, ksize=mp_ksize, strides=mp_strides, padding='VALID') 105 | 106 | return conv 107 | 108 | def _fc(self, prev, input_size, output_size, relu=False, sigmoid=False, no_bias=False, 109 | softmax=False): 110 | """ 111 | Create fully connecter layer with relu(Optional) 112 | """ 113 | fc_w = tf.Variable( 114 | tf.truncated_normal(shape=(input_size, output_size), mean = 0., stddev = 0.1)) 115 | fc_b = tf.Variable(tf.zeros(output_size)) 116 | pre_activation = tf.matmul(prev, fc_w) 117 | activation = None 118 | 119 | if not no_bias: 120 | pre_activation = pre_activation + fc_b 121 | if relu: 122 | activation = tf.nn.relu(pre_activation) 123 | if sigmoid: 124 | activation = tf.nn.sigmoid(pre_activation) 125 | if softmax: 126 | activation = tf.nn.softmax(pre_activation) 127 | 128 | if activation is None: 129 | activation = pre_activation 130 | 131 | return activation, pre_activation 132 | 133 | def init_session(self): 134 | """ 135 | Init tensorflow session 136 | A saver property is create at the same time 137 | """ 138 | # Create session 139 | self.saver = tf.train.Saver() 140 | self.sess = tf.Session() 141 | # Init variables 142 | self.sess.run(tf.global_variables_initializer()) 143 | # Tensorboard 144 | self.tf_tensorboard = tf.summary.merge_all() 145 | train_log_name = os.path.join( 146 | os.path.join(self.output_folder, "tensorboard"), self.name, self.sub_train_log_name) 147 | test_log_name = os.path.join( 148 | os.path.join(self.output_folder, "tensorboard"), self.name, self.sub_test_log_name) 149 | self.train_writer = tf.summary.FileWriter(train_log_name, self.sess.graph) 150 | self.test_writer = tf.summary.FileWriter(test_log_name) 151 | self.train_writer_it = 0 152 | self.test_writer_it = 0 153 | 154 | # Backup tensors 155 | backup_tensors = {} 156 | for field in dir(self): 157 | if "tf_" in field and field.index("tf_") == 0: 158 | backup_tensors[field] = getattr(self, field).name 159 | tf.constant(json.dumps(backup_tensors), dtype=tf.string, name="model_base_tensors_backup") 160 | # Backup hyperparameters 161 | backup_hyp = {} 162 | for field in self.h.hyp_list: 163 | value = getattr(self.h, field) 164 | d_type = tf.int32 if isinstance(value, int) else tf.float32 165 | n_cst = tf.constant(value, dtype=d_type, name="hyp/%s" % field) 166 | backup_hyp[field] = n_cst.name 167 | tf.constant(json.dumps(backup_hyp), dtype=tf.string, name="model_base_hyp_backup") 168 | 169 | def get_equal_batches(self, data, labels, batch_size): 170 | """ 171 | This method will return a generator class which could be used to 172 | get new batches with the same number of rows for each class 173 | 
**input:** 174 | *batch_size (int) Size of each batch 175 | **return (Python Generator of Batch class)** 176 | """ 177 | labels = np.array(labels) 178 | 179 | indexs = np.arange(len(data)) 180 | np.random.shuffle(indexs) 181 | 182 | data = data[indexs] 183 | labels = labels[indexs] 184 | 185 | max_size = Counter(labels).most_common()[-1][1] 186 | unique_label = np.array(list(set(labels))) 187 | nb_classes = len(unique_label) 188 | 189 | if batch_size > max_size: 190 | batch_size = max_size 191 | 192 | batch_per_class = batch_size // nb_classes 193 | iterations = max_size // batch_per_class 194 | 195 | for it in range(iterations): 196 | 197 | indexes = [] 198 | 199 | for label in unique_label: 200 | n_indexes = np.where(labels==label)[0][it * batch_per_class: (it + 1) * batch_per_class] 201 | n_indexes = n_indexes.tolist() 202 | indexes += n_indexes 203 | 204 | indexes = np.array(indexes) 205 | 206 | x = data[indexes] 207 | y = labels[indexes] 208 | 209 | yield x, y 210 | 211 | 212 | def get_batches(self, data_list, batch_size, shuffle=True): 213 | """ 214 | This method will return a generator class which could be used to 215 | get new batches. 216 | **input:** 217 | *batch_size (int) Size of each batch 218 | **return (Python Generator of Batch class)** 219 | """ 220 | if shuffle: 221 | indexs = np.arange(len(data_list[0])) 222 | np.random.shuffle(indexs) 223 | 224 | for d, data in enumerate(data_list): 225 | data_list[d] = np.array(data_list[d]) 226 | data_list[d] = data_list[d][indexs] 227 | 228 | iterations = len(data_list[0]) // batch_size 229 | for iteration in range(iterations): 230 | yield (dt[iteration * batch_size: (iteration + 1) * batch_size] for dt in data_list) 231 | 232 | def save(self, name=None): 233 | """ 234 | Save the model 235 | """ 236 | log.info("Saving model ...") 237 | 238 | if name is None: 239 | name = self.model_name 240 | 241 | if not os.path.exists(self.checkpoints_folder): 242 | os.makedirs(self.checkpoints_folder) 243 | 244 | save_path = self.saver.save( 245 | self.sess, os.path.join(self.checkpoints_folder, name)) 246 | 247 | log.info("Model successfully saved here: %s" % save_path) 248 | 249 | def _set_hyperparameters_name(self): 250 | """ 251 | Convert hyperparameters dict to a string 252 | This string will be used to set the models names 253 | """ 254 | # Generate a little name for each hyperparameters 255 | hyperparameters_names = [("".join([p[0] for p in hyp.split("_")]), getattr(self.h, hyp)) 256 | for hyp in self.h.hyp_list] 257 | self.hyperparameters_name = "" 258 | for index_hyperparameter, hyperparameter in enumerate(hyperparameters_names): 259 | short_name, value = hyperparameter 260 | prepend = "" if index_hyperparameter == 0 else "_" 261 | self.hyperparameters_name += "%s%s_%s" % (prepend, short_name, value) 262 | 263 | def _set_names(self): 264 | """ 265 | Set all model names 266 | """ 267 | name_time = "%s--%s" % (self.model_name, time.time()) 268 | # model_name is used to set the ckpt name 269 | self.model_name = "%s--%s" % (self.hyperparameters_name, name_time) 270 | # sub_train_log_name is used to set the name of the training part in tensorboard 271 | self.sub_train_log_name = "%s-train--%s" % (self.hyperparameters_name, name_time) 272 | # sub_test_log_name is used to set the name of the testing part in tensorboard 273 | self.sub_test_log_name = "%s-test--%s" % (self.hyperparameters_name, name_time) 274 | 275 | def dump_batch(self, folder, data): 276 | """ 277 | Save batches 278 | Mainly used for Reinforcement Learning 279 | """ 280 | folder 
= os.path.join(os.path.dirname(os.path.abspath(__file__)), folder) 281 | # Create folder if not exist 282 | if not os.path.exists(folder): 283 | os.makedirs(folder) 284 | 285 | pickle.dump(data, open(os.path.join(folder, str(time.time())), "wb" )) 286 | 287 | 288 | def load(self, ckpt): 289 | """ 290 | Load a model 291 | """ 292 | log.info("Loading ckpt ...") 293 | #loaded_graph = tf.Graph() 294 | #tf.reset_default_graph() 295 | #g = tf.Graph() 296 | #with g.as_default(): 297 | self.sess = tf.Session() 298 | # Load the graph 299 | loader = tf.train.import_meta_graph(ckpt + '.meta') 300 | loader.restore(self.sess, ckpt) 301 | 302 | g = tf.get_default_graph() 303 | 304 | # Search for the backup tensor 305 | tensor_names = [ 306 | n.name for n in g.as_graph_def().node if "model_base_tensors_backup" in n.name] 307 | 308 | # Search for the backup hyp 309 | hyp_names = [ 310 | n.name for n in g.as_graph_def().node if "model_base_hyp_backup" in n.name] 311 | 312 | # Get the tensor string 313 | #tensors = g.get_tensor_by_name(names[0]) 314 | tensors = g.get_operation_by_name(tensor_names[0]).outputs 315 | hyps = g.get_operation_by_name(hyp_names[0]).outputs 316 | 317 | #self.sess.run(tf.global_variables_initializer()) 318 | 319 | tensors = self.sess.run(tensors)[0] 320 | tensors = json.loads(tensors) 321 | for tensor in tensors: 322 | try: 323 | n_tensor = g.get_tensor_by_name(tensors[tensor]) 324 | except Exception as e: 325 | n_tensor = g.get_operation_by_name(tensors[tensor]) 326 | setattr(self, tensor, n_tensor) 327 | 328 | hyps = self.sess.run(hyps)[0] 329 | hyps = json.loads(hyps) 330 | for hyp in hyps: 331 | n_hyp = g.get_tensor_by_name(hyps[hyp]) 332 | setattr(self.h, hyp, self.sess.run(n_hyp)) 333 | 334 | log.info("Ckpt ready") 335 | 336 | # Tensorboard 337 | self.tf_tensorboard = tf.summary.merge_all() 338 | train_log_name = os.path.join( 339 | os.path.join(self.output_folder, "tensorboard"), self.name, self.sub_train_log_name) 340 | test_log_name = os.path.join( 341 | os.path.join(self.output_folder, "tensorboard"), self.name, self.sub_test_log_name) 342 | self.train_writer = tf.summary.FileWriter(train_log_name, self.sess.graph) 343 | self.test_writer = tf.summary.FileWriter(test_log_name) 344 | self.train_writer_it = 0 345 | self.test_writer_it = 0 346 | 347 | self.model_name = ckpt.split("/")[-1] 348 | self.saver = tf.train.Saver() 349 | 350 | 351 | if __name__ == '__main__': 352 | base_model = ModelBase("test") -------------------------------------------------------------------------------- /model_object_detection.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Feb 22 22:42:56 2018 5 | 6 | @author: optnio 7 | """ 8 | 9 | import numpy as np 10 | from base_model import ModelBase 11 | import tensorflow as tf 12 | from capsnet import conv_caps_layer,fully_connected_caps_layer 13 | 14 | 15 | class Model_Object_Detection(ModelBase): 16 | """ 17 | Model_object_Detection 18 | This class is used to create the conv graph using: 19 | Dynamic Routing Between Capsules 20 | """ 21 | 22 | # Numbers of label to predict 23 | NB_LABELS = 100 24 | 25 | def __init__(self, model_name, output_folder): 26 | """ 27 | **input: 28 | *model_name: (Integer) Name of this model 29 | *output_folder: Output folder to saved data (tensorboard, checkpoints) 30 | """ 31 | ModelBase.__init__(self, model_name, output_folder=output_folder) 32 | 33 | def _build_inputs(self): 34 | """ 35 | Build tensorflow 
inputs 36 | (Placeholder) 37 | **return: ** 38 | *tf_images: Images Placeholder 39 | *tf_labels: Labels Placeholder 40 | """ 41 | # Images 32*32*3 42 | tf_images = tf.placeholder(tf.float32, [None, 32, 32, 3], name='images') 43 | # Labels: [0, 1, 6, 20, ...] 44 | tf_labels = tf.placeholder(tf.int64, [None], name='labels') 45 | return tf_images, tf_labels 46 | 47 | def _build_main_network(self, images, conv_2_dropout): 48 | """ 49 | This method is used to create the two convolutions and the CapsNet on the top 50 | **input: 51 | *images: Image PLaceholder 52 | *conv_2_dropout: Dropout value placeholder 53 | **return: ** 54 | *Caps1: Output of first Capsule layer 55 | *Caps2: Output of second Capsule layer 56 | """ 57 | # First BLock: 58 | # Layer 1: Convolution. 59 | shape = (self.h.conv_1_size, self.h.conv_1_size, 3, self.h.conv_1_nb) 60 | conv1 = self._create_conv(self.tf_images, shape, relu=True, max_pooling=False, padding='VALID') 61 | 62 | # Layer 2: Convolution. 63 | #shape = (self.h.conv_2_size, self.h.conv_2_size, self.h.conv_1_nb, self.h.conv_2_nb) 64 | #conv2 = self._create_conv(conv1, shape, relu=True, max_pooling=False, padding='VALID') 65 | conv1 = tf.nn.dropout(conv1, keep_prob=conv_2_dropout) 66 | 67 | # Create the first capsules layer 68 | caps1 = conv_caps_layer( 69 | input_layer=conv1, 70 | capsules_size=self.h.caps_1_vec_len, 71 | nb_filters=self.h.caps_1_nb_filter, 72 | kernel=self.h.caps_1_size) 73 | # Create the second capsules layer used to predict the output 74 | caps2 = fully_connected_caps_layer( 75 | input_layer=caps1, 76 | capsules_size=self.h.caps_2_vec_len, 77 | nb_capsules=self.NB_LABELS, 78 | iterations=self.h.routing_steps) 79 | 80 | return caps1, caps2 81 | 82 | def _build_decoder(self, caps2, one_hot_labels, batch_size): 83 | """ 84 | Build the decoder part from the last capsule layer 85 | **input: 86 | *Caps2: Output of second Capsule layer 87 | *one_hot_labels 88 | *batch_size 89 | """ 90 | labels = tf.reshape(one_hot_labels, (-1, self.NB_LABELS, 1)) 91 | # squeeze(caps2): [?, len_v_j, capsules_nb] 92 | # labels: [?, NB_LABELS, 1] with capsules_nb == NB_LABELS 93 | mask = tf.matmul(tf.squeeze(caps2), labels, transpose_a=True) 94 | # Select the good capsule vector 95 | capsule_vector = tf.reshape(mask, shape=(batch_size, self.h.caps_2_vec_len)) 96 | # capsule_vector: [?, len_v_j] 97 | 98 | # Reconstruct image 99 | fc1 = tf.contrib.layers.fully_connected(capsule_vector, num_outputs=400) 100 | fc1 = tf.reshape(fc1, shape=(batch_size, 5, 5, 16)) 101 | upsample1 = tf.image.resize_nearest_neighbor(fc1, (8, 8)) 102 | conv1 = tf.layers.conv2d(upsample1, 4, (3,3), padding='same', activation=tf.nn.relu) 103 | 104 | upsample2 = tf.image.resize_nearest_neighbor(conv1, (16, 16)) 105 | conv2 = tf.layers.conv2d(upsample2, 8, (3,3), padding='same', activation=tf.nn.relu) 106 | 107 | upsample3 = tf.image.resize_nearest_neighbor(conv2, (32, 32)) 108 | conv6 = tf.layers.conv2d(upsample3, 16, (3,3), padding='same', activation=tf.nn.relu) 109 | 110 | # 3 channel for RGG 111 | logits = tf.layers.conv2d(conv6, 3, (3,3), padding='same', activation=None) 112 | decoded = tf.nn.sigmoid(logits, name='decoded') 113 | tf.summary.image('reconstruction_img', decoded) 114 | 115 | return decoded 116 | 117 | def init(self): 118 | """ 119 | Init the graph 120 | """ 121 | # Get graph inputs 122 | self.tf_images, self.tf_labels = self._build_inputs() 123 | # Dropout inputs 124 | self.tf_conv_2_dropout = tf.placeholder(tf.float32, shape=(), name='conv_2_dropout') 125 | # Dynamic batch size 126 
| batch_size = tf.shape(self.tf_images)[0] 127 | # Translate labels to one hot array 128 | one_hot_labels = tf.one_hot(self.tf_labels, depth=self.NB_LABELS) 129 | # Create the first convolution and the CapsNet 130 | self.tf_caps1, self.tf_caps2 = self._build_main_network(self.tf_images, self.tf_conv_2_dropout) 131 | 132 | # Build the images reconstruction 133 | self.tf_decoded = self._build_decoder(self.tf_caps2, one_hot_labels, batch_size) 134 | 135 | # Build the loss 136 | _loss = self._build_loss( 137 | self.tf_caps2, one_hot_labels, self.tf_labels, self.tf_decoded, self.tf_images) 138 | (self.tf_loss_squared_rec, self.tf_margin_loss_sum, self.tf_predicted_class, 139 | self.tf_correct_prediction, self.tf_accuracy, self.tf_loss, self.tf_margin_loss, 140 | self.tf_reconstruction_loss) = _loss 141 | 142 | # Build optimizer 143 | optimizer = tf.train.AdamOptimizer(learning_rate=self.h.learning_rate) 144 | self.tf_optimizer = optimizer.minimize(self.tf_loss, global_step=tf.Variable(0, trainable=False)) 145 | 146 | # Log value into tensorboard 147 | tf.summary.scalar('margin_loss', self.tf_margin_loss) 148 | tf.summary.scalar('accuracy', self.tf_accuracy) 149 | tf.summary.scalar('total_loss', self.tf_loss) 150 | tf.summary.scalar('reconstruction_loss', self.tf_reconstruction_loss) 151 | 152 | self.tf_test = tf.random_uniform([2], minval=0, maxval=None, dtype=tf.float32, seed=None, name="tf_test") 153 | 154 | self.init_session() 155 | 156 | 157 | def _build_loss(self, caps2, one_hot_labels, labels, decoded, images): 158 | """ 159 | Build the loss of the graph 160 | """ 161 | # Get the length of each capsule 162 | capsules_length = tf.sqrt(tf.reduce_sum(tf.square(caps2), axis=2, keep_dims=True)) 163 | 164 | max_l = tf.square(tf.maximum(0., 0.9 - capsules_length)) 165 | max_l = tf.reshape(max_l, shape=(-1, self.NB_LABELS)) 166 | max_r = tf.square(tf.maximum(0., capsules_length - 0.1)) 167 | max_r = tf.reshape(max_r, shape=(-1, self.NB_LABELS)) 168 | t_c = one_hot_labels 169 | m_loss = t_c * max_l + 0.5 * (1 - t_c) * max_r 170 | margin_loss_sum = tf.reduce_sum(m_loss, axis=1) 171 | margin_loss = tf.reduce_mean(margin_loss_sum) 172 | 173 | # Reconstruction loss 174 | loss_squared_rec = tf.square(decoded - images) 175 | reconstruction_loss = tf.reduce_mean(loss_squared_rec) 176 | 177 | # 3. 
Total loss 178 | loss = margin_loss + (0.0005 * reconstruction_loss) 179 | 180 | # Accuracy 181 | predicted_class = tf.argmax(capsules_length, axis=1) 182 | predicted_class = tf.reshape(predicted_class, [tf.shape(capsules_length)[0]]) 183 | correct_prediction = tf.equal(predicted_class, labels) 184 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 185 | 186 | return (loss_squared_rec, margin_loss_sum, predicted_class, correct_prediction, accuracy, 187 | loss, margin_loss, reconstruction_loss) 188 | 189 | def optimize(self, images, labels, tb_save=True): 190 | """ 191 | Train the model 192 | **input: ** 193 | *images: Image to train the model on 194 | *labels: True classes 195 | *tb_save: (Boolean) Log this optimization in tensorboard 196 | **return: ** 197 | Loss: The loss of the model on this batch 198 | Acc: Accuracy of the model on this batch 199 | """ 200 | tensors = [self.tf_optimizer, self.tf_margin_loss, self.tf_accuracy, self.tf_tensorboard] 201 | _, loss, acc, summary = self.sess.run(tensors, 202 | feed_dict={ 203 | self.tf_images: images, 204 | self.tf_labels: labels, 205 | self.tf_conv_2_dropout: self.h.conv_2_dropout 206 | }) 207 | 208 | if tb_save: 209 | # Write data to tensorboard 210 | self.train_writer.add_summary(summary, self.train_writer_it) 211 | self.train_writer_it += 1 212 | 213 | return loss, acc 214 | 215 | def evaluate(self, images, labels, tb_train_save=False, tb_test_save=False): 216 | """ 217 | Evaluate dataset 218 | **input: ** 219 | *images: Image to train the model on 220 | *labels: True classes 221 | *tb_train_save: (Boolean) Log this optimization in tensorboard under the train part 222 | *tb_test_save: (Boolean) Log this optimization in tensorboard under the test part 223 | **return: ** 224 | Loss: The loss of the model on this batch 225 | Acc: Accuracy of the model on this batch 226 | """ 227 | tensors = [self.tf_margin_loss, self.tf_accuracy, self.tf_tensorboard] 228 | loss, acc, summary = self.sess.run(tensors, 229 | feed_dict={ 230 | self.tf_images: images, 231 | self.tf_labels: labels, 232 | self.tf_conv_2_dropout: 1. 233 | }) 234 | 235 | if tb_test_save: 236 | # Write data to tensorboard 237 | self.test_writer.add_summary(summary, self.test_writer_it) 238 | self.test_writer_it += 1 239 | 240 | if tb_train_save: 241 | # Write data to tensorboard 242 | self.train_writer.add_summary(summary, self.train_writer_it) 243 | self.train_writer_it += 1 244 | 245 | return loss, acc 246 | 247 | def predict(self, images): 248 | """ 249 | Method used to predict a class 250 | Return a softmax 251 | **input: ** 252 | *images: Image to train the model on 253 | **return: 254 | *softmax: Softmax between all capsules 255 | """ 256 | tensors = [self.tf_caps2] 257 | 258 | caps2 = self.sess.run(tensors, 259 | feed_dict={ 260 | self.tf_images: images, 261 | self.tf_conv_2_dropout: 1. 
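# dropout keep_prob is pinned to 1.0 at inference time: dropout is a
# training-only regularizer, so prediction runs the full network
# (evaluate() feeds the same value)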
262 | })[0] 263 | 264 | # tf.sqrt(tf.reduce_sum(tf.square(caps2), axis=2, keep_dims=True)) 265 | caps2 = np.sqrt(np.sum(np.square(caps2), axis=2, keepdims=True)) 266 | caps2 = np.reshape(caps2, (len(images), self.NB_LABELS)) 267 | # softmax 268 | softmax = np.exp(caps2) / np.sum(np.exp(caps2), axis=1, keepdims=True) 269 | 270 | return softmax 271 | 272 | def reconstruction(self, images, labels): 273 | """ 274 | Method used to get the reconstructions given a batch 275 | Return the result as a softmax 276 | **input: ** 277 | *images: Image to train the model on 278 | *labels: True classes 279 | """ 280 | tensors = [self.tf_decoded] 281 | 282 | decoded = self.sess.run(tensors, 283 | feed_dict={ 284 | self.tf_images: images, 285 | self.tf_labels: labels, 286 | self.tf_conv_2_dropout: 1. 287 | })[0] 288 | 289 | return decoded 290 | 291 | def evaluate_dataset(self, images, labels, batch_size=10): 292 | """ 293 | Evaluate a full dataset 294 | This method is used to fully evaluate the dataset batch per batch. Useful when 295 | the dataset can't be fit inside to the GPU. 296 | *input: ** 297 | *images: Image to train the model on 298 | *labels: True classes 299 | *return: ** 300 | *loss: Loss overall your dataset 301 | *accuracy: Accuracy overall your dataset 302 | *predicted_class: Predicted class 303 | """ 304 | tensors = [self.tf_loss_squared_rec, self.tf_margin_loss_sum, self.tf_correct_prediction, 305 | self.tf_predicted_class] 306 | 307 | loss_squared_rec_list = None 308 | margin_loss_sum_list = None 309 | correct_prediction_list = None 310 | predicted_class = None 311 | 312 | b = 0 313 | for batch in self.get_batches([images, labels], batch_size, shuffle=False): 314 | images_batch, labels_batch = batch 315 | loss_squared_rec, margin_loss_sum, correct_prediction, classes = self.sess.run(tensors, 316 | feed_dict={ 317 | self.tf_images: images_batch, 318 | self.tf_labels: labels_batch, 319 | self.tf_conv_2_dropout: 1. 320 | }) 321 | if loss_squared_rec_list is not None: 322 | predicted_class = np.concatenate((predicted_class, classes)) 323 | loss_squared_rec_list = np.concatenate((loss_squared_rec_list, loss_squared_rec)) 324 | margin_loss_sum_list = np.concatenate((margin_loss_sum_list, margin_loss_sum)) 325 | correct_prediction_list = np.concatenate((correct_prediction_list, correct_prediction)) 326 | else: 327 | predicted_class = classes 328 | loss_squared_rec_list = loss_squared_rec 329 | margin_loss_sum_list = margin_loss_sum 330 | correct_prediction_list = correct_prediction 331 | b += batch_size 332 | 333 | margin_loss = np.mean(margin_loss_sum_list) 334 | reconstruction_loss = np.mean(loss_squared_rec_list) 335 | accuracy = np.mean(correct_prediction_list) 336 | 337 | loss = tf.add(margin_loss, 0.005 * reconstruction_loss, name="loss") 338 | 339 | return loss, accuracy, predicted_class -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # In[1]: 5 | 6 | 7 | # -*- coding: utf-8 -*- 8 | """ 9 | Spyder Editor 10 | 11 | This is a temporary script file. 
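It loads CIFAR-100, defines the capsule layers and the CapsNet model inline, and runs the training loop.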
12 | """ 13 | 14 | import random 15 | import os 16 | os.environ['TF_CPP_MIN_LOG_LEVEL']='2' 17 | from keras.datasets import cifar100 18 | from keras.utils import to_categorical 19 | from keras.models import Model 20 | from keras.layers import Dense, GlobalAveragePooling2D 21 | from keras.optimizers import SGD 22 | #from keras.applications.vgg19 import VGG19 23 | from keras.applications.vgg16 import VGG16 24 | import numpy as np 25 | 26 | # read data 27 | (x_train, y_train), (x_test, y_test) = cifar100.load_data(label_mode='fine') 28 | 29 | 30 | # TODO: How many unique classes/labels there are in the dataset. 31 | n_classes = len(y_test) 32 | #n_classes = 10 33 | 34 | y_train = np.reshape(y_train,50000) 35 | y_test = np.reshape(y_test,10000) 36 | print(n_classes) 37 | 38 | print(x_train.shape) 39 | print(y_train.shape) 40 | print(x_test.shape) 41 | print(y_test.shape) 42 | 43 | 44 | # In[2]: 45 | 46 | 47 | import matplotlib.pyplot as plt 48 | import random 49 | from PIL import Image 50 | import numpy as np 51 | import random 52 | from PIL import Image, ImageEnhance 53 | # Visualizations will be shown in the notebook. 54 | #get_ipython().magic('matplotlib inline') 55 | 56 | # Load name of id 57 | with open("./data/classnames.csv", "r") as f: 58 | signnames = f.read() 59 | id_to_name = { int(line.split(",")[0]):line.split(",")[1] for line in signnames.split("\n")[1:] if len(line) > 0} 60 | print(id_to_name[1]) 61 | 62 | 63 | # In[3]: 64 | 65 | """ 66 | graph_size = 3 67 | random_index_list = [random.randint(0, x_train.shape[0]) for _ in range(graph_size * graph_size)] 68 | fig = plt.figure(figsize=(15, 15)) 69 | for i, index in enumerate(random_index_list): 70 | a=fig.add_subplot(graph_size, graph_size, i+1) 71 | #im = Image.fromarray(np.rollaxis(X_train[index] * 255, 0,3)) 72 | # imgplot = plt.imshow(x_train[index]) 73 | # Plot some images 74 | 75 | #plt.show() 76 | 77 | 78 | # In[4]: 79 | 80 | 81 | fig, ax = plt.subplots() 82 | # the histogram of the data 83 | values, bins, patches = ax.hist(y_train, n_classes, normed=10) 84 | 85 | # add a 'best fit' line 86 | ax.set_xlabel('Smarts') 87 | ax.set_title(r'Histogram of classess') 88 | 89 | # Tweak spacing to prevent clipping of ylabel 90 | fig.tight_layout() 91 | 92 | print ("Most common index") 93 | most_common_index = sorted(range(len(values)), key=lambda k: values[k], reverse=True) 94 | for index in most_common_index[:10]: 95 | print("index: %s => %s" % (index, values[index])) 96 | """ 97 | 98 | # In[5]: 99 | 100 | 101 | from keras.preprocessing.image import ImageDataGenerator 102 | from keras import utils 103 | 104 | batch_size = 128 105 | num_classes = 100 106 | epochs = 100 107 | 108 | #x_train = x_train.astype('float32') 109 | #x_test = x_test.astype('float32') 110 | #x_train /= 255 111 | #x_test /= 255 112 | #y_train = utils.to_categorical(y_train, num_classes) 113 | #y_test = utils.to_categorical(y_test, num_classes) 114 | x_train = x_train / 255 115 | 116 | x_test = x_test / 255 117 | 118 | 119 | # In[6]: 120 | 121 | 122 | def preprocessing_function(img): 123 | """ 124 | Custom preprocessing_function 125 | """ 126 | img = img * 255 127 | img = Image.fromarray(img.astype('uint8'), 'RGB') 128 | img = ImageEnhance.Brightness(img).enhance(random.uniform(0.6, 1.5)) 129 | img = ImageEnhance.Contrast(img).enhance(random.uniform(0.6, 1.5)) 130 | 131 | return np.array(img) / 255 132 | 133 | train_datagen = ImageDataGenerator() 134 | train_datagen_augmented = ImageDataGenerator( 135 | rotation_range=30, 136 | shear_range=0.2, 137 | 
width_shift_range=0.2, 138 | height_shift_range=0.2, 139 | horizontal_flip=True, 140 | preprocessing_function=preprocessing_function) 141 | inference_datagen = ImageDataGenerator() 142 | train_datagen.fit(x_test) 143 | train_datagen_augmented.fit(x_test) 144 | inference_datagen.fit(x_test) 145 | inference_datagen.fit(x_test) 146 | 147 | """ 148 | fig = plt.figure() 149 | 150 | n = 0 151 | 152 | graph_size = 3 153 | 154 | for x_batch, y_batch in train_datagen_augmented.flow(x_test, y_test, batch_size=1): 155 | a=fig.add_subplot(graph_size, graph_size, n+1) 156 | # imgplot = plt.imshow(x_batch[0]) 157 | n = n + 1 158 | if n > 8: 159 | break 160 | 161 | 162 | plt.show() 163 | """ 164 | 165 | # In[7]: 166 | 167 | 168 | import numpy as np 169 | import tensorflow as tf 170 | import numpy as np 171 | 172 | 173 | def conv_caps_layer(input_layer, capsules_size, nb_filters, kernel, stride=2): 174 | """ 175 | Capsule layer for the convolutional inputs 176 | **input: 177 | *input_layer: (Tensor) 178 | *capsule_numbers: (Integer) the number of capsule in this layer. 179 | *kernel_size: (Integer) Size of the kernel for each filter. 180 | *stride: (Integer) 2 by default 181 | """ 182 | # "In convolutional capsule layers each unit in a capsule is a convolutional unit. 183 | # Therefore, each capsule will output a grid of vectors rather than a single vector output." 184 | capsules = tf.contrib.layers.conv2d( 185 | input_layer, nb_filters * capsules_size, kernel, stride, padding="VALID") 186 | # conv shape: [?, kernel, kernel, nb_filters] 187 | shape = capsules.get_shape().as_list() 188 | capsules = tf.reshape(capsules, shape=(-1, np.prod(shape[1:3]) * nb_filters, capsules_size, 1)) 189 | # capsules shape: [?, nb_capsules, capsule_size, 1] 190 | return squash(capsules) 191 | 192 | def routing(u_hat, b_ij, nb_capsules, nb_capsules_p, iterations=4): 193 | """ 194 | Routing algorithm 195 | 196 | **input: 197 | *u_hat: Dot product (weights between previous capsule and current capsule) 198 | *b_ij: the log prior probabilities that capsule i should be coupled to capsule j 199 | *nb_capsules_p: Number of capsule in the previous layer 200 | *nb_capsules: Number of capsule in this layer 201 | """ 202 | # Start the routing algorithm 203 | for it in range(iterations): 204 | with tf.variable_scope('routing_' + str(it)): 205 | # Line 4 of algo 206 | # probabilities that capsule i should be coupled to capsule j. 207 | # c_ij: [nb_capsules_p, nb_capsules, 1, 1] 208 | c_ij = tf.nn.softmax(b_ij, dim=2) 209 | 210 | # Line 5 of algo 211 | # c_ij: [ nb_capsules_p, nb_capsules, 1, 1] 212 | # u_hat: [?, nb_capsules_p, nb_capsules, len_v_j, 1] 213 | s_j = tf.multiply(c_ij, u_hat) 214 | # s_j: [?, nb_capsules_p, nb_capsules, len_v_j, 1] 215 | s_j = tf.reduce_sum(s_j, axis=1, keep_dims=True) 216 | # s_j: [?, 1, nb_capsules, len_v_j, 1) 217 | 218 | # line 6: 219 | # squash using Eq.1, 220 | v_j = squash(s_j) 221 | # v_j: [1, 1, nb_capsules, len_v_j, 1) 222 | 223 | # line 7: 224 | # Frist reshape & tile v_j 225 | # [? 
, 1, nb_capsules, len_v_j, 1] -> 226 | # [?, nb_capsules_p, nb_capsules, len_v_j, 1] 227 | v_j_tiled = tf.tile(v_j, [1, nb_capsules_p, 1, 1, 1]) 228 | # u_hat: [?, nb_capsules_p, nb_capsules, len_v_j, 1] 229 | # v_j_tiled [1, nb_capsules_p, nb_capsules, len_v_j, 1] 230 | u_dot_v = tf.matmul(u_hat, v_j_tiled, transpose_a=True) 231 | # u_produce_v: [?, nb_capsules_p, nb_capsules, 1, 1] 232 | b_ij += tf.reduce_sum(u_dot_v, axis=0, keep_dims=True) 233 | #b_ih: [1, nb_capsules_p, nb_capsules, 1, 1] 234 | 235 | return tf.squeeze(v_j, axis=1) 236 | 237 | def fully_connected_caps_layer(input_layer, capsules_size, nb_capsules, iterations=4): 238 | """ 239 | Second layer receiving inputs from all capsules of the layer below 240 | **input: 241 | *input_layer: (Tensor) 242 | *capsules_size: (Integer) Size of each capsule 243 | *nb_capsules: (Integer) Number of capsule 244 | *iterations: (Integer) Number of iteration for the routing algorithm 245 | 246 | i refer to the layer below. 247 | j refer to the layer above (the current layer). 248 | """ 249 | shape = input_layer.get_shape().as_list() 250 | # Get the size of each capsule in the previous layer and the current layer. 251 | len_u_i = np.prod(shape[2]) 252 | len_v_j = capsules_size 253 | # Get the number of capsule in the layer bellow. 254 | nb_capsules_p = np.prod(shape[1]) 255 | 256 | # w_ij: Used to compute u_hat by multiplying the output ui of a capsule in the layer below 257 | # with this matrix 258 | # [nb_capsules_p, nb_capsules, len_v_j, len_u_i] 259 | _init = tf.random_normal_initializer(stddev=0.01, seed=0) 260 | _shape = (nb_capsules_p, nb_capsules, len_v_j, len_u_i) 261 | w_ij = tf.get_variable('weight', shape=_shape, dtype=tf.float32, initializer=_init) 262 | 263 | # Adding one dimension to the input [batch_size, nb_capsules_p, length(u_i), 1] -> 264 | # [batch_size, nb_capsules_p, 1, length(u_i), 1] 265 | # To allow the next dot product 266 | input_layer = tf.reshape(input_layer, shape=(-1, nb_capsules_p, 1, len_u_i, 1)) 267 | input_layer = tf.tile(input_layer, [1, 1, nb_capsules, 1, 1]) 268 | 269 | # Eq.2, calc u_hat 270 | # Prediction uj|i made by capsule i 271 | # w_ij: [ nb_capsules_p, nb_capsules, len_v_j, len_u_i, ] 272 | # input: [batch_size, nb_capsules_p, nb_capsules, len_ui, 1] 273 | # u_hat: [batch_size, nb_capsules_p, nb_capsules, len_v_j, 1] 274 | # Each capsule of the previous layer capsule layer is associated to a capsule of this layer 275 | u_hat = tf.einsum('abdc,iabcf->iabdf', w_ij, input_layer) 276 | 277 | # bij are the log prior probabilities that capsule i should be coupled to capsule j 278 | # [nb_capsules_p, nb_capsules, 1, 1] 279 | b_ij = tf.zeros(shape=[nb_capsules_p, nb_capsules, 1, 1], dtype=np.float32) 280 | 281 | return routing(u_hat, b_ij, nb_capsules, nb_capsules_p, iterations=iterations) 282 | 283 | def squash(vector): 284 | """ 285 | Squashing function corresponding to Eq. 1 286 | **input: ** 287 | *vector 288 | """ 289 | vector += 0.00001 # Workaround for the squashing function ... 290 | vec_squared_norm = tf.reduce_sum(tf.square(vector), -2, keep_dims=True) 291 | scalar_factor = vec_squared_norm / (1 + vec_squared_norm) / tf.sqrt(vec_squared_norm) 292 | vec_squashed = scalar_factor * vector # element-wise 293 | return(vec_squashed) 294 | 295 | 296 | # In[8]: 297 | 298 | 299 | import numpy as np 300 | from base_model import ModelBase 301 | import tensorflow as tf 302 | 303 | class ModelObjectDetection(ModelBase): 304 | """ 305 | ModelTrafficSign. 
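(This class body duplicates Model_Object_Detection from model_object_detection.py so the exported notebook runs standalone; the "ModelTrafficSign" name above appears to be a leftover from the code this file was adapted from.)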
306 | This class is used to create the conv graph using: 307 | Dynamic Routing Between Capsules 308 | """ 309 | 310 | # Numbers of label to predict 311 | NB_LABELS = 100 312 | 313 | def __init__(self, model_name, output_folder): 314 | """ 315 | **input: 316 | *model_name: (Integer) Name of this model 317 | *output_folder: Output folder to saved data (tensorboard, checkpoints) 318 | """ 319 | ModelBase.__init__(self, model_name, output_folder=output_folder) 320 | 321 | def _build_inputs(self): 322 | """ 323 | Build tensorflow inputs 324 | (Placeholder) 325 | **return: ** 326 | *tf_images: Images Placeholder 327 | *tf_labels: Labels Placeholder 328 | """ 329 | # Images 32*32*3 330 | tf_images = tf.placeholder(tf.float32, [None, 32, 32, 3], name='images') 331 | # Labels: [0, 1, 6, 20, ...] 332 | tf_labels = tf.placeholder(tf.int64, [None], name='labels') 333 | return tf_images, tf_labels 334 | 335 | def _build_main_network(self, images, conv_2_dropout): 336 | """ 337 | This method is used to create the two convolutions and the CapsNet on the top 338 | **input: 339 | *images: Image PLaceholder 340 | *conv_2_dropout: Dropout value placeholder 341 | **return: ** 342 | *Caps1: Output of first Capsule layer 343 | *Caps2: Output of second Capsule layer 344 | """ 345 | # First BLock: 346 | # Layer 1: Convolution. 347 | shape = (self.h.conv_1_size, self.h.conv_1_size, 3, self.h.conv_1_nb) 348 | conv1 = self._create_conv(self.tf_images, shape, relu=True, max_pooling=False, padding='VALID') 349 | # Layer 2: Convolution. 350 | #shape = (self.h.conv_2_size, self.h.conv_2_size, self.h.conv_1_nb, self.h.conv_2_nb) 351 | #conv2 = self._create_conv(conv1, shape, relu=True, max_pooling=False, padding='VALID') 352 | conv1 = tf.nn.dropout(conv1, keep_prob=conv_2_dropout) 353 | 354 | # Create the first capsules layer 355 | caps1 = conv_caps_layer( 356 | input_layer=conv1, 357 | capsules_size=self.h.caps_1_vec_len, 358 | nb_filters=self.h.caps_1_nb_filter, 359 | kernel=self.h.caps_1_size) 360 | # Create the second capsules layer used to predict the output 361 | caps2 = fully_connected_caps_layer( 362 | input_layer=caps1, 363 | capsules_size=self.h.caps_2_vec_len, 364 | nb_capsules=self.NB_LABELS, 365 | iterations=self.h.routing_steps) 366 | 367 | return caps1, caps2 368 | 369 | def _build_decoder(self, caps2, one_hot_labels, batch_size): 370 | """ 371 | Build the decoder part from the last capsule layer 372 | **input: 373 | *Caps2: Output of second Capsule layer 374 | *one_hot_labels 375 | *batch_size 376 | """ 377 | labels = tf.reshape(one_hot_labels, (-1, self.NB_LABELS, 1)) 378 | # squeeze(caps2): [?, len_v_j, capsules_nb] 379 | # labels: [?, NB_LABELS, 1] with capsules_nb == NB_LABELS 380 | mask = tf.matmul(tf.squeeze(caps2), labels, transpose_a=True) 381 | # Select the good capsule vector 382 | capsule_vector = tf.reshape(mask, shape=(batch_size, self.h.caps_2_vec_len)) 383 | # capsule_vector: [?, len_v_j] 384 | 385 | # Reconstruct image 386 | fc1 = tf.contrib.layers.fully_connected(capsule_vector, num_outputs=400) 387 | fc1 = tf.reshape(fc1, shape=(batch_size, 5, 5, 16)) 388 | upsample1 = tf.image.resize_nearest_neighbor(fc1, (8, 8)) 389 | conv1 = tf.layers.conv2d(upsample1, 4, (3,3), padding='same', activation=tf.nn.relu) 390 | 391 | upsample2 = tf.image.resize_nearest_neighbor(conv1, (16, 16)) 392 | conv2 = tf.layers.conv2d(upsample2, 8, (3,3), padding='same', activation=tf.nn.relu) 393 | 394 | upsample3 = tf.image.resize_nearest_neighbor(conv2, (32, 32)) 395 | conv6 = tf.layers.conv2d(upsample3, 16, 
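# final decoder stage: 16 feature maps at the restored 32x32 resolution;
# 'same' padding keeps the spatial size for the 3-channel RGB projection below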
(3,3), padding='same', activation=tf.nn.relu) 396 | 397 | # 3 channel for RGG 398 | logits = tf.layers.conv2d(conv6, 3, (3,3), padding='same', activation=None) 399 | decoded = tf.nn.sigmoid(logits, name='decoded') 400 | tf.summary.image('reconstruction_img', decoded) 401 | 402 | return decoded 403 | 404 | def init(self): 405 | """ 406 | Init the graph 407 | """ 408 | # Get graph inputs 409 | self.tf_images, self.tf_labels = self._build_inputs() 410 | # Dropout inputs 411 | self.tf_conv_2_dropout = tf.placeholder(tf.float32, shape=(), name='conv_2_dropout') 412 | # Dynamic batch size 413 | batch_size = tf.shape(self.tf_images)[0] 414 | # Translate labels to one hot array 415 | one_hot_labels = tf.one_hot(self.tf_labels, depth=self.NB_LABELS) 416 | # Create the first convolution and the CapsNet 417 | self.tf_caps1, self.tf_caps2 = self._build_main_network(self.tf_images, self.tf_conv_2_dropout) 418 | 419 | # Build the images reconstruction 420 | self.tf_decoded = self._build_decoder(self.tf_caps2, one_hot_labels, batch_size) 421 | 422 | # Build the loss 423 | _loss = self._build_loss( 424 | self.tf_caps2, one_hot_labels, self.tf_labels, self.tf_decoded, self.tf_images) 425 | (self.tf_loss_squared_rec, self.tf_margin_loss_sum, self.tf_predicted_class, 426 | self.tf_correct_prediction, self.tf_accuracy, self.tf_loss, self.tf_margin_loss, 427 | self.tf_reconstruction_loss) = _loss 428 | 429 | # Build optimizer 430 | optimizer = tf.train.AdamOptimizer(learning_rate=self.h.learning_rate) 431 | self.tf_optimizer = optimizer.minimize(self.tf_loss, global_step=tf.Variable(0, trainable=False)) 432 | 433 | # Log value into tensorboard 434 | tf.summary.scalar('margin_loss', self.tf_margin_loss) 435 | tf.summary.scalar('accuracy', self.tf_accuracy) 436 | tf.summary.scalar('total_loss', self.tf_loss) 437 | tf.summary.scalar('reconstruction_loss', self.tf_reconstruction_loss) 438 | 439 | self.tf_test = tf.random_uniform([2], minval=0, maxval=None, dtype=tf.float32, seed=None, name="tf_test") 440 | 441 | self.init_session() 442 | 443 | 444 | def _build_loss(self, caps2, one_hot_labels, labels, decoded, images): 445 | """ 446 | Build the loss of the graph 447 | """ 448 | # Get the length of each capsule 449 | capsules_length = tf.sqrt(tf.reduce_sum(tf.square(caps2), axis=2, keep_dims=True)) 450 | 451 | max_l = tf.square(tf.maximum(0., 0.9 - capsules_length)) 452 | max_l = tf.reshape(max_l, shape=(-1, self.NB_LABELS)) 453 | max_r = tf.square(tf.maximum(0., capsules_length - 0.1)) 454 | max_r = tf.reshape(max_r, shape=(-1, self.NB_LABELS)) 455 | t_c = one_hot_labels 456 | m_loss = t_c * max_l + 0.5 * (1 - t_c) * max_r 457 | margin_loss_sum = tf.reduce_sum(m_loss, axis=1) 458 | margin_loss = tf.reduce_mean(margin_loss_sum) 459 | 460 | # Reconstruction loss 461 | loss_squared_rec = tf.square(decoded - images) 462 | reconstruction_loss = tf.reduce_mean(loss_squared_rec) 463 | 464 | # 3. 
Total loss 465 | loss = margin_loss + (0.0005 * reconstruction_loss) 466 | 467 | # Accuracy 468 | predicted_class = tf.argmax(capsules_length, axis=1) 469 | predicted_class = tf.reshape(predicted_class, [tf.shape(capsules_length)[0]]) 470 | correct_prediction = tf.equal(predicted_class, labels) 471 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 472 | 473 | return (loss_squared_rec, margin_loss_sum, predicted_class, correct_prediction, accuracy, 474 | loss, margin_loss, reconstruction_loss) 475 | 476 | def optimize(self, images, labels, tb_save=True): 477 | """ 478 | Train the model 479 | **input: ** 480 | *images: Image to train the model on 481 | *labels: True classes 482 | *tb_save: (Boolean) Log this optimization in tensorboard 483 | **return: ** 484 | Loss: The loss of the model on this batch 485 | Acc: Accuracy of the model on this batch 486 | """ 487 | tensors = [self.tf_optimizer, self.tf_margin_loss, self.tf_accuracy, self.tf_tensorboard] 488 | _, loss, acc, summary = self.sess.run(tensors, 489 | feed_dict={ 490 | self.tf_images: images, 491 | self.tf_labels: labels, 492 | self.tf_conv_2_dropout: self.h.conv_2_dropout 493 | }) 494 | 495 | if tb_save: 496 | # Write data to tensorboard 497 | self.train_writer.add_summary(summary, self.train_writer_it) 498 | self.train_writer_it += 1 499 | 500 | return loss, acc 501 | 502 | def evaluate(self, images, labels, tb_train_save=False, tb_test_save=False): 503 | """ 504 | Evaluate dataset 505 | **input: ** 506 | *images: Image to train the model on 507 | *labels: True classes 508 | *tb_train_save: (Boolean) Log this optimization in tensorboard under the train part 509 | *tb_test_save: (Boolean) Log this optimization in tensorboard under the test part 510 | **return: ** 511 | Loss: The loss of the model on this batch 512 | Acc: Accuracy of the model on this batch 513 | """ 514 | tensors = [self.tf_margin_loss, self.tf_accuracy, self.tf_tensorboard] 515 | loss, acc, summary = self.sess.run(tensors, 516 | feed_dict={ 517 | self.tf_images: images, 518 | self.tf_labels: labels, 519 | self.tf_conv_2_dropout: 1. 520 | }) 521 | 522 | if tb_test_save: 523 | # Write data to tensorboard 524 | self.test_writer.add_summary(summary, self.test_writer_it) 525 | self.test_writer_it += 1 526 | 527 | if tb_train_save: 528 | # Write data to tensorboard 529 | self.train_writer.add_summary(summary, self.train_writer_it) 530 | self.train_writer_it += 1 531 | 532 | return loss, acc 533 | 534 | def predict(self, images): 535 | """ 536 | Method used to predict a class 537 | Return a softmax 538 | **input: ** 539 | *images: Image to train the model on 540 | **return: 541 | *softmax: Softmax between all capsules 542 | """ 543 | tensors = [self.tf_caps2] 544 | 545 | caps2 = self.sess.run(tensors, 546 | feed_dict={ 547 | self.tf_images: images, 548 | self.tf_conv_2_dropout: 1. 
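# sess.run is given a list of fetches, so it returns a list;
# the [0] just below unpacks the single caps2 array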
549 | })[0] 550 | 551 | # tf.sqrt(tf.reduce_sum(tf.square(caps2), axis=2, keep_dims=True)) 552 | caps2 = np.sqrt(np.sum(np.square(caps2), axis=2, keepdims=True)) 553 | caps2 = np.reshape(caps2, (len(images), self.NB_LABELS)) 554 | # softmax 555 | softmax = np.exp(caps2) / np.sum(np.exp(caps2), axis=1, keepdims=True) 556 | 557 | return softmax 558 | 559 | def reconstruction(self, images, labels): 560 | """ 561 | Method used to get the reconstructions given a batch 562 | Return the result as a softmax 563 | **input: ** 564 | *images: Image to train the model on 565 | *labels: True classes 566 | """ 567 | tensors = [self.tf_decoded] 568 | 569 | decoded = self.sess.run(tensors, 570 | feed_dict={ 571 | self.tf_images: images, 572 | self.tf_labels: labels, 573 | self.tf_conv_2_dropout: 1. 574 | })[0] 575 | 576 | return decoded 577 | 578 | def evaluate_dataset(self, images, labels, batch_size=10): 579 | """ 580 | Evaluate a full dataset 581 | This method is used to fully evaluate the dataset batch per batch. Useful when 582 | the dataset can't be fit inside to the GPU. 583 | *input: ** 584 | *images: Image to train the model on 585 | *labels: True classes 586 | *return: ** 587 | *loss: Loss overall your dataset 588 | *accuracy: Accuracy overall your dataset 589 | *predicted_class: Predicted class 590 | """ 591 | tensors = [self.tf_loss_squared_rec, self.tf_margin_loss_sum, self.tf_correct_prediction, 592 | self.tf_predicted_class] 593 | 594 | loss_squared_rec_list = None 595 | margin_loss_sum_list = None 596 | correct_prediction_list = None 597 | predicted_class = None 598 | 599 | b = 0 600 | for batch in self.get_batches([images, labels], batch_size, shuffle=False): 601 | images_batch, labels_batch = batch 602 | loss_squared_rec, margin_loss_sum, correct_prediction, classes = self.sess.run(tensors, 603 | feed_dict={ 604 | self.tf_images: images_batch, 605 | self.tf_labels: labels_batch, 606 | self.tf_conv_2_dropout: 1. 607 | }) 608 | if loss_squared_rec_list is not None: 609 | predicted_class = np.concatenate((predicted_class, classes)) 610 | loss_squared_rec_list = np.concatenate((loss_squared_rec_list, loss_squared_rec)) 611 | margin_loss_sum_list = np.concatenate((margin_loss_sum_list, margin_loss_sum)) 612 | correct_prediction_list = np.concatenate((correct_prediction_list, correct_prediction)) 613 | else: 614 | predicted_class = classes 615 | loss_squared_rec_list = loss_squared_rec 616 | margin_loss_sum_list = margin_loss_sum 617 | correct_prediction_list = correct_prediction 618 | b += batch_size 619 | 620 | margin_loss = np.mean(margin_loss_sum_list) 621 | reconstruction_loss = np.mean(loss_squared_rec_list) 622 | accuracy = np.mean(correct_prediction_list) 623 | 624 | loss = margin_loss 625 | 626 | return loss, accuracy, predicted_class 627 | 628 | 629 | # In[ ]: 630 | 631 | 632 | tf.reset_default_graph() 633 | model = ModelObjectDetection("ObjectDetecions", output_folder="outputs") 634 | model.init() 635 | 636 | 637 | # In[ ]: 638 | 639 | 640 | ### Train your model here. 641 | ### Calculate and report the accuracy on the training and validation set. 642 | ### Once a final model architecture is selected, 643 | ### the accuracy on the test set should be calculated and reported as well. 644 | ### Feel free to use as many code cells as needed. 
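# Quick numeric sanity check of the margin loss implemented in _build_loss
# above. This is a minimal NumPy sketch added for illustration only:
# margin_loss_np is not part of the model code, and NumPy is already
# imported as np at the top of this file. The true class is pushed above
# the 0.9 margin, every other class below 0.1, and the negative term is
# down-weighted by 0.5.

def margin_loss_np(lengths, one_hot):
    """NumPy mirror of the margin loss in _build_loss."""
    max_l = np.square(np.maximum(0., 0.9 - lengths))
    max_r = np.square(np.maximum(0., lengths - 0.1))
    m_loss = one_hot * max_l + 0.5 * (1 - one_hot) * max_r
    return np.mean(np.sum(m_loss, axis=1))

# Capsule lengths for 3 hypothetical classes, true class first:
# true class term: (0.9 - 0.8)^2 = 0.01
# wrong class terms: 0.5 * (0.2 - 0.1)^2 = 0.005 and 0 (since 0.05 < 0.1)
print(margin_loss_np(np.array([[0.8, 0.2, 0.05]]),
                     np.array([[1., 0., 0.]])))  # -> 0.015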
645 | 
646 | BATCH_SIZE = 1000
647 | 
648 | # Utility function to print the current progression
649 | def plot_progression(b, cost, acc, label): print(
650 | "[%s] Batch ID = %s, loss = %s, acc = %s" % (label, b, cost, acc))
651 | 
652 | # Training pipeline
653 | b = 0
654 | valid_batch = inference_datagen.flow(x_test, y_test, batch_size=BATCH_SIZE)  # note: the test split doubles as the validation set here
655 | best_validation_loss = None
656 | augmented_factor = 0.99
657 | decrease_factor = 0.90
658 | train_batches = train_datagen.flow(x_train, y_train, batch_size=BATCH_SIZE)
659 | augmented_train_batches = train_datagen_augmented.flow(x_train, y_train, batch_size=BATCH_SIZE)
660 | 
661 | while True:
662 | next_batch = next(
663 | augmented_train_batches if random.uniform(0, 1) < augmented_factor else train_batches)
664 | x_batch, y_batch = next_batch
665 | 
666 | ### Training
667 | cost, acc = model.optimize(x_batch, y_batch)
668 | ### Validation
669 | x_batch, y_batch = next(valid_batch, None)
670 | # Retrieve the cost and acc on this validation batch and save it in tensorboard
671 | cost_val, acc_val = model.evaluate(x_batch, y_batch, tb_test_save=True)
672 | 
673 | if b % 10 == 0: # Plot the last results
674 | plot_progression(b, cost, acc, "Train")
675 | plot_progression(b, cost_val, acc_val, "Validation")
676 | if b % 1000 == 0: # Evaluate the model on the full validation set
677 | print("Evaluate full validation dataset ...")
678 | loss, acc, _ = model.evaluate_dataset(x_test, y_test)
679 | print("Current loss: %s Best loss: %s" % (loss, best_validation_loss))
680 | plot_progression(b, loss, acc, "TOTAL Validation")
681 | if best_validation_loss is None or loss < best_validation_loss:
682 | best_validation_loss = loss
683 | model.save()
684 | augmented_factor = augmented_factor * decrease_factor
685 | print("Augmented Factor = %s" % augmented_factor)
686 | 
687 | b += 1
688 | 
689 | 
690 | # In[ ]:
691 | 
692 | 
693 | # Test the model on the test set
694 | 
695 | # Evaluate the whole dataset
696 | loss, acc, predicted_class = model.evaluate_dataset(x_test, y_test)
697 | 
698 | print("Test Accuracy = ", acc)
699 | print("Test Loss = ", loss)
700 | 
701 | 
--------------------------------------------------------------------------------