├── .gitignore ├── opennet ├── simple_dnn │ ├── __init__.py │ └── util │ │ ├── __init__.py │ │ └── format.py ├── openmax.py └── opennet.py ├── Supplementary_Material_Learning_a_Neural_network_based_Representation_for_Open_Set_Recognition.pdf ├── README.md └── LICENSE /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | -------------------------------------------------------------------------------- /opennet/simple_dnn/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /opennet/simple_dnn/util/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Supplementary_Material_Learning_a_Neural_network_based_Representation_for_Open_Set_Recognition.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shrtCKT/opennet/HEAD/Supplementary_Material_Learning_a_Neural_network_based_Representation_for_Open_Set_Recognition.pdf -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # OpenNet 2 | 3 | Code for research paper "Learning a Neural Network based Representation for Open Set Recognition" M. Hassen, P. K. Chan 4 | (https://doi.org/10.1137/1.9781611976236.18 , http://arxiv.org/abs/1802.04365) 5 | 6 | If you use this code please cite the paper. 
# ------------------------------------------------------------------------------
# README.md (continued)
# ------------------------------------------------------------------------------
# ```
# @inproceedings{hassen2020learning,
#   title={Learning a neural-network-based representation for open set recognition},
#   author={Hassen, Mehadi and Chan, Philip K},
#   booktitle={Proceedings of the 2020 SIAM International Conference on Data Mining},
#   pages={154--162},
#   year={2020},
#   organization={SIAM}
# }
# ```
#
# ## Requirements
#
# * Tensorflow (tested with version 1.3)
# * Numpy (tested with version 1.13.1)
# * Scipy (tested with version 0.19.1)
# * For openmax experiments you will need to clone
#   https://github.com/abhijitbendale/OSDN into this directory.
#
# ------------------------------------------------------------------------------
# /opennet/simple_dnn/util/format.py:
# ------------------------------------------------------------------------------
import numpy as np


def reshape_pad(input_shape_2d, desired_shape_2d, input_ch, pad=True, pad_value=-1):
    """
    Build a function that reshapes a flattened 2d input matrix X into 4D. Where:
      * axis 0 is batch size.
      * axis 1 is image width.
      * axis 2 is image height.
      * axis 3 is number of image color channels.

    :param input_shape_2d: [input_image_width, input_image_height]
    :param desired_shape_2d: [desired_image_width, desired_image_height]
    :param input_ch: input image channels.
    :param pad=True: pad the width and height symmetrically with pad_value to
                     reformat to the desired shape. If False the input is only reshaped.
    :param pad_value=-1: the constant written into the padded border.
    :return: a function taking a 2d array of shape
             [batch_size, width * height * input_ch] and returning the 4D array.
    :raises AssertionError: if the difference between the desired and the input
                            size is odd along either axis (it could not be split
                            evenly between both sides).
    """
    assert (desired_shape_2d[0] - input_shape_2d[0]) % 2 == 0
    assert (desired_shape_2d[1] - input_shape_2d[1]) % 2 == 0
    # Floor division keeps the pad widths integral on Python 3 as well; np.pad
    # rejects float pad widths. The asserts above guarantee the division is exact.
    axis_1_pad_size = (desired_shape_2d[0] - input_shape_2d[0]) // 2
    axis_2_pad_size = (desired_shape_2d[1] - input_shape_2d[1]) // 2

    def reshape(xs):
        """ Reshapes and pads xs.
        :param xs: a 2d array
        """
        assert len(xs.shape) == 2
        batch_size = xs.shape[0]

        xs = np.reshape(xs, [batch_size, input_shape_2d[0], input_shape_2d[1], input_ch])
        if pad:
            # Only the two spatial axes are padded; batch and channel axes are left alone.
            xs = np.pad(xs, ((0, 0), (axis_1_pad_size, axis_1_pad_size),
                             (axis_2_pad_size, axis_2_pad_size), (0, 0)),
                        'constant', constant_values=(pad_value, pad_value))

        return xs

    return reshape


class NormalScale(object):
    """Center each dimension on its training mean and rescale it.

    NOTE(review): the scaling divides by the per-dimension *variance* (not the
    standard deviation) and then multiplies by scale_factor, so the result does
    not in general have variance equal to scale_factor. Behavior is kept as is.
    """
    def __init__(self, train_X, scale_factor=0.5):
        """
        train_X - flat training data.
        scale_factor - multiplier applied after dividing by the variance.
        """
        assert len(train_X.shape) == 2  # the training data should be flat
        # Keep a leading axis of size 1 so the statistics broadcast over batches.
        self.mean = train_X.mean(axis=0)[None, :]
        self.var = train_X.var(axis=0)[None, :]
        self.scale_factor = scale_factor

    def scale(self, xs):
        """Return the scaled copy of xs; entries that come out as inf or nan
        (zero-variance dimensions) are replaced by 0."""
        # Division by a zero variance is expected and handled below, so the
        # corresponding numpy warnings are silenced for this computation only.
        with np.errstate(divide='ignore', invalid='ignore'):
            normalized = ((xs - self.mean) / self.var) * self.scale_factor
        return np.where(np.isfinite(normalized), normalized, 0.)

    def inverse_scale(self, xs):
        """Undo scale(): map scaled values back to the original input scale."""
        return (xs * self.var / self.scale_factor) + self.mean


class UnitPosNegScale(object):
    """Linear mapping between the unit range [0, 1] and the range [-1, 1]."""
    @staticmethod
    def scale(xs):
        """ Assumes xs is already in unit scale with range of [0, 1]
        """
        return (xs - 0.5) * 2.0  # Transform range between -1 and 1

    @staticmethod
    def inverse_scale(xs):
        """ Maps values in the range [-1, 1] back to the range [0, 1]
        """
        return (xs + 1.) / 2.


def unit_scale(xs):
    """
    Scale each column in the range of [0, 1]
    :param xs: Input matrix.
    :return: the min-max scaled matrix; columns whose range is zero (which would
             produce inf or nan) are set to 0.
    """
    assert len(xs.shape) == 2
    xs_min = np.amin(xs, axis=0)
    xs_range = np.amax(xs, axis=0) - xs_min
    # Constant columns divide by zero on purpose; the result is cleaned up below.
    with np.errstate(divide='ignore', invalid='ignore'):
        scaled = np.true_divide(np.subtract(xs, xs_min), xs_range)
    return np.where(np.isfinite(scaled), scaled, 0.)

# ------------------------------------------------------------------------------
# /LICENSE:
# ------------------------------------------------------------------------------
#                                  Apache License
#                            Version 2.0, January 2004
#                         http://www.apache.org/licenses/
#
#    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
#
#    1. Definitions.
#
#       "License" shall mean the terms and conditions for use, reproduction,
#       and distribution as defined by Sections 1 through 9 of this document.
#
#       "Licensor" shall mean the copyright owner or entity authorized by
#       the copyright owner that is granting the License.
#
#       "Legal Entity" shall mean the union of the acting entity and all
#       other entities that control, are controlled by, or are under common
#       control with that entity. For the purposes of this definition,
#       "control" means (i) the power, direct or indirect, to cause the
#       direction or management of such entity, whether by contract or
#       otherwise, or (ii) ownership of fifty percent (50%) or more of the
#       outstanding shares, or (iii) beneficial ownership of such entity.
#
#       "You" (or "Your") shall mean an individual or Legal Entity
#       exercising permissions granted by this License.
#
#       "Source" form shall mean the preferred form for making modifications,
#       including but not limited to software source code, documentation
#       source, and configuration files.
#
#       "Object" form shall mean any form resulting from mechanical
#       transformation or translation of a Source form, including but
#       not limited to compiled object code, generated documentation,
#       and conversions to other media types.
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /opennet/openmax.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os.path 3 | sys.path.insert(0, os.path.abspath("./simple-dnn")) 4 | 5 | import numpy as np 6 | import scipy 7 | import tensorflow as tf 8 | import tensorflow.contrib.slim as slim 9 | import time 10 | 11 | from simple_dnn.util.format import UnitPosNegScale, reshape_pad 12 | 13 | from open_net import OpenNetBase 14 | 15 | # Download OSDN from https://github.com/abhijitbendale/OSDN and compile libmr. 
16 | sys.path.insert(0, os.path.abspath("./OSDN")) 17 | import libmr 18 | 19 | class OpenMaxBase(OpenNetBase): # OpenMax open set classifier on top of OpenNetBase; trained with the cross entropy loss only 20 | def __init__(self, x_dim, y_dim, 21 | h_dims=[128], 22 | activation_fn=tf.nn.relu, 23 | x_scale=UnitPosNegScale.scale, 24 | x_inverse_scale=UnitPosNegScale.inverse_scale, 25 | x_reshape=None, 26 | c_opt=tf.train.AdamOptimizer(learning_rate=0.001, beta1=0.5), 27 | decision_dist_fn = 'eucos', 28 | dropout = True, keep_prob=0.7, 29 | batch_size=128, iterations=5000, 30 | display_step=500, save_step=500, 31 | model_directory=None, # Directory to save trained model to. 32 | tailsize = 20, 33 | alpharank = 4, 34 | ): 35 | """ 36 | Args: 37 | :param x_dim - dimension of the input 38 | :param y_dim - number of known classes. Also used as the latent dimension (z_dim). 39 | :param h_dims - a list of ints. The number of units in each fully connected layer. Not used by this base class itself. 40 | :param x_inverse_scale - reverse scaling fn. by rescaling from [-1, 1] to original input scale. 41 | If None, the output of decoder(if there is a decoder) will rescaled. 42 | :param x_reshape - a function to reshape the input before feeding to the network's input layer. 43 | If None, the input will not be reshaped. 44 | :param c_opt - the Optimizer used when updating based on cross entropy loss. Default is AdamOptimizer. 45 | :param decision_dist_fn - distance function used when calculating distance from MAV. Must be 'euclidean' or 'eucos'. 46 | :param batch_size - training batch size. 47 | :param iterations - number of training iterations. 48 | :param display_step - training info displaying interval. 49 | :param save_step - model saving interval. 50 | :param model_directory - directory to save model in. 51 | :param tailsize - int, openmax parameter which specifies the number of instances to consider when performing the 52 | weibull tail fitting. 53 | :param alpharank - int, openmax parameter which specifies the number of top-k activation values to take 54 | values when redistributing the activation vector values. Must be smaller than y_dim. 
55 | """ 56 | assert decision_dist_fn in ['euclidean', 'eucos'] 57 | self.tailsize = tailsize 58 | assert alpharank < y_dim 59 | self.alpharank = alpharank 60 | 61 | super(OpenMaxBase, self).__init__( 62 | x_dim, y_dim, z_dim=y_dim, 63 | x_scale=x_scale, x_inverse_scale=x_inverse_scale, x_reshape=x_reshape, 64 | opt=None, recon_opt=None, c_opt=c_opt, 65 | decision_dist_fn=decision_dist_fn, dropout=dropout, keep_prob=keep_prob, 66 | batch_size=batch_size, iterations=iterations, 67 | display_step=display_step, save_step=save_step, 68 | model_directory=model_directory, 69 | ce_loss=True, recon_loss=False, inter_loss=False, intra_loss=False, 70 | div_loss=False) 71 | 72 | def dist_from_mav(self, Z, c_mu): # Pairwise distance between the rows of Z and the rows of c_mu; the euclidean term is divided by 200 73 | if self.decision_dist_fn == 'euclidean': 74 | return scipy.spatial.distance.cdist(Z, c_mu, metric=self.decision_dist_fn) / 200 75 | elif self.decision_dist_fn == 'eucos': 76 | return (scipy.spatial.distance.cdist(Z, c_mu, metric='euclidean') / 200) + \ 77 | scipy.spatial.distance.cdist(Z, c_mu, metric='cosine') 78 | 79 | def update_class_stats(self, X, y): # Computes the per-class mean latent vectors (self.c_means) and fits one libmr model per class (self.mr_model) 80 | z = self.latent(X) 81 | pred_y = self.predict(X) 82 | correct = (pred_y == np.argmax(y, axis=1)) 83 | z = z[correct] 84 | pred_y = pred_y[correct] 85 | 86 | # Fit a weibull model for each class, using only the correctly classified instances 87 | self.mr_model = {} 88 | self.c_means = np.zeros((self.y_dim, z.shape[1])) 89 | 90 | for c in range(self.y_dim): 91 | # Calculate class mean. NOTE(review): a class without any correctly classified instance yields the mean of an empty array here - confirm callers rule this out. 92 | z_c = z[pred_y == c] 93 | mu_c = z_c.mean(axis=0) 94 | # Fit Weibull on the (at most) tailsize largest distances from the class mean 95 | mr = libmr.MR() 96 | tailtofit = sorted(self.dist_from_mav(z_c, mu_c[None, :]).ravel())[-self.tailsize:] 97 | mr.fit_high(tailtofit, len(tailtofit)) 98 | self.mr_model[c] = mr 99 | self.c_means[c, :] = mu_c 100 | 101 | 102 | def predict_prob_open(self, X): 103 | """ Predicts open set class probabilities for X. The returned matrix has one column per known class plus a last column for the unknown class. 104 | """ 105 | z = self.latent(X) 106 | pred_test = self.predict(X) # NOTE(review): pred_test is never read in this method 107 | 108 | alpha_weights = [((self.alpharank+1) - i)/float(self.alpharank) for i in range(1, self.alpharank+1)] 109 | 
descending_argsort = np.fliplr(np.argsort(z, axis=1)) 110 | z_normalized = np.zeros((z.shape[0], z.shape[1]+1)) 111 | 112 | # Compute distance of every instance from every class mean (MAV) 113 | all_dist = self.dist_from_mav(z, self.c_means) 114 | 115 | # Compute OpenMax Prob. NOTE(review): only the top alpharank classes of each instance are written below; every other class keeps the initial 0 in z_normalized instead of its value in z - confirm this is intended. 116 | for i in range(z.shape[0]): # for each data point 117 | for alpha in range(self.alpharank): 118 | c = descending_argsort[i, alpha] 119 | ws_c = 1 - self.mr_model[c].w_score(all_dist[i, c]) * alpha_weights[alpha] 120 | 121 | z_normalized[i, c] = z[i,c] * ws_c 122 | z_normalized[i, -1] += z[i,c] * (1 - ws_c) 123 | 124 | open_prob = np.exp(z_normalized) 125 | if np.any(open_prob.sum(axis=1)[:,None] == np.inf): 126 | print 'Error: Inf value has been returned from w_score function. Consider training with larger tailsize value.' # NOTE(review): Python 2 print statement; a syntax error under Python 3 127 | open_prob = open_prob / open_prob.sum(axis=1)[:,None] 128 | return open_prob 129 | 130 | def predict_open(self, X): 131 | """ Predicts open set class labels for X: the index of the largest open set probability, where the last index stands for the unknown class. 132 | """ 133 | open_prob = self.predict_prob_open(X) 134 | return np.argmax(open_prob, axis=1) 135 | 136 | def distance_from_all_classes(self, X, reformat=True): 137 | """ Computes distance of X from all class MAVs. 138 | """ 139 | z = self.latent(X, reformat=reformat) 140 | dist = self.dist_from_mav(z, self.c_means) 141 | 142 | return dist 143 | 144 | def decision_function(self, X): # Score of the unknown class: the last column of the open set probabilities 145 | open_prob = self.predict_prob_open(X) 146 | return open_prob[:, -1] 147 | 148 | 149 | class OpenMaxFlat(OpenMaxBase): # OpenMax model whose encoder is a stack of fully connected layers 150 | def __init__(self, x_dim, y_dim, 151 | h_dims=[128], 152 | activation_fn=tf.nn.relu, 153 | x_scale=UnitPosNegScale.scale, 154 | x_inverse_scale=UnitPosNegScale.inverse_scale, 155 | x_reshape=None, 156 | c_opt=tf.train.AdamOptimizer(learning_rate=0.001, beta1=0.5), 157 | decision_dist_fn = 'eucos', 158 | dropout = True, keep_prob=0.7, 159 | batch_size=128, iterations=5000, 160 | display_step=500, save_step=500, 161 | model_directory=None, # Directory to save trained model to. 
162 | tailsize = 20, 163 | alpharank = 4, 164 | ): 165 | """ 166 | Args: 167 | :param x_dim - dimension of the input 168 | :param y_dim - number of known classes. 169 | :param h_dims - a list of ints. The number of units in each fully connected layer. A single value is wrapped into a list. 170 | :param x_inverse_scale - reverse scaling fn. by rescaling from [-1, 1] to original input scale. 171 | If None, the output of decoder(if there is a decoder) will rescaled. 172 | :param x_reshape - a function to reshape the input before feeding to the network's input layer. 173 | If None, the input will not be reshaped. 174 | :param c_opt - the Optimizer used when updating based on cross entropy loss. Default is AdamOptimizer. 175 | :param batch_size - training batch size. 176 | :param iterations - number of training iterations. 177 | :param display_step - training info displaying interval. 178 | :param save_step - model saving interval. 179 | :param model_directory - directory to save model in. 180 | :param decision_dist_fn - distance function used when calculating distance from MAV. 181 | :param tailsize - int, openmax parameter which specifies the number of instances to consider when performing the 182 | weibull tail fitting. 183 | :param alpharank - int, openmax parameter which specifies the number of top-k activation values to take 184 | values when redistributing the activation vector values. 
185 | """ 186 | 187 | # Network setting: store h_dims as a list or tuple of layer sizes 188 | if isinstance(h_dims, list) or isinstance(h_dims, tuple): 189 | self.h_dims = h_dims 190 | else: 191 | self.h_dims = [h_dims] 192 | 193 | self.activation_fn = activation_fn 194 | 195 | super(OpenMaxFlat, self).__init__( 196 | x_dim, y_dim, 197 | x_scale=x_scale, x_inverse_scale=x_inverse_scale, x_reshape=x_reshape, 198 | c_opt=c_opt, 199 | decision_dist_fn=decision_dist_fn, dropout=dropout, keep_prob=keep_prob, 200 | batch_size=batch_size, iterations=iterations, 201 | display_step=display_step, save_step=save_step, 202 | model_directory=model_directory, 203 | tailsize=tailsize, alpharank=alpharank) 204 | 205 | self.model_params += ['h_dims', 'activation_fn'] 206 | 207 | 208 | def encoder(self, x, reuse=False): 209 | """ Encoder network. 210 | Args: 211 | :param x - input x. 212 | :param reuse - whether to reuse the old network or create a new one. 213 | Returns: 214 | z - output of the final 'enc_z' layer, which has z_dim units and no activation function. 215 | """ 216 | net = x 217 | with slim.arg_scope([slim.fully_connected], normalizer_fn=slim.batch_norm, 218 | weights_initializer=tf.contrib.layers.xavier_initializer(), 219 | activation_fn=self.activation_fn): 220 | for i, num_unit in enumerate(self.h_dims): 221 | net = slim.fully_connected( 222 | net, num_unit, 223 | reuse=reuse, scope='enc_{0}'.format(i)) 224 | if self.dropout: 225 | net = slim.dropout(net, keep_prob=self.keep_prob, is_training=self.is_training) 226 | # It is very important to batch normalize the output of encoder. 
227 | z = slim.fully_connected( 228 | net, self.z_dim, activation_fn=None, normalizer_fn=slim.batch_norm, 229 | weights_initializer=tf.contrib.layers.xavier_initializer(), 230 | reuse=reuse, scope='enc_z') 231 | 232 | return z 233 | 234 | 235 | def build_model(self): 236 | """ Builds the graph: input placeholders, the encoder (its output is used as the logits), the class means, the loss and training ops, the accuracy, and a second encoder pass with reuse=True for inference. 237 | """ 238 | self.x = tf.placeholder(tf.float32, shape=[None, self.x_dim]) 239 | self.z = tf.placeholder(tf.float32, shape=[None, self.z_dim]) 240 | self.y = tf.placeholder(tf.float32, shape=[None, self.y_dim]) 241 | 242 | self.z = self.encoder(self.x) 243 | 244 | self.x_recon = None 245 | 246 | logits = self.z 247 | 248 | # Calculate class mean 249 | self.class_means = self.bucket_mean(self.z, tf.argmax(self.y, axis=1), self.y_dim) 250 | 251 | self.loss_fn_training_op(self.x, self.y, self.z, logits, self.x_recon, self.class_means) 252 | 253 | self.pred_prob = tf.nn.softmax(logits=logits) 254 | pred = tf.argmax(self.pred_prob, axis=1) 255 | actual = tf.argmax(self.y, axis=1) 256 | self.acc = tf.reduce_mean(tf.cast(tf.equal(pred, actual), tf.float32)) 257 | 258 | # For inference, build the encoder again with is_training set to False, then set it back to True 259 | self.is_training = False 260 | self.z_test = self.encoder(self.x, reuse=True) 261 | self.pred_prob_test = tf.nn.softmax(logits=self.z_test) 262 | self.is_training = True 263 | 264 | class OpenMaxCNN(OpenMaxBase): 265 | def __init__(self, x_dim, x_ch, y_dim, conv_units, hidden_units, 266 | kernel_sizes=[5,5], strides=[1, 1], paddings='SAME', 267 | pooling_enable=False, pooling_kernel=[2,2], 268 | pooling_stride=[2,2], pooling_padding='SAME', 269 | pooling_type='avg', # 'avg' or 'max' 270 | activation_fn=tf.nn.relu, 271 | 272 | x_scale=UnitPosNegScale.scale, 273 | x_inverse_scale=UnitPosNegScale.inverse_scale, 274 | x_reshape=None, 275 | 276 | c_opt=tf.train.AdamOptimizer(learning_rate=0.001, beta1=0.5), 277 | 278 | decision_dist_fn = 'eucos', 279 | dropout=True, keep_prob=0.7, 280 | batch_size=128, iterations=5000, 281 | display_step=500, save_step=500, 282 | model_directory=None, # Directory 
to save trained model to. 283 | tailsize = 20, 284 | alpharank = 4, 285 | ): 286 | """ 287 | Args: 288 | :param x_dim - dimension of the input 289 | :param y_dim - number of known classes. 290 | :param conv_units - a list of ints. The number of filters in each convolutional layer. 291 | :param hidden_units - a list of ints. The of units in each fully connected layer. 292 | :param kernel_sizes - a list or a list of lists. Size of the kernel of the conv2d. 293 | If a list with two ints all layers use the same kernel size. 294 | Otherwise if a list of list (example [[5,5], [4,4]]) each layer 295 | will have different kernel size. 296 | :param strides - a list or a list of lists. The strides of each conv2d kernel. 297 | :param paddings - padding for each conv2d. Default 'SAME'. 298 | :param pooling_enable - if True, add pooling layer after each conv2d layer. 299 | :param pooling_kernel - a list or a list of lists. The size of the pooling kernel. 300 | If a list with two ints all layers use the same kernel size. 301 | Otherwise if a list of list (example [[5,5], [4,4]]) each layer 302 | will have different kernel size. 303 | :param pooling_stride - a list or a list of lists. The strides of each pooing kernel. 304 | :param pooling_padding - padding for each pool2d layer. Default 'SAME'. 305 | :param pooling_type - pooling layer type. supported 'avg' or 'max'. Default max_pool2d. 306 | :param x_scale - an input scaling function. Default scale to range of [-1, 1]. 307 | If none, the input will not be scaled. 308 | :param x_inverse_scale - reverse scaling fn. by rescaling from [-1, 1] to original input scale. 309 | If None, the the output of decoder(if there is a decoder) will rescaled. 310 | :param x_reshape - a function to reshape the input before feeding to the networks input layer. 311 | If None, the input will not be reshaped. 312 | :param c_opt - the Optimizer used when updating based on cross entropy loss. Default is AdamOptimizer. 
313 | :param batch_size - training batch size. 314 | :param iterations - number of training iterations. 315 | :param display_step - training info displaying interval. 316 | :param save_step - model saving interval. 317 | :param model_directory - directory to save model in. 318 | :param decision_dist_fn - distance function used when calculating distance from MAV. 319 | :param tailsize - int, openmax parameter which specifies the number instances to consider when performing the 320 | weibull tail fitting. 321 | :param alpharank = int, openmax parameter which specifies the number of top-k activation values to take 322 | values when redistributing the activation vector values. 323 | """ 324 | self.x_ch = x_ch 325 | 326 | # Conv layer config 327 | self.conv_units = conv_units 328 | if isinstance(kernel_sizes[0], list) or isinstance(kernel_sizes[0], tuple): 329 | assert len(conv_units) == len(kernel_sizes) 330 | self.kernel_sizes = kernel_sizes 331 | else: 332 | self.kernel_sizes = [kernel_sizes] * len(conv_units) 333 | 334 | if isinstance(strides[0], list) or isinstance(strides[0], tuple): 335 | assert len(conv_units) == len(strides) 336 | self.strides = strides 337 | else: 338 | self.strides = [strides] * len(conv_units) 339 | 340 | if isinstance(paddings, list): 341 | assert len(conv_units) == len(paddings) 342 | self.paddings = paddings 343 | else: 344 | self.paddings = [paddings] * len(conv_units) 345 | 346 | # Conv pooling config 347 | self.pooling_enable = pooling_enable 348 | assert pooling_type in ['avg', 'max'] # supported pooling types. 
def build_conv(self, x, reuse=False):
    """ Builds the stack of convolution (and optional pooling) layers.

    One batch-normalized slim.conv2d layer is created per entry of
    self.conv_units, scoped 'enc_conv<i>'. When self.pooling_enable is set,
    each conv layer is followed by a max or average pooling layer scoped
    'enc_pool<i>'. Layer shapes are printed when self.display_step > 0.
    Args:
        :param x - input tensor fed to the first conv layer.
        :param reuse - forwarded to slim.conv2d (reuse existing variables or not).
    Returns:
        The output tensor of the last layer of the stack.
    """
    pool_layers = {'max': slim.max_pool2d, 'avg': slim.avg_pool2d}
    verbose = self.display_step > 0
    layer_configs = zip(
        self.conv_units, self.kernel_sizes, self.strides, self.paddings,
        self.pooling_kernels, self.pooling_strides, self.pooling_paddings)

    net = x
    with slim.arg_scope([slim.conv2d], padding='SAME',
                        weights_initializer=tf.contrib.layers.xavier_initializer(),
                        weights_regularizer=slim.l2_regularizer(0.0005),
                        activation_fn=self.activation_fn):
        for i, (c_unit, kernel_size, stride, padding,
                p_kernel, p_stride, p_padding) in enumerate(layer_configs):
            # Conv
            net = slim.conv2d(net, c_unit, kernel_size, stride=stride,
                              normalizer_fn=slim.batch_norm, reuse=reuse,
                              padding=padding, scope='enc_conv{0}'.format(i))
            if verbose:
                # Single-argument print(...) is valid on both Python 2 and 3.
                print('Conv_{0}.shape = {1}'.format(i, net.get_shape()))

            # Pooling
            if self.pooling_enable and self.pooling_type in pool_layers:
                pool_fn = pool_layers[self.pooling_type]
                net = pool_fn(net, kernel_size=p_kernel,
                              scope='enc_pool{0}'.format(i),
                              stride=p_stride, padding=p_padding)
                if verbose:
                    print('Pooling_{0}.shape = {1}'.format(i, net.get_shape()))
            # Dropout: Do NOT use dropout for conv layers. Experiments show it gives poor result.
    return net
def build_model(self):
    """ Builds the training and inference graphs of the CNN model.

    Creates the input/label placeholders, runs the encoder, registers the
    loss/training ops through self.loss_fn_training_op and defines the
    softmax prediction and accuracy tensors. A second encoder pass that
    reuses the same variables is built for inference.
    """
    # Input is a 4D tensor: [batch, x_dim[0], x_dim[1], x_ch].
    self.x = tf.placeholder(tf.float32, [None, self.x_dim[0], self.x_dim[1], self.x_ch])
    # NOTE: this placeholder is rebound to the encoder output a few lines below.
    self.z = tf.placeholder(tf.float32, shape=[None, self.z_dim])
    self.y = tf.placeholder(tf.float32, shape=[None, self.y_dim])

    self.z = self.encoder(self.x)

    # No decoder in this model, so there is no reconstruction.
    self.x_recon = None

    # The encoder output is used directly as the softmax logits.
    logits = self.z

    # Calculate class mean
    self.class_means = self.bucket_mean(self.z, tf.argmax(self.y, axis=1), self.y_dim)

    self.loss_fn_training_op(slim.flatten(self.x), self.y, self.z,
                             logits, self.x_recon, self.class_means)

    self.pred_prob = tf.nn.softmax(logits=logits)
    pred = tf.argmax(self.pred_prob, axis=1)
    actual = tf.argmax(self.y, axis=1)
    self.acc = tf.reduce_mean(tf.cast(tf.equal(pred, actual), tf.float32))

    # For Inference, set is_training
    # The encoder reads self.is_training while the graph is being built (it is
    # passed to slim.dropout), so the flag is switched off only for this pass.
    self.is_training = False
    self.z_test = self.encoder(self.x, reuse=True)
    self.pred_prob_test = tf.nn.softmax(logits=self.z_test)
    self.is_training = True
44 | If None, the the output of decoder(if there is a decoder) will rescaled. 45 | :param x_reshape - a function to reshape the input before feeding to the networks input layer. 46 | If None, the input will not be reshaped. 47 | :param opt - the Optimizer used when updating based on ii-loss. 48 | Used when inter_loss and intra_loss are enabled. Default is AdamOptimizer. 49 | :param recon_opt - the Optimizer used when updating based on reconstruction-loss (Not used ii, ii+ce or ce). 50 | Used when recon_loss is enabled. Default is AdamOptimizer. 51 | :param c_opt - the Optimizer used when updating based on cross entropy loss. 52 | Used for ce and ii+ce modes (i.e. ce_loss is enabled). Default is AdamOptimizer. 53 | :param batch_size - training barch size. 54 | :param iterations - number of training iterations. 55 | :param display_step - training info displaying interval. 56 | :param save_step - model saving interval. 57 | :param model_directory - derectory to save model in. 58 | :param dist - ii-loss calculation mode. Only 'mean_separation_spread' should be used. 59 | :param decision_dist_fn - outlier score distance functions 60 | :param threshold_type - outlier threshold mode. 'global' appears to give better results. 61 | :param ce_loss - Consider cross entropy loss. When enabled with intra_loss and inter_loss gives (ii+ce) mode. 62 | :param recon_loss - Experimental! avoid enabling them. 63 | :param inter_loss - Consider inter-class separation. Should be enabled together with intra_loss for (ii-loss). 64 | :param intra_loss - Consider intra-class spread. Should be enabled together with inter_loss for (ii-loss). 65 | :param div_loss and combined_loss - Experimental. avoid enabling them. 66 | :param contamination - contamination ratio used for outlier threshold estimation. 
def model_config(self):
    """ Returns the model configuration as a dict.

    Only instance attributes whose name is listed in self.model_params are
    included; names listed there that are not set on the instance are omitted.
    """
    config = {}
    for name, value in vars(self).items():
        if name in self.model_params:
            config[name] = value
    return config
def _next_batch(self, x, y):
    """ Draws a random training batch.

    self.batch_size row indices are sampled uniformly from [0, x.shape[0])
    with np.random.randint, so the same row may appear more than once.
    Returns:
        The selected rows of x and the matching rows of y.
    """
    n_rows = x.shape[0]
    picked = np.random.randint(0, high=n_rows, size=self.batch_size)
    batch_x = x[picked]
    batch_y = y[picked]
    return batch_x, batch_y
def inter_intra_diff(self, data, labels, class_mean):
    """ Calculates the intra-class and inter-class distance
    as the average distance from the class means.

    The squared differences from every class mean are averaged in two
    buckets selected by labels: bucket 1 is reported as the intra-class
    distance and bucket 0 as the inter-class distance.
    Returns:
        A tuple (intra_class_sq_diff, inter_class_sq_diff).
    """
    per_class_sq_diff = self.sq_difference_from_mean(data, class_mean)
    bucketed = self.bucket_mean(per_class_sq_diff, labels, 2)
    inter_class, intra_class = bucketed[0], bucketed[1]
    return intra_class, inter_class
def all_pair_distance(self, A):
    """ Computes the squared euclidean distance between all pairs of rows of A.

    Uses the expansion |a - b|^2 = |a|^2 - 2 a.b + |b|^2.
    Returns:
        A square matrix D where D[i, j] is the squared distance between
        row i and row j of A.
    """
    # Squared norm of every row, as a column vector.
    sq_norms = tf.reshape(tf.reduce_sum(A * A, 1), [-1, 1])
    gram = tf.matmul(A, A, transpose_b=True)
    return sq_norms - 2 * gram + tf.transpose(sq_norms)
257 | """ 258 | # Calculate intra class and inter class distance 259 | if self.dist == 'class_mean': # For experimental pupose only 260 | self.intra_c_loss, self.inter_c_loss = self.inter_intra_diff( 261 | z, tf.cast(y, tf.int32), class_means) 262 | elif self.dist == 'all_pair': # For experimental pupose only 263 | self.intra_c_loss, self.inter_c_loss = self.all_pair_inter_intra_diff( 264 | z, tf.cast(y, tf.int32)) 265 | elif self.dist == 'mean_separation_spread': # ii-loss 266 | self.intra_c_loss, self.inter_c_loss = self.inter_separation_intra_spred( 267 | z, tf.cast(y, tf.int32), class_means) 268 | elif self.dist == 'min_max': # For experimental pupose only 269 | self.intra_c_loss, self.inter_c_loss = self.inter_min_intra_max( 270 | z, tf.cast(y, tf.int32), class_means) 271 | 272 | # Calculate reconstruction loss 273 | if self.enable_recon_loss: # For experimental pupose only 274 | self.recon_loss = tf.reduce_mean(tf.squared_difference(x, x_recon)) 275 | 276 | if self.enable_intra_loss and self.enable_inter_loss: # The correct ii-loss 277 | self.loss = tf.reduce_mean(self.intra_c_loss - self.inter_c_loss) 278 | elif self.enable_intra_loss and not self.enable_inter_loss: # For experimental pupose only 279 | self.loss = tf.reduce_mean(self.intra_c_loss) 280 | elif not self.enable_intra_loss and self.enable_inter_loss: # For experimental pupose only 281 | self.loss = tf.reduce_mean(-self.inter_c_loss) 282 | elif self.div_loss: # For experimental pupose only 283 | self.loss = tf.reduce_mean(self.intra_c_loss / self.inter_c_loss) 284 | else: # For experimental pupose only 285 | self.loss = tf.reduce_mean((self.recon_loss * 1. if self.enable_recon_loss else 0.) 286 | + (self.intra_c_loss * 1. if self.enable_intra_loss else 0.) 287 | - (self.inter_c_loss * 1. if self.enable_inter_loss else 0.) 
def fit(self, X, y, X_val=None, y_val=None):
    """ Fit model.

    Runs self.iterations mini-batch updates for every enabled loss, then
    stores the class statistics (update_class_stats) and the outlier
    thresholds (class_thresholds). A batch that does not contain every known
    class is redrawn and does not count as an iteration.
    Args:
        :param X - training instances.
        :param y - one-hot training labels; y.shape[1] must equal y_dim.
        :param X_val - optional validation instances, only used for reporting.
        :param y_val - optional one-hot validation labels.
    Raises:
        ValueError - if no batch can ever contain all known classes (a class
            is missing from y, or batch_size is smaller than y_dim). Without
            this check the training loop would never terminate.
    """
    assert y.shape[1] == self.y_dim
    if self.iterations > 0:
        n_classes_present = len(np.unique(np.argmax(y, axis=1)))
        if n_classes_present != self.y_dim or self.batch_size < self.y_dim:
            raise ValueError(
                'Every batch must contain all {0} classes, but the training data has '
                '{1} classes and batch_size is {2}'.format(
                    self.y_dim, n_classes_present, self.batch_size))

    has_val = X_val is not None and y_val is not None
    uses_class_distance = ((self.enable_inter_loss and self.enable_intra_loss)
                           or self.div_loss)

    def distance_based_accuracy(batch_x, batch_y):
        # Refresh the class means, then score the batch (and validation set).
        self.update_class_stats(X, y)
        batch_acc = (self.predict(batch_x, reformat=False)
                     == np.argmax(batch_y, axis=1)).mean()
        held_out_acc = None
        if has_val:
            # The mean of a boolean array is already a float; np.float is gone from NumPy.
            held_out_acc = (self.predict(X_val) == np.argmax(y_val, axis=1)).mean()
        return batch_acc, held_out_acc

    start = time.time()
    self.is_training = True
    xs = ys = None
    intra_c_loss = inter_c_loss = recon_loss = loss = ce_loss = acc = val_acc = None
    i = 0
    with self.graph.as_default():
        self.sess.run(tf.global_variables_initializer())
        while i < self.iterations:
            xs, ys = self._next_batch(X, y)
            # The class means are undefined for a class missing from the batch.
            if len(np.unique(np.argmax(ys, axis=1))) != self.y_dim:
                continue
            xs = self.x_reformat(xs)
            feed = {self.x: xs, self.y: ys}

            intra_c_loss = inter_c_loss = recon_loss = loss = ce_loss = acc = val_acc = None

            if self.enable_inter_loss or self.enable_intra_loss or self.div_loss:
                _, intra_c_loss, inter_c_loss, loss = self.sess.run(
                    [self.train_op, self.intra_c_loss, self.inter_c_loss, self.loss],
                    feed_dict=feed)

            if self.enable_recon_loss:
                _, recon_loss = self.sess.run(
                    [self.recon_train_op, self.recon_loss],
                    feed_dict=feed)

            if self.enable_ce_loss:
                _, ce_loss, acc = self.sess.run(
                    [self.ce_train_op, self.ce_loss, self.acc],
                    feed_dict=feed)
                if has_val:
                    val_acc = self.sess.run(
                        self.acc,
                        feed_dict={self.x: self.x_reformat(X_val), self.y: y_val})

            # Check the interval is positive BEFORE taking the modulo.
            if self.display_step > 0 and i % self.display_step == 0:
                if uses_class_distance:
                    acc, val_acc = distance_based_accuracy(xs, ys)
                self._iter_stats(i, start, intra_c_loss, inter_c_loss, recon_loss,
                                 loss, ce_loss, acc, val_acc)

            if (self.save_step > 0 and i % self.save_step == 0 and i != 0
                    and self.model_directory is not None):
                self.save_model('model-' + str(i) + '.cptk')
                print('Saved Model')

            i += 1

    if self.display_step > 0:
        # xs is None only when no training iteration ran.
        if uses_class_distance and xs is not None:
            acc, val_acc = distance_based_accuracy(xs, ys)
        self._iter_stats(i, start, intra_c_loss, inter_c_loss, recon_loss,
                         loss, ce_loss, acc, val_acc)
    if self.model_directory is not None:
        self.save_model('model-' + str(i) + '.cptk')
        print('Saved Model')

    # Save class means and cov
    self.update_class_stats(X, y)

    # Compute and store the selected thresholds for each class
    self.class_thresholds(X, y)

    self.is_training = False
def _iter_stats(self, i, start_time, intra_c_loss, inter_c_loss, recon_loss, loss, ce_loss, acc, val_acc):
    """ Prints one row of training statistics (preceded by a header when i == 0).

    Args:
        :param i - current iteration, printed in the first column.
        :param start_time - time.time() value taken when training started; the
            last column shows the whole seconds elapsed since then.
        :param intra_c_loss, inter_c_loss, recon_loss, loss, ce_loss, acc, val_acc -
            values to report. Any of them may be None (e.g. a disabled loss),
            in which case 'None' is printed in that column.
    """
    def cell(value):
        # format(None, '7.4') raises TypeError on Python 3, so None is
        # rendered explicitly as a left-aligned 7 character wide 'None'.
        if value is None:
            return '{0:7}'.format('None')
        return '{0:7.4}'.format(value)

    if i == 0:
        print('{0:5}|{1:7}|{2:7}|{3:7}|{4:7}|{5:7}|{6:7}|{7:7}|{8:7}|'.format(
            'i', 'Intra', 'Inter', 'Recon', 'Total', 'CrossE', 'Acc', 'V_Acc', 'TIME(s)'))

    metrics = (intra_c_loss, inter_c_loss, recon_loss, loss, ce_loss, acc, val_acc)
    columns = ['{0:5}'.format(i)]
    columns.extend(cell(value) for value in metrics)
    columns.append('{0:7}'.format(int(time.time() - start_time)))
    print('|'.join(columns) + '|')
def distance_from_all_classes(self, X, reformat=True):
    """ Computes the distance of each instance from all class means.

    Args:
        :param X - input instances.
        :param reformat - forwarded to self.latent.
    Returns:
        An array of shape (number of instances, y_dim). For 'euclidean' the
        entries are SQUARED euclidean distances in the z-layer space; for
        'mahalanobis' they are the distances returned by scipy's cdist using
        the per-class inverse covariance self.c_cov_inv.
    Raises:
        ValueError - if self.decision_dist_fn is not supported.
    """
    if self.decision_dist_fn not in ('euclidean', 'mahalanobis'):
        # Fail loudly instead of silently returning all-zero distances.
        raise ValueError('Error: Unsupported decision_dist_fn "{0}"'.format(self.decision_dist_fn))

    z = self.latent(X, reformat=reformat)
    dist = np.zeros((z.shape[0], self.y_dim))
    if self.decision_dist_fn == 'euclidean':  # squared euclidean
        for j in range(self.y_dim):
            dist[:, j] = np.sum(np.square(z - self.c_means[j]), axis=1)
    else:
        # Import the submodule explicitly: a bare "import scipy" is not
        # guaranteed to make scipy.spatial available.
        from scipy.spatial.distance import cdist
        for j in range(self.y_dim):
            dist[:, j] = cdist(
                z, self.c_means[j][None, :],
                'mahalanobis', VI=self.c_cov_inv[j]).reshape((z.shape[0]))

    return dist
def class_thresholds(self, X, y):
    """ Computes class thresholds. Shouldn't be called from outside.

    The outlier score of every instance of X is computed with
    self.decision_function and the threshold is set to one of the largest
    scores, so that a small fraction of X scores above it.
    Args:
        :param X - training instances.
        :param y - one-hot labels of X (only used when threshold_type is 'perclass').
    Returns:
        self.threshold - a float array with one threshold per class. In
        'global' mode all entries are equal.
    """
    score = np.asarray(self.decision_function(X))
    if self.threshold_type == 'global':
        # NOTE(review): the global cutoff uses a fixed ratio of 0.01 and does
        # not read self.contamination. Kept as is so existing results do not
        # shift - confirm whether self.contamination was intended here.
        cutoff_idx = max(1, int(score.shape[0] * 0.01))
        self.threshold = np.ones(self.y_dim) * np.sort(score)[-cutoff_idx]
    elif self.threshold_type == 'perclass':
        c_count = y.sum(axis=0)
        # Allocate floats explicitly: an array shaped like c_count would
        # inherit the label dtype and truncate thresholds for integer labels.
        self.threshold = np.zeros(y.shape[1], dtype=float)
        for c in range(y.shape[1]):
            c_scores = np.sort(score[y[:, c].astype(bool)])
            cutoff_idx = max(1, int(c_count[c] * self.contamination))
            self.threshold[c] = c_scores[-cutoff_idx]

    return self.threshold
558 | :param x_scale - an input scaling function. Default scale to range of [-1, 1]. 559 | If none, the input will not be scaled. 560 | :param x_inverse_scale - reverse scaling fn. by rescaling from [-1, 1] to original input scale. 561 | If None, the the output of decoder(if there is a decoder) will rescaled. 562 | :param x_reshape - a function to reshape the input before feeding to the networks input layer. 563 | If None, the input will not be reshaped. 564 | :param opt - the Optimizer used when updating based on ii-loss. 565 | Used when inter_loss and intra_loss are enabled. Default is AdamOptimizer. 566 | :param recon_opt - the Optimizer used when updating based on reconstruction-loss (Not used ii, ii+ce or ce). 567 | Used when recon_loss is enabled. Default is AdamOptimizer. 568 | :param c_opt - the Optimizer used when updating based on cross entropy loss. 569 | Used for ce and ii+ce modes (i.e. ce_loss is enabled). Default is AdamOptimizer. 570 | :param batch_size - training batch size. 571 | :param iterations - number of training iterations. 572 | :param display_step - training info displaying interval. 573 | :param save_step - model saving interval. 574 | :param model_directory - directory to save model in. 575 | :param dist - ii-loss calculation mode. Only 'mean_separation_spread' should be used. 576 | :param decision_dist_fn - outlier score distance functions 577 | :param threshold_type - outlier threshold mode. 'global' appears to give better results. 578 | :param ce_loss - Consider cross entropy loss. When enabled with intra_loss and inter_loss gives (ii+ce) mode. 579 | :param recon_loss - Experimental! Avoid enabling this. 580 | :param inter_loss - Consider inter-class separation. Should be enabled together with intra_loss for (ii-loss). 581 | :param intra_loss - Consider intra-class spread. Should be enabled together with inter_loss for (ii-loss). 582 | :param div_loss and combined_loss - Experimental. Avoid enabling them. 
583 | :param contamination - contamination ratio used for outlier threshold estimation. 584 | """ 585 | 586 | # Network Setting 587 | if isinstance(h_dims, list) or isinstance(h_dims, tuple): 588 | self.h_dims = h_dims 589 | else: 590 | self.h_dims = [h_dims] 591 | 592 | self.activation_fn = activation_fn 593 | 594 | assert decision_dist_fn in ['euclidean', 'mahalanobis'] 595 | 596 | super(OpenNetFlat, self).__init__( 597 | x_dim, y_dim, z_dim=z_dim, 598 | x_scale=x_scale, x_inverse_scale=x_inverse_scale, x_reshape=x_reshape, 599 | opt=opt, recon_opt=recon_opt, c_opt=c_opt, threshold_type=threshold_type, 600 | dist=dist, decision_dist_fn=decision_dist_fn, dropout=dropout, keep_prob=keep_prob, 601 | batch_size=batch_size, iterations=iterations, 602 | display_step=display_step, save_step=save_step, 603 | model_directory=model_directory, 604 | ce_loss=ce_loss, recon_loss=recon_loss, inter_loss=inter_loss, intra_loss=intra_loss, 605 | div_loss=div_loss, contamination=contamination) 606 | 607 | self.model_params += ['h_dims', 'activation_fn'] 608 | 609 | 610 | def encoder(self, x, reuse=False): 611 | """ Encoder network. 612 | Args: 613 | :param x - input x. 614 | :param reuse - whether to reuse old network on create new one. 615 | Returns: 616 | A tuple z, softmax input logits 617 | """ 618 | net = x 619 | with slim.arg_scope([slim.fully_connected], 620 | weights_initializer=tf.contrib.layers.xavier_initializer(), 621 | activation_fn=self.activation_fn): 622 | for i, num_unit in enumerate(self.h_dims): 623 | net = slim.fully_connected( 624 | net, num_unit, 625 | normalizer_fn=slim.batch_norm, 626 | reuse=reuse, scope='enc_{0}'.format(i)) 627 | if self.dropout: 628 | net = slim.dropout(net, keep_prob=self.keep_prob, is_training=self.is_training) 629 | # It is very important to batch normalize the output of encoder. 
def decoder(self, z, reuse=False):
    """ Decoder Network.
    Mirrors the encoder: one batch-normalized fully connected layer per entry
    of self.h_dims, taken in reverse order, followed by a tanh output layer
    with self.x_dim units.
    Args:
        :param z - latent variables z.
        :param reuse - whether to reuse old network on create new one.
    Returns:
        The reconstructed x
    """
    hidden = z
    with slim.arg_scope([slim.fully_connected],
                        weights_initializer=tf.contrib.layers.xavier_initializer(),
                        activation_fn=self.activation_fn):
        for layer_idx, width in enumerate(reversed(self.h_dims)):
            hidden = slim.fully_connected(
                hidden, width,
                normalizer_fn=slim.batch_norm,
                reuse=reuse, scope='dec_{0}'.format(layer_idx))
            if self.dropout:
                hidden = slim.dropout(hidden, keep_prob=self.keep_prob,
                                      is_training=self.is_training)

    # The output layer sets its own initializer and activation explicitly.
    return slim.fully_connected(
        hidden, self.x_dim, activation_fn=tf.nn.tanh,
        weights_initializer=tf.contrib.layers.xavier_initializer(),
        reuse=reuse, scope='dec_out')
self.bucket_mean(self.z, tf.argmax(self.y, axis=1), self.y_dim) 688 | 689 | self.loss_fn_training_op(self.x, self.y, self.z, logits, self.x_recon, self.class_means) 690 | 691 | self.pred_prob = tf.nn.softmax(logits=logits) 692 | pred = tf.argmax(self.pred_prob, axis=1) 693 | actual = tf.argmax(self.y, axis=1) 694 | self.acc = tf.reduce_mean(tf.cast(tf.equal(pred, actual), tf.float32)) 695 | 696 | # For Inference, set is_training 697 | self.is_training = False 698 | self.z_test, logits_test = self.encoder(self.x, reuse=True) 699 | self.pred_prob_test = tf.nn.softmax(logits=logits_test) 700 | if self.enable_recon_loss: 701 | self.x_recon_test = self.decoder(self.z_test, reuse=True) 702 | self.is_training = True 703 | 704 | 705 | class OpenNetCNN(OpenNetBase): 706 | """ OpenNet with convolutional and fully connected layers. 707 | Current supports simple architecture with alternating cov and pooling layers. 708 | """ 709 | def __init__(self, x_dim, x_ch, y_dim, conv_units, hidden_units, 710 | z_dim=6, 711 | kernel_sizes=[5,5], strides=[1, 1], paddings='SAME', 712 | pooling_enable=False, pooling_kernel=[2,2], 713 | pooling_stride=[2,2], pooling_padding='SAME', 714 | pooling_type='max', # 'avg' or 'max' 715 | activation_fn=tf.nn.relu, 716 | 717 | x_scale=UnitPosNegScale.scale, 718 | x_inverse_scale=UnitPosNegScale.inverse_scale, 719 | x_reshape=None, 720 | 721 | opt=tf.train.AdamOptimizer(learning_rate=0.001, beta1=0.9), 722 | recon_opt=tf.train.AdamOptimizer(learning_rate=0.001, beta1=0.9), 723 | c_opt=tf.train.AdamOptimizer(learning_rate=0.001, beta1=0.9), 724 | 725 | dist='mean_separation_spread', 726 | decision_dist_fn = 'euclidean', 727 | threshold_type='global', 728 | dropout = True, keep_prob=0.7, 729 | batch_size=128, iterations=5000, 730 | display_step=500, save_step=500, 731 | model_directory=None, # Directory to save trained model to. 
732 | density_estimation_factory=None, # deprecated 733 | ce_loss=False, recon_loss=False, inter_loss=True, intra_loss=True, 734 | div_loss=False, 735 | contamination=0.01, 736 | ): 737 | """ 738 | Args: 739 | :param x_dim - dimension of the input 740 | :param y_dim - number of known classes. 741 | :param conv_units - a list of ints. The number of filters in each convolutional layer. 742 | :param hidden_units - a list of ints. The of units in each fully connected layer. 743 | :param z_dim - the number of latent variables. 744 | :param h_dims - an int or a list; number of units in the fully connected hidden layers of the 745 | encoder network. 746 | :param kernel_sizes - a list or a list of lists. Size of the kernel of the conv2d. 747 | If a list with two ints all layers use the same kernel size. 748 | Otherwise if a list of list (example [[5,5], [4,4]]) each layer 749 | will have different kernel size. 750 | :param strides - a list or a list of lists. The strides of each conv2d kernel. 751 | :param paddings - padding for each conv2d. Default 'SAME'. 752 | :param pooling_enable - if True, add pooling layer after each conv2d layer. 753 | :param pooling_kernel - a list or a list of lists. The size of the pooling kernel. 754 | If a list with two ints all layers use the same kernel size. 755 | Otherwise if a list of list (example [[5,5], [4,4]]) each layer 756 | will have different kernel size. 757 | :param pooling_stride - a list or a list of lists. The strides of each pooing kernel. 758 | :param pooling_padding - padding for each pool2d layer. Default 'SAME'. 759 | :param pooling_type - pooling layer type. supported 'avg' or 'max'. Default max_pool2d. 760 | :param x_scale - an input scaling function. Default scale to range of [-1, 1]. 761 | If none, the input will not be scaled. 762 | :param x_inverse_scale - reverse scaling fn. by rescaling from [-1, 1] to original input scale. 763 | If None, the the output of decoder(if there is a decoder) will rescaled. 
764 | :param x_reshape - a function to reshape the input before feeding to the networks input layer. 765 | If None, the input will not be reshaped. 766 | :param opt - the Optimizer used when updating based on ii-loss. 767 | Used when inter_loss and intra_loss are enabled. Default is AdamOptimizer. 768 | :param recon_opt - the Optimizer used when updating based on reconstruction-loss (Not used ii, ii+ce or ce). 769 | Used when recon_loss is enabled. Default is AdamOptimizer. 770 | :param c_opt - the Optimizer used when updating based on cross entropy loss. 771 | Used for ce and ii+ce modes (i.e. ce_loss is enabled). Default is AdamOptimizer. 772 | :param batch_size - training batch size. 773 | :param iterations - number of training iterations. 774 | :param display_step - training info displaying interval. 775 | :param save_step - model saving interval. 776 | :param model_directory - directory to save model in. 777 | :param dist - ii-loss calculation mode. Only 'mean_separation_spread' should be used. 778 | :param decision_dist_fn - outlier score distance functions 779 | :param threshold_type - outlier threshold mode. 'global' appears to give better results. 780 | :param ce_loss - Consider cross entropy loss. When enabled with intra_loss and inter_loss gives (ii+ce) mode. 781 | :param recon_loss - Experimental! Avoid enabling this. 782 | :param inter_loss - Consider inter-class separation. Should be enabled together with intra_loss for (ii-loss). 783 | :param intra_loss - Consider intra-class spread. Should be enabled together with inter_loss for (ii-loss). 784 | :param div_loss and combined_loss - Experimental. Avoid enabling them. 785 | :param contamination - contamination ratio used for outlier threshold estimation. 
786 | """ 787 | self.x_ch = x_ch 788 | 789 | # Conv layer config 790 | self.conv_units = conv_units 791 | if isinstance(kernel_sizes[0], list) or isinstance(kernel_sizes[0], tuple): 792 | assert len(conv_units) == len(kernel_sizes) 793 | self.kernel_sizes = kernel_sizes 794 | else: 795 | self.kernel_sizes = [kernel_sizes] * len(conv_units) 796 | 797 | if isinstance(strides[0], list) or isinstance(strides[0], tuple): 798 | assert len(conv_units) == len(strides) 799 | self.strides = strides 800 | else: 801 | self.strides = [strides] * len(conv_units) 802 | 803 | if isinstance(paddings, list): 804 | assert len(conv_units) == len(paddings) 805 | self.paddings = paddings 806 | else: 807 | self.paddings = [paddings] * len(conv_units) 808 | 809 | # Conv pooling config 810 | self.pooling_enable = pooling_enable 811 | assert pooling_type in ['avg', 'max'] # supported pooling types. 812 | self.pooling_type = pooling_type 813 | 814 | if isinstance(pooling_kernel[0], list) or isinstance(pooling_kernel[0], tuple): 815 | assert len(conv_units) == len(pooling_kernel) 816 | self.pooling_kernels = pooling_kernel 817 | else: 818 | self.pooling_kernels = [pooling_kernel] * len(conv_units) 819 | 820 | if isinstance(pooling_stride[0], list) or isinstance(pooling_stride[0], tuple): 821 | assert len(conv_units) == len(pooling_stride) 822 | self.pooling_strides = pooling_stride 823 | else: 824 | self.pooling_strides = [pooling_stride] * len(conv_units) 825 | 826 | if isinstance(pooling_padding, list): 827 | assert len(conv_units) == len(pooling_padding) 828 | self.pooling_paddings = pooling_padding 829 | else: 830 | self.pooling_paddings = [pooling_padding] * len(conv_units) 831 | 832 | # Fully connected layer config 833 | self.hidden_units = hidden_units 834 | 835 | self.activation_fn = activation_fn 836 | 837 | assert decision_dist_fn in ['euclidean', 'mahalanobis'] 838 | 839 | super(OpenNetCNN, self).__init__( 840 | x_dim, y_dim, z_dim=z_dim, 841 | x_scale=x_scale, 
x_inverse_scale=x_inverse_scale, x_reshape=x_reshape, 842 | opt=opt, recon_opt=recon_opt, c_opt=c_opt, threshold_type=threshold_type, 843 | dist=dist, decision_dist_fn=decision_dist_fn, dropout=dropout, keep_prob=keep_prob, 844 | batch_size=batch_size, iterations=iterations, 845 | display_step=display_step, save_step=save_step, 846 | model_directory=model_directory, 847 | ce_loss=ce_loss, recon_loss=recon_loss, inter_loss=inter_loss, intra_loss=intra_loss, 848 | div_loss=div_loss, contamination=contamination) 849 | 850 | 851 | self.model_params += ['x_ch', 'conv_units', 'kernel_sizes', 'strides', 'paddings', 852 | 'pooling_enable', 'pooling_type', 'pooling_kernel', 'pooling_strides', 853 | 'pooling_padding', 'hidden_units', 'activation_fn'] 854 | 855 | 856 | def build_conv(self, x, reuse=False): 857 | """ Builds the convolutional layers. 858 | """ 859 | net = x 860 | with slim.arg_scope([slim.conv2d], padding='SAME', 861 | weights_initializer=tf.contrib.layers.xavier_initializer(),#tf.truncated_normal_initializer(stddev=0.01), 862 | weights_regularizer=slim.l2_regularizer(0.0005), 863 | activation_fn=self.activation_fn): 864 | for i, (c_unit, kernel_size, stride, padding, p_kernel, p_stride, p_padding) in enumerate(zip( 865 | self.conv_units, self.kernel_sizes, self.strides, self.paddings, 866 | self.pooling_kernels, self.pooling_strides, self.pooling_paddings)): 867 | # Conv 868 | net = slim.conv2d(net, c_unit, kernel_size, stride=stride, 869 | normalizer_fn=slim.batch_norm, 870 | reuse=reuse, padding=padding, scope='enc_conv{0}'.format(i)) 871 | 872 | if self.display_step > 0: 873 | print 'Conv_{0}.shape = {1}'.format(i, net.get_shape()) 874 | # Pooling 875 | if self.pooling_enable: 876 | if self.pooling_type == 'max': 877 | net = slim.max_pool2d(net, kernel_size=p_kernel, scope='enc_pool{0}'.format(i), 878 | stride=p_stride, padding=p_padding) 879 | elif self.pooling_type == 'avg': 880 | net = slim.avg_pool2d(net, kernel_size=p_kernel, 
scope='enc_pool{0}'.format(i), 881 | stride=p_stride, padding=p_padding) 882 | 883 | if self.display_step > 0: 884 | print 'Pooling_{0}.shape = {1}'.format(i, net.get_shape()) 885 | # Dropout: Do NOT use dropout for conv layers. Experiments show it gives poor result. 886 | return net 887 | 888 | def encoder(self, x, reuse=False): 889 | """ Builds the network. 890 | Args: 891 | :param x - input x. 892 | :param reuse - whether to reuse old network on create new one. 893 | Returns: 894 | Latent variables z and logits(which will be used if ce_loss is enabled.) 895 | """ 896 | # Conv Layers 897 | net = self.build_conv(x, reuse=reuse) 898 | net = slim.flatten(net) 899 | 900 | # Fully Connected Layer 901 | with slim.arg_scope([slim.fully_connected], reuse=reuse, 902 | weights_initializer=tf.contrib.layers.xavier_initializer(), 903 | weights_regularizer=slim.l2_regularizer(0.0005), 904 | activation_fn=self.activation_fn): 905 | for i, h_unit in enumerate(self.hidden_units): 906 | net = slim.fully_connected(net, h_unit, 907 | normalizer_fn=slim.batch_norm, 908 | scope='enc_full{0}'.format(i)) 909 | if self.dropout: 910 | net = slim.dropout(net, keep_prob=self.keep_prob, is_training=self.is_training, 911 | scope='enc_full_dropout{0}'.format(i)) 912 | 913 | # Latent Variable 914 | # It is very important to batch normalize the output of encoder. 915 | z = slim.fully_connected( 916 | net, self.z_dim, activation_fn=None, 917 | normalizer_fn=slim.batch_norm, 918 | weights_initializer=tf.contrib.layers.xavier_initializer(), 919 | reuse=reuse, scope='enc_z') 920 | 921 | logits = slim.fully_connected( 922 | z, self.y_dim, activation_fn=None, 923 | weights_initializer=tf.contrib.layers.xavier_initializer(), 924 | reuse=reuse, scope='classifier_logits') 925 | 926 | return z, logits 927 | 928 | 929 | 930 | def decoder(self, z, reuse=False): 931 | """ Decoder Network. Experimental and not complete. 932 | Args: 933 | :param z - latent variables z. 
934 | :param reuse - whether to reuse old network on create new one. 935 | Returns: 936 | The reconstructed x 937 | """ 938 | net = z 939 | 940 | with slim.arg_scope([slim.fully_connected], 941 | weights_initializer=tf.contrib.layers.xavier_initializer(), 942 | activation_fn=self.activation_fn): 943 | h_dims_revese = [self.hidden_units[i] 944 | for i in range(len(self.hidden_units) - 1, -1, -1)] 945 | for i, num_unit in enumerate(h_dims_revese): 946 | net = slim.fully_connected( 947 | net, num_unit, 948 | normalizer_fn=slim.batch_norm, 949 | reuse=reuse, scope='dec_{0}'.format(i)) 950 | if self.dropout: 951 | net = slim.dropout(net, keep_prob=self.keep_prob, is_training=self.is_training) 952 | 953 | dec_out = slim.fully_connected( 954 | net, self.x_dim[0] * self.x_dim[1] * self.x_ch, activation_fn=tf.nn.tanh, 955 | weights_initializer=tf.contrib.layers.xavier_initializer(), 956 | reuse=reuse, scope='dec_out') 957 | return dec_out 958 | 959 | 960 | 961 | def build_model(self): 962 | """ Builds the network graph. 963 | """ 964 | self.x = tf.placeholder(tf.float32, [None, self.x_dim[0], self.x_dim[1], self.x_ch]) 965 | self.z = tf.placeholder(tf.float32, shape=[None, self.z_dim]) 966 | self.y = tf.placeholder(tf.float32, shape=[None, self.y_dim]) 967 | 968 | self.z, logits = self.encoder(self.x) 969 | 970 | if self.enable_recon_loss: 971 | self.x_recon = self.decoder(self.z) 972 | else: 973 | self.x_recon = None 974 | 975 | # Calculate class mean 976 | self.class_means = self.bucket_mean(self.z, tf.argmax(self.y, axis=1), self.y_dim) 977 | 978 | self.loss_fn_training_op(slim.flatten(self.x), self.y, self.z, 979 | logits, self.x_recon, self.class_means) 980 | 981 | self.pred_prob = tf.nn.softmax(logits=logits) 982 | pred = tf.argmax(self.pred_prob, axis=1) 983 | actual = tf.argmax(self.y, axis=1) 984 | self.acc = tf.reduce_mean(tf.cast(tf.equal(pred, actual), tf.float32)) 985 | 986 | # For Inference, set is_training. Can be done in a better, this should do for now. 
987 | self.is_training = False 988 | self.z_test, logits_test = self.encoder(self.x, reuse=True) 989 | self.pred_prob_test = tf.nn.softmax(logits=logits_test) 990 | if self.enable_recon_loss: 991 | self.x_recon_test = self.decoder(self.z_test, reuse=True) 992 | self.is_training = True 993 | --------------------------------------------------------------------------------