├── .gitignore ├── AUTHOR.txt ├── LICENSE ├── README.md ├── model ├── __init__.py ├── bbdropout.py ├── digamma.py ├── layers.py ├── lenet.py ├── sbpdropout.py └── utils ├── scripts ├── lenet_conv │ ├── bbdropout.py │ ├── dbbdropout.py │ ├── model │ ├── pretrain.py │ ├── sbpdropout.py │ └── utils └── lenet_dense │ ├── bbdropout.py │ ├── dbbdropout.py │ ├── model │ ├── pretrain.py │ ├── sbpdropout.py │ └── utils └── utils ├── __init__.py ├── accumulator.py ├── cifar10.py ├── cifar100.py ├── mnist.py └── train.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.log 3 | *.npy 4 | *.npz 5 | *.ckpt 6 | *.tar 7 | *.out 8 | *.sh 9 | utils/paths.py 10 | **/results/ 11 | /records/ 12 | -------------------------------------------------------------------------------- /AUTHOR.txt: -------------------------------------------------------------------------------- 1 | Copyright 2018 (Institution) under XAI Project supported by Ministry of Science and ICT, Korea 2 | 3 | # This is the list of (Institution) for copyright purposes. 4 | # This does not necessarily list everyone who has contributed code, since in 5 | # some cases, their employer may be the copyright holder. To see the full list 6 | # of contributors, see the revision history in source control 7 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Learning Network Structure with Dropout 2 | 3 | ### **CONTENT** 4 | > Data-dependent variational dropout for learning a network structure 5 | ### **How to Use** 6 | 7 | ```bash 8 | $ cd ~/[WORKING_DIR]/scripts/lenet_dense 9 | $ python ./pretrain.py 10 | $ python ./bbdropout.py 11 | ``` 12 | 13 | 14 | 15 | # XAI Project 16 | 17 | ### **Project Name** 18 | > A machine learning and statistical inference framework for explainable artificial intelligence (development of a human-level learning and inference framework that can explain the reasoning behind its decisions) 19 | ### **Managed by** 20 | > Ministry of Science and ICT/XAIC 21 | ### **Participating Affiliations** 22 | > UNIST, Korea Univ., Yonsei Univ., KAIST, AItrics 23 | ### **Web Site** 24 | > 25 | -------------------------------------------------------------------------------- /model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenXAIProject/Network-Structure-Dropout/723df2d2392ec16eca3452d4afb81d54c4a2f841/model/__init__.py -------------------------------------------------------------------------------- /model/bbdropout.py: -------------------------------------------------------------------------------- 1 | from layers import * 2 | import tensorflow as tf 3 | from tensorflow.contrib.distributions import RelaxedBernoulli 4 | import numpy as np 5 | 6 | digamma = tf.digamma 7 | from digamma import digamma_approx as digamma_approx 8 | lgamma = tf.lgamma 9 | Euler = 0.577215664901532 10 | 11 | def bbdropout(x, training, 12 | alpha=1e-4, thres=1e-2, a_init=-1., tau=1e-1, center_init=1.0, 13 | approx_digamma=True, scale_kl=None, dep=False, 14 | unit_scale=True, collect=True, 15 | name='bbdropout', reuse=None): 16 | 17 | N = tf.shape(x)[0] 18 | K = x.shape[1].value 19 | is_conv = len(x.shape)==4 20 | 21 | with tf.variable_scope(name+'/qpi_vars', reuse=reuse): 22 | with tf.device('/cpu:0'): 23 | a = softplus(tf.get_variable('a_uc', shape=[K], 24 | initializer=tf.constant_initializer(a_init))) 25 | b = softplus(tf.get_variable('b_uc', shape=[K])) 26 | 27 | _digamma = digamma_approx if approx_digamma else digamma 28 | kl = (a-alpha)/a * (-Euler - _digamma(b) - 1/b) \ 29 | + log(a*b) - log(alpha) - (b-1)/b 30 | pi = (1 - tf.random_uniform([K])**(1/b))**(1/a) if training else \ 31 | b*tf.exp(lgamma(1+1/a) + lgamma(b) - lgamma(1+1/a+b)) 32 | 33 | def hard_sigmoid(x): 34 | return tf.clip_by_value(x, thres, 1-thres) 35 | 36 | if dep: 37 | with tf.variable_scope(name+'/pzx_vars', reuse=reuse): 38 | hid = global_avg_pool(x) if is_conv else x 39 | hid = tf.stop_gradient(hid) 40 | with tf.device('/cpu:0'): 41 | hid = layer_norm(hid, scale=False, center=False) 42 | scale = tf.get_variable('scale', shape=[1 if unit_scale else K], 43 | initializer=tf.ones_initializer()) 44 | center = tf.get_variable('center', shape=[K], 45 | initializer=tf.constant_initializer(center_init)) 46 | hid = scale*hid + center 47 | if training: 48 | pi = pi * hard_sigmoid(hid + tf.random_normal(shape=tf.shape(hid))) 49 | z = RelaxedBernoulli(tau, logits=logit(pi)).sample() 50 | else: 51 | pi = pi * hard_sigmoid(hid) 52 | z = tf.where(tf.greater(pi, thres), pi, tf.zeros_like(pi)) 53 | #n_active = tf.reduce_mean( 54 | # tf.reduce_sum(tf.cast(tf.greater(pi, thres), tf.int32), 1)) 55 | n_active = tf.reduce_sum(tf.cast(tf.greater(pi, thres), tf.int32), 1) 56 | n_active = 
tf.reduce_sum(n_active)/N 57 | else: 58 | if training: 59 | z = RelaxedBernoulli(tau, logits=logit(pi)).sample(N) 60 | else: 61 | pi_ = tf.where(tf.greater(pi, thres), pi, tf.zeros_like(pi)) 62 | z = tf.tile(tf.expand_dims(pi_, 0), [N, 1]) 63 | n_active = tf.reduce_sum(tf.cast(tf.greater(pi, thres), tf.int32)) 64 | 65 | if scale_kl is None: 66 | kl = tf.reduce_sum(kl) 67 | else: 68 | kl = scale_kl * tf.reduce_mean(kl) 69 | 70 | if collect: 71 | if reuse is not True: 72 | tf.add_to_collection('kl', kl) 73 | prefix = 'train_' if training else 'test_' 74 | tf.add_to_collection(prefix+'pi', pi) 75 | tf.add_to_collection(prefix+'n_active', n_active) 76 | 77 | z = tf.reshape(z, ([-1, K, 1, 1] if is_conv else [-1, K])) 78 | return x*z 79 | -------------------------------------------------------------------------------- /model/digamma.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | # @MISC {1446110, 3 | # TITLE = {Approximating the Digamma function}, 4 | # AUTHOR = {njuffa (https://math.stackexchange.com/users/114200/njuffa)}, 5 | # HOWPUBLISHED = {Mathematics Stack Exchange}, 6 | # NOTE = {URL:https://math.stackexchange.com/q/1446110 (version: 2015-09-22)}, 7 | # EPRINT = {https://math.stackexchange.com/q/1446110}, 8 | # URL = {https://math.stackexchange.com/q/1446110}} 9 | 10 | def digamma_approx(x): 11 | def digamma_over_one(x): 12 | return tf.log(x + 0.4849142940227510) \ 13 | - 1/(1.0271785180163817*x) 14 | return digamma_over_one(x+1) - 1./x 15 | -------------------------------------------------------------------------------- /model/layers.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | 4 | exp = tf.exp 5 | log = lambda x: tf.log(x + 1e-20) 6 | logit = lambda x: log(x) - log(1-x) 7 | softplus = tf.nn.softplus 8 | softmax = tf.nn.softmax 9 | tanh = tf.nn.tanh 10 | relu = tf.nn.relu 11 | sigmoid = tf.nn.sigmoid 12 | 13 | dense = tf.layers.dense 14 | flatten = tf.contrib.layers.flatten 15 | 16 | def conv(x, filters, kernel_size=3, strides=1, **kwargs): 17 | return tf.layers.conv2d(x, filters, kernel_size, strides, 18 | data_format='channels_first', **kwargs) 19 | 20 | def pool(x, **kwargs): 21 | return tf.layers.max_pooling2d(x, 2, 2, 22 | data_format='channels_first', **kwargs) 23 | 24 | def global_avg_pool(x): 25 | return tf.reduce_mean(x, axis=[2, 3]) 26 | 27 | batch_norm = tf.layers.batch_normalization 28 | layer_norm = tf.contrib.layers.layer_norm 29 | -------------------------------------------------------------------------------- /model/lenet.py: -------------------------------------------------------------------------------- 1 | from layers import * 2 | from utils.train import * 3 | 4 | def lenet_dense(x, y, training, name='lenet', reuse=None, 5 | dropout=None, **dropout_kwargs): 6 | dropout_ = lambda x, subname: x if dropout is None else \ 7 | dropout(x, training, name=name+subname, reuse=reuse, 8 | **dropout_kwargs) 9 | x = dense(dropout_(x, '/dropout1'), 500, activation=relu, 10 | name=name+'/dense1', reuse=reuse) 11 | x = dense(dropout_(x, '/dropout2'), 300, activation=relu, 12 | name=name+'/dense2', reuse=reuse) 13 | x = dense(dropout_(x, '/dropout3'), 10, name=name+'/dense3', reuse=reuse) 14 | 15 | net = {} 16 | all_vars = tf.get_collection('variables', scope=name) 17 | net['qpi_vars'] = [v for v in all_vars if 'qpi_vars' in v.name] 18 | net['pzx_vars'] = [v for v in all_vars if 'pzx_vars' in v.name] 19 | net['weights'] = 
[v for v in all_vars \ 20 | if 'qpi_vars' not in v.name and 'pzx_vars' not in v.name] 21 | 22 | net['cent'] = cross_entropy(x, y) 23 | net['wd'] = weight_decay(1e-4, var_list=net['weights']) 24 | net['acc'] = accuracy(x, y) 25 | 26 | prefix = 'train_' if training else 'test_' 27 | net['kl'] = tf.get_collection('kl') 28 | net['pi'] = tf.get_collection(prefix+'pi') 29 | net['n_active'] = tf.get_collection(prefix+'n_active') 30 | 31 | return net 32 | 33 | def lenet_conv(x, y, training, name='lenet', reuse=None, 34 | dropout=None, **dropout_kwargs): 35 | dropout_ = lambda x, subname: x if dropout is None else \ 36 | dropout(x, training, name=name+subname, reuse=reuse, 37 | **dropout_kwargs) 38 | x = tf.reshape(x, [-1, 1, 28, 28]) 39 | x = conv(x, 20, 5, name=name+'/conv1', reuse=reuse) 40 | x = relu(dropout_(x, '/dropout1')) 41 | x = pool(x, name=name+'/pool1') 42 | x = conv(x, 50, 5, name=name+'/conv2', reuse=reuse) 43 | x = relu(dropout_(x, '/dropout2')) 44 | x = pool(x, name=name+'/pool2') 45 | x = flatten(x) 46 | x = dense(dropout_(x, '/dropout3'), 500, activation=relu, 47 | name=name+'/dense1', reuse=reuse) 48 | x = dense(dropout_(x, '/dropout4'), 10, name=name+'/dense2', reuse=reuse) 49 | 50 | net = {} 51 | all_vars = tf.get_collection('variables', scope=name) 52 | net['qpi_vars'] = [v for v in all_vars if 'qpi_vars' in v.name] 53 | net['pzx_vars'] = [v for v in all_vars if 'pzx_vars' in v.name] 54 | net['weights'] = [v for v in all_vars \ 55 | if 'qpi_vars' not in v.name and 'pzx_vars' not in v.name] 56 | 57 | net['cent'] = cross_entropy(x, y) 58 | net['wd'] = weight_decay(1e-4, var_list=net['weights']) 59 | net['acc'] = accuracy(x, y) 60 | 61 | prefix = 'train_' if training else 'test_' 62 | net['kl'] = tf.get_collection('kl') 63 | net['pi'] = tf.get_collection(prefix+'pi') 64 | net['n_active'] = tf.get_collection(prefix+'n_active') 65 | 66 | return net 67 | -------------------------------------------------------------------------------- /model/sbpdropout.py: -------------------------------------------------------------------------------- 1 | # copied from https://github.com/necludov/group-sparsity-sbp 2 | import tensorflow as tf 3 | from tensorflow.python.ops.distributions import special_math 4 | import numpy as np 5 | 6 | def phi(x): 7 | return 0.5*tf.erfc(-x/tf.sqrt(2.0)) 8 | 9 | def __erfinv(x): 10 | w = -tf.log((1.0-x)*(1.0+x)-1e-5) 11 | p_small = 2.81022636e-08*tf.ones_like(x) 12 | p_small = 3.43273939e-07 + p_small*(w-2.5) 13 | p_small = -3.5233877e-06 + p_small*(w-2.5) 14 | p_small = -4.39150654e-06 + p_small*(w-2.5) 15 | p_small = 0.00021858087 + p_small*(w-2.5) 16 | p_small = -0.00125372503 + p_small*(w-2.5) 17 | p_small = -0.00417768164 + p_small*(w-2.5) 18 | p_small = 0.246640727 + p_small*(w-2.5) 19 | p_small = 1.50140941 + p_small*(w-2.5) 20 | 21 | p_big = -0.000200214257*tf.ones_like(x) 22 | p_big = 0.000100950558 + p_big*(tf.sqrt(w) - 3.0) 23 | p_big = 0.00134934322 + p_big*(tf.sqrt(w) - 3.0) 24 | p_big = -0.00367342844 + p_big*(tf.sqrt(w) - 3.0) 25 | p_big = 0.00573950773 + p_big*(tf.sqrt(w) - 3.0) 26 | p_big = -0.0076224613 + p_big*(tf.sqrt(w) - 3.0) 27 | p_big = 0.00943887047 + p_big*(tf.sqrt(w) - 3.0) 28 | p_big = 1.00167406 + p_big*(tf.sqrt(w) - 3.0) 29 | p_big = 2.83297682 + p_big*(tf.sqrt(w) - 3.0) 30 | 31 | small_mask = tf.cast(tf.less(w, 5.0*tf.ones_like(w)), tf.float32) 32 | big_mask = tf.cast(tf.greater_equal(w, 5.0*tf.ones_like(w)), tf.float32) 33 | p = p_small*small_mask + p_big*big_mask 34 | return p*x 35 | 36 | def erfinv(x): 37 | return 
special_math.ndtri((x+1.)/2.0)/tf.sqrt(2.) 38 | 39 | def erfcx(x): 40 | """M. M. Shepherd and J. G. Laframboise, 41 | MATHEMATICS OF COMPUTATION 36, 249 (1981) 42 | """ 43 | K = 3.75 44 | y = (tf.abs(x)-K) / (tf.abs(x)+K) 45 | y2 = 2.0*y 46 | (d, dd) = (-0.4e-20, 0.0) 47 | (d, dd) = (y2 * d - dd + 0.3e-20, d) 48 | (d, dd) = (y2 * d - dd + 0.97e-19, d) 49 | (d, dd) = (y2 * d - dd + 0.27e-19, d) 50 | (d, dd) = (y2 * d - dd + -0.2187e-17, d) 51 | (d, dd) = (y2 * d - dd + -0.2237e-17, d) 52 | (d, dd) = (y2 * d - dd + 0.50681e-16, d) 53 | (d, dd) = (y2 * d - dd + 0.74182e-16, d) 54 | (d, dd) = (y2 * d - dd + -0.1250795e-14, d) 55 | (d, dd) = (y2 * d - dd + -0.1864563e-14, d) 56 | (d, dd) = (y2 * d - dd + 0.33478119e-13, d) 57 | (d, dd) = (y2 * d - dd + 0.32525481e-13, d) 58 | (d, dd) = (y2 * d - dd + -0.965469675e-12, d) 59 | (d, dd) = (y2 * d - dd + 0.194558685e-12, d) 60 | (d, dd) = (y2 * d - dd + 0.28687950109e-10, d) 61 | (d, dd) = (y2 * d - dd + -0.63180883409e-10, d) 62 | (d, dd) = (y2 * d - dd + -0.775440020883e-09, d) 63 | (d, dd) = (y2 * d - dd + 0.4521959811218e-08, d) 64 | (d, dd) = (y2 * d - dd + 0.10764999465671e-07, d) 65 | (d, dd) = (y2 * d - dd + -0.218864010492344e-06, d) 66 | (d, dd) = (y2 * d - dd + 0.774038306619849e-06, d) 67 | (d, dd) = (y2 * d - dd + 0.4139027986073010e-05, d) 68 | (d, dd) = (y2 * d - dd + -0.69169733025012064e-04, d) 69 | (d, dd) = (y2 * d - dd + 0.490775836525808632e-03, d) 70 | (d, dd) = (y2 * d - dd + -0.2413163540417608191e-02, d) 71 | (d, dd) = (y2 * d - dd + 0.9074997670705265094e-02, d) 72 | (d, dd) = (y2 * d - dd + -0.26658668435305752277e-01, d) 73 | (d, dd) = (y2 * d - dd + 0.59209939998191890498e-01, d) 74 | (d, dd) = (y2 * d - dd + -0.84249133366517915584e-01, d) 75 | (d, dd) = (y2 * d - dd + -0.4590054580646477331e-02, d) 76 | d = y * d - dd + 0.1177578934567401754080e+01 77 | result = d/(1.0+2.0*tf.abs(x)) 78 | result = tf.where(tf.is_nan(result), tf.ones_like(result), result) 79 | result = tf.where(tf.is_inf(result), tf.ones_like(result), result) 80 | 81 | negative_mask = tf.cast(tf.less(x, 0.0), tf.float32) 82 | positive_mask = tf.cast(tf.greater_equal(x, 0.0), tf.float32) 83 | negative_result = 2.0*tf.exp(x*x)-result 84 | negative_result = tf.where(tf.is_nan(negative_result), tf.ones_like(negative_result), negative_result) 85 | negative_result = tf.where(tf.is_inf(negative_result), tf.ones_like(negative_result), negative_result) 86 | result = negative_mask * negative_result + positive_mask * result 87 | return result 88 | 89 | def phi_inv(x): 90 | return tf.sqrt(2.0)*erfinv(2.0*x-1) 91 | 92 | def mean_truncated_log_normal_straight(mu, sigma, a, b): 93 | alpha = (a - mu)/sigma 94 | beta = (b - mu)/sigma 95 | z = phi(beta) - phi(alpha) 96 | mean = tf.exp(mu+sigma*sigma/2.0)/z*(phi(sigma-alpha) - phi(sigma-beta)) 97 | return mean 98 | 99 | def mean_truncated_log_normal_reduced(mu, sigma, a, b): 100 | alpha = (a - mu)/sigma 101 | beta = (b - mu)/sigma 102 | z = phi(beta) - phi(alpha) 103 | mean = erfcx((sigma-beta)/tf.sqrt(2.0))*tf.exp(b-beta*beta/2) 104 | mean = mean - erfcx((sigma-alpha)/tf.sqrt(2.0))*tf.exp(a-alpha*alpha/2) 105 | mean = mean/(2*z) 106 | return mean 107 | 108 | def mean_truncated_log_normal(mu, sigma, a, b): 109 | return mean_truncated_log_normal_reduced(mu, sigma, a, b) 110 | 111 | def median_truncated_log_normal(mu, sigma, a, b): 112 | alpha = (a - mu)/sigma 113 | beta = (b - mu)/sigma 114 | gamma = phi(alpha)+0.5*(phi(beta)-phi(alpha)) 115 | return tf.exp(phi_inv(gamma)*sigma+mu) 116 | 117 | def 
snr_truncated_log_normal(mu, sigma, a, b): 118 | alpha = (a - mu)/sigma 119 | beta = (b - mu)/sigma 120 | z = phi(beta) - phi(alpha) 121 | ratio = erfcx((sigma-beta)/tf.sqrt(2.0))*tf.exp((b-mu)-beta**2/2.0) 122 | ratio = ratio - erfcx((sigma-alpha)/tf.sqrt(2.0))*tf.exp((a-mu)-alpha**2/2.0) 123 | denominator = 2*z*erfcx((2.0*sigma-beta)/tf.sqrt(2.0))*tf.exp(2.0*(b-mu)-beta**2/2.0) 124 | denominator = denominator - 2*z*erfcx((2.0*sigma-alpha)/tf.sqrt(2.0))*tf.exp(2.0*(a-mu)-alpha**2/2.0) 125 | denominator = denominator - ratio**2 126 | ratio = ratio/tf.sqrt(denominator) 127 | return ratio 128 | 129 | def sample_truncated_normal(mu, sigma, a, b): 130 | alpha = (a - mu)/sigma 131 | beta = (b - mu)/sigma 132 | gamma = phi(alpha)+tf.random_uniform(mu.shape)*(phi(beta)-phi(alpha)) 133 | return tf.clip_by_value(phi_inv(tf.clip_by_value(gamma, 1e-5, 1.0-1e-5))*sigma+mu, a, b) 134 | 135 | def sbpdropout(x, training, 136 | thres=1.0, scale_kl=None, collect=True, 137 | name='sbpdropout', reuse=None): 138 | 139 | min_log = -20.0 140 | max_log = 0.0 141 | 142 | axis = 1 143 | 144 | params_shape = np.ones(x.get_shape().ndims) 145 | params_shape[axis] = x.get_shape()[axis].value 146 | 147 | with tf.variable_scope(name+'/qpi_vars', reuse=reuse): 148 | with tf.device('/cpu:0'): 149 | mu = tf.get_variable('mu', shape=params_shape.tolist(), 150 | initializer=tf.zeros_initializer()) 151 | log_sigma = tf.get_variable('log_sigma', shape=params_shape.tolist(), 152 | initializer=tf.constant_initializer(-5.0)) 153 | 154 | mu = tf.clip_by_value(mu, -20.0, 5.0) 155 | log_sigma = tf.clip_by_value(log_sigma, -20.0, 5.0) 156 | sigma = tf.exp(log_sigma) 157 | 158 | # adding loss 159 | alpha = (min_log-mu)/sigma 160 | beta = (max_log-mu)/sigma 161 | z = phi(beta) - phi(alpha) 162 | 163 | def pdf(x): 164 | return tf.exp(-x*x/2.0)/tf.sqrt(2.0*np.pi) 165 | kl = -log_sigma-tf.log(z)-(alpha*pdf(alpha)-beta*pdf(beta))/(2.0*z) 166 | kl = kl+tf.log(max_log-min_log)-tf.log(2.0*np.pi*np.e)/2.0 167 | if scale_kl is None: 168 | kl = tf.reduce_sum(kl) 169 | else: 170 | kl = scale_kl*tf.reduce_mean(kl) 171 | 172 | if training: 173 | z = tf.exp(sample_truncated_normal(mu, sigma, min_log, max_log)) 174 | else: 175 | z = mean_truncated_log_normal(mu, sigma, min_log, max_log) 176 | snr = snr_truncated_log_normal(mu, sigma, min_log, max_log) 177 | mask = tf.cast(tf.greater(snr, thres*tf.ones_like(snr)), tf.float32) 178 | 179 | n_active = tf.reduce_sum(tf.cast(mask, tf.int32)) 180 | 181 | if collect: 182 | if reuse is not True: 183 | tf.add_to_collection('kl', kl) 184 | prefix = 'train_' if training else 'test_' 185 | tf.add_to_collection(prefix+'p', snr) 186 | tf.add_to_collection(prefix+'n_active', n_active) 187 | 188 | if not training: 189 | z = mask*z 190 | 191 | return x*z 192 | -------------------------------------------------------------------------------- /model/utils: -------------------------------------------------------------------------------- 1 | ../utils/ -------------------------------------------------------------------------------- /scripts/lenet_conv/bbdropout.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import tensorflow as tf 3 | from tensorflow.examples.tutorials.mnist import input_data 4 | from model.lenet import lenet_conv 5 | from model.bbdropout import bbdropout 6 | from utils.accumulator import Accumulator 7 | from utils.train import * 8 | from utils.mnist import mnist_input 9 | import time 10 | import os 11 | import argparse 12 | 
import csv 13 | from pylab import * 14 | 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument('--batch_size', type=int, default=100) 17 | parser.add_argument('--n_epochs', type=int, default=200) 18 | parser.add_argument('--save_freq', type=int, default=20) 19 | parser.add_argument('--savedir', type=str, default=None) 20 | parser.add_argument('--pretraindir', type=str, default=None) 21 | parser.add_argument('--mode', type=str, default='train') 22 | parser.add_argument('--gpu_num', type=int, default=0) 23 | parser.add_argument('--csvfn', type=str, default=None) 24 | args = parser.parse_args() 25 | 26 | os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1' 27 | os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu_num) 28 | 29 | pretraindir = './results/pretrained' if args.pretraindir is None else args.pretraindir 30 | savedir = './results/bbdropout/sample_run' if args.savedir is None else args.savedir 31 | if not os.path.isdir(savedir): 32 | os.makedirs(savedir) 33 | 34 | batch_size = args.batch_size 35 | mnist, n_train_batches, n_test_batches = mnist_input(batch_size) 36 | x = tf.placeholder(tf.float32, [None, 784]) 37 | y = tf.placeholder(tf.float32, [None, 10]) 38 | N = mnist.train.num_examples 39 | scale_kl = 1e-2*N 40 | dropout = bbdropout 41 | net = lenet_conv(x, y, True, dropout=dropout, scale_kl=scale_kl) 42 | tnet = lenet_conv(x, y, False, reuse=True, dropout=dropout, 43 | scale_kl=scale_kl) 44 | 45 | def train(): 46 | loss = net['cent'] + tf.add_n(net['kl'])/float(N) + net['wd'] 47 | global_step = tf.train.get_or_create_global_step() 48 | bdr = [int(n_train_batches*(args.n_epochs-1)*r) for r in [0.5, 0.75]] 49 | vals = [1e-2, 1e-3, 1e-4] 50 | lr = tf.train.piecewise_constant(tf.cast(global_step, tf.int32), bdr, vals) 51 | train_op1 = tf.train.AdamOptimizer(lr).minimize(loss, 52 | var_list=net['qpi_vars'], global_step=global_step) 53 | train_op2 = tf.train.AdamOptimizer(0.1*lr).minimize(loss, 54 | var_list=net['weights']) 55 | train_op = tf.group(train_op1, train_op2) 56 | 57 | pretrain_saver = tf.train.Saver(net['weights']) 58 | saver = tf.train.Saver(net['weights']+net['qpi_vars']) 59 | logfile = open(os.path.join(savedir, 'train.log'), 'w', 0) 60 | 61 | sess = tf.Session() 62 | sess.run(tf.global_variables_initializer()) 63 | pretrain_saver.restore(sess, os.path.join(pretraindir, 'model')) 64 | 65 | train_logger = Accumulator('cent', 'acc') 66 | train_to_run = [train_op, net['cent'], net['acc']] 67 | test_logger = Accumulator('cent', 'acc') 68 | test_to_run = [tnet['cent'], tnet['acc']] 69 | for i in range(args.n_epochs): 70 | line = 'Epoch %d start, learning rate %f' % (i+1, sess.run(lr)) 71 | print(line) 72 | logfile.write(line + '\n') 73 | train_logger.clear() 74 | start = time.time() 75 | for j in range(n_train_batches): 76 | bx, by = mnist.train.next_batch(batch_size) 77 | train_logger.accum(sess.run(train_to_run, {x:bx, y:by})) 78 | train_logger.print_(header='train', epoch=i+1, 79 | time=time.time()-start, logfile=logfile) 80 | 81 | test_logger.clear() 82 | for j in range(n_test_batches): 83 | bx, by = mnist.test.next_batch(batch_size) 84 | test_logger.accum(sess.run(test_to_run, {x:bx, y:by})) 85 | test_logger.print_(header='test', epoch=i+1, 86 | time=time.time()-start, logfile=logfile) 87 | line = 'kl: ' + str(sess.run(tnet['kl'])) + '\n' 88 | line += 'n_active: ' + str(sess.run(tnet['n_active'])) + '\n' 89 | print(line) 90 | logfile.write(line+'\n') 91 | if (i+1) % args.save_freq == 0: 92 | saver.save(sess, os.path.join(savedir, 'model')) 93 | 94 | logfile.close() 95 
| saver.save(sess, os.path.join(savedir, 'model')) 96 | 97 | def test(): 98 | sess = tf.Session() 99 | saver = tf.train.Saver(tnet['weights']+tnet['qpi_vars']+tnet['pzx_vars']) 100 | saver.restore(sess, os.path.join(savedir, 'model')) 101 | logger = Accumulator('cent', 'acc') 102 | to_run = [tnet['cent'], tnet['acc']] 103 | for j in range(n_test_batches): 104 | bx, by = mnist.test.next_batch(batch_size) 105 | logger.accum(sess.run(to_run, {x:bx, y:by})) 106 | logger.print_(header='test') 107 | line = 'kl: ' + str(sess.run(tnet['kl'])) + '\n' 108 | line += 'n_active: ' + str(sess.run(tnet['n_active'])) + '\n' 109 | print(line) 110 | 111 | def visualize(): 112 | sess = tf.Session() 113 | saver = tf.train.Saver(tnet['weights']+tnet['qpi_vars']) 114 | saver.restore(sess, os.path.join(savedir, 'model')) 115 | 116 | n_drop = len(tnet['n_active']) 117 | fig = figure('pi') 118 | axarr = fig.subplots(n_drop) 119 | for i in range(n_drop): 120 | np_pi = sess.run(tnet['pi'][i]).reshape((1,-1)) 121 | im = axarr[i].imshow(np_pi, cmap='gray', aspect='auto') 122 | axarr[i].yaxis.set_visible(False) 123 | axarr[i].xaxis.set_major_locator(MaxNLocator(integer=True)) 124 | if i == n_drop-1: 125 | axarr[i].set_xlabel('neurons') 126 | fig.colorbar(im, ax=axarr[i]) 127 | show() 128 | 129 | def record(): 130 | sess = tf.Session() 131 | saver = tf.train.Saver(tnet['weights']+tnet['qpi_vars']) 132 | saver.restore(sess, os.path.join(savedir, 'model')) 133 | logger = Accumulator('cent', 'acc') 134 | to_run = [tnet['cent'], tnet['acc']] 135 | for j in range(n_test_batches): 136 | bx, by = mnist.test.next_batch(batch_size) 137 | logger.accum(sess.run(to_run, {x:bx, y:by})) 138 | np_n_active = sess.run(tnet['n_active']) 139 | 140 | if not os.path.isdir('../../records'): 141 | os.makedirs('../../records') 142 | csvfn = os.path.join('../../records', 143 | 'bbdropout_lenet_conv.csv' if args.csvfn is None else args.csvfn) 144 | 145 | if csvfn is not None: 146 | flag = 'a' if os.path.exists(csvfn) else 'w' 147 | with open(csvfn, flag) as f: 148 | writer = csv.writer(f) 149 | if flag=='w': 150 | writer.writerow(['savedir', 'cent', 'acc', 'n_active']) 151 | line = [savedir] 152 | line.append('%.4f' % logger.get('cent')) 153 | line.append('%.4f' % logger.get('acc')) 154 | line.append('-'.join(str(x) for x in np_n_active)) 155 | writer.writerow(line) 156 | 157 | if __name__=='__main__': 158 | if args.mode == 'train': 159 | train() 160 | elif args.mode == 'test': 161 | test() 162 | elif args.mode == 'vis': 163 | visualize() 164 | elif args.mode == 'record': 165 | record() 166 | else: 167 | raise ValueError('Invalid mode %s' % args.mode) 168 | -------------------------------------------------------------------------------- /scripts/lenet_conv/dbbdropout.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import tensorflow as tf 3 | from tensorflow.examples.tutorials.mnist import input_data 4 | from model.lenet import lenet_conv 5 | from model.bbdropout import bbdropout 6 | from utils.accumulator import Accumulator 7 | from utils.train import * 8 | from utils.mnist import mnist_input 9 | import time 10 | import os 11 | import argparse 12 | import csv 13 | import matplotlib 14 | matplotlib.use('Agg') 15 | import matplotlib.pyplot as plt 16 | 17 | parser = argparse.ArgumentParser() 18 | parser.add_argument('--batch_size', type=int, default=100) 19 | parser.add_argument('--n_epochs', type=int, default=200) 20 | parser.add_argument('--save_freq', type=int, 
default=20) 21 | parser.add_argument('--vis_freq', type=int, default=20) 22 | parser.add_argument('--center_init', type=float, default=1.0) 23 | parser.add_argument('--pretraindir', type=str, default=None) 24 | parser.add_argument('--savedir', type=str, default=None) 25 | parser.add_argument('--mode', type=str, default='train') 26 | parser.add_argument('--gpu_num', type=int, default=0) 27 | parser.add_argument('--csvfn', type=str, default=None) 28 | args = parser.parse_args() 29 | 30 | os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1' 31 | os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu_num) 32 | 33 | pretraindir = './results/bbdropout/sample_run' if args.pretraindir is None else args.pretraindir 34 | savedir = './results/dbbdropout/sample_run' if args.savedir is None else args.savedir 35 | if not os.path.isdir(savedir): 36 | os.makedirs(savedir) 37 | figdir = os.path.join(savedir, 'figs') 38 | if not os.path.isdir(figdir): 39 | os.makedirs(figdir) 40 | 41 | batch_size = args.batch_size 42 | mnist, n_train_batches, n_test_batches = mnist_input(batch_size) 43 | x = tf.placeholder(tf.float32, [None, 784]) 44 | y = tf.placeholder(tf.float32, [None, 10]) 45 | N = mnist.train.num_examples 46 | scale_kl = 1e-2*N 47 | center_init = args.center_init 48 | net = lenet_conv(x, y, True, dropout=bbdropout, scale_kl=scale_kl, 49 | dep=True, center_init=center_init) 50 | tnet = lenet_conv(x, y, False, reuse=True, 51 | dropout=bbdropout, scale_kl=scale_kl, 52 | dep=True, center_init=center_init) 53 | n_drop = len(tnet['n_active']) 54 | 55 | def train(): 56 | loss = net['cent'] + tf.add_n(net['kl'])/float(N) + net['wd'] 57 | global_step = tf.train.get_or_create_global_step() 58 | bdr = [int(n_train_batches*(args.n_epochs-1)*r) for r in [0.5, 0.75]] 59 | vals = [1e-2, 1e-3, 1e-4] 60 | lr = tf.train.piecewise_constant(tf.cast(global_step, tf.int32), bdr, vals) 61 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) 62 | with tf.control_dependencies(update_ops): 63 | train_op1 = tf.train.AdamOptimizer(lr).minimize(loss, 64 | var_list=net['pzx_vars'], global_step=global_step) 65 | train_op2 = tf.train.AdamOptimizer(0.1*lr).minimize(loss, 66 | var_list=net['weights']) 67 | train_op = tf.group(train_op1, train_op2) 68 | 69 | pretrain_saver = tf.train.Saver(net['weights']+net['qpi_vars']) 70 | saver = tf.train.Saver(net['weights']+net['qpi_vars']+net['pzx_vars']) 71 | logfile = open(os.path.join(savedir, 'train.log'), 'w', 0) 72 | 73 | sess = tf.Session() 74 | sess.run(tf.global_variables_initializer()) 75 | pretrain_saver.restore(sess, os.path.join(pretraindir, 'model')) 76 | 77 | train_logger = Accumulator('cent', 'acc') 78 | train_to_run = [train_op, net['cent'], net['acc']] 79 | test_logger = Accumulator('cent', 'acc') 80 | test_to_run = [tnet['cent'], tnet['acc']] + tnet['n_active'] 81 | for i in range(args.n_epochs): 82 | line = 'Epoch %d start, learning rate %f' % (i+1, sess.run(lr)) 83 | print(line) 84 | logfile.write(line + '\n') 85 | train_logger.clear() 86 | start = time.time() 87 | for j in range(n_train_batches): 88 | bx, by = mnist.train.next_batch(batch_size) 89 | train_logger.accum(sess.run(train_to_run, {x:bx, y:by})) 90 | train_logger.print_(header='train', epoch=i+1, 91 | time=time.time()-start, logfile=logfile) 92 | 93 | test_logger.clear() 94 | np_n_active = [0]*n_drop 95 | for j in range(n_test_batches): 96 | bx, by = mnist.test.next_batch(batch_size) 97 | res = sess.run(test_to_run, {x:bx, y:by}) 98 | test_logger.accum(res[:-n_drop]) 99 | np_n_active = [a + b for a, b in 
zip(np_n_active, res[-n_drop:])] 100 | test_logger.print_(header='test', epoch=i+1, 101 | time=time.time()-start, logfile=logfile) 102 | np_n_active = [int(a/n_test_batches) for a in np_n_active] 103 | line = 'kl: ' + str(sess.run(tnet['kl'])) + '\n' 104 | line += 'n_active: ' + str(np_n_active) + '\n' 105 | print(line) 106 | logfile.write(line+'\n') 107 | 108 | if (i+1) % args.save_freq == 0: 109 | saver.save(sess, os.path.join(savedir, 'model')) 110 | 111 | if (i+1)%args.vis_freq == 0: 112 | fig = _visualize(sess) 113 | fig.savefig(os.path.join(figdir, 'epoch%d.png'%(i+1)), dpi=200) 114 | 115 | saver.save(sess, os.path.join(savedir, 'model')) 116 | logfile.close() 117 | 118 | def test(): 119 | sess = tf.Session() 120 | saver = tf.train.Saver(tnet['weights']+tnet['qpi_vars']+tnet['pzx_vars']) 121 | saver.restore(sess, os.path.join(savedir, 'model')) 122 | logger = Accumulator('cent', 'acc') 123 | to_run = [tnet['cent'], tnet['acc']] + tnet['n_active'] 124 | np_n_active = [0]*n_drop 125 | for j in range(n_test_batches): 126 | bx, by = mnist.test.next_batch(batch_size) 127 | res = sess.run(to_run, {x:bx, y:by}) 128 | logger.accum(res[:-n_drop]) 129 | np_n_active = [a + b for a, b in zip(np_n_active, res[-n_drop:])] 130 | np_n_active = [int(a/n_test_batches) for a in np_n_active] 131 | logger.print_(header='test') 132 | line = 'kl: ' + str(sess.run(tnet['kl'])) + '\n' 133 | line += 'n_active:' + str(np_n_active) + '\n' 134 | print(line) 135 | 136 | def _visualize(sess): 137 | pi_csum = [tf.matmul(y, pi, transpose_a=True) for pi in tnet['pi']] 138 | csum = tf.expand_dims(tf.reduce_sum(y, 0), 1) 139 | 140 | np_pi_csum = [0]*n_drop 141 | np_csum = 0 142 | for j in range(n_test_batches): 143 | bx, by = mnist.test.next_batch(args.batch_size) 144 | A, B = sess.run([pi_csum, csum], {x:bx, y:by}) 145 | for k in range(len(pi_csum)): 146 | np_pi_csum[k] += A[k] 147 | np_csum += B 148 | 149 | fig = plt.figure('vis') 150 | axarr = fig.subplots(n_drop) 151 | for i in range(n_drop): 152 | im = axarr[i].imshow(np_pi_csum[i]/np_csum, cmap='gray', aspect='auto') 153 | fig.colorbar(im, ax=axarr[i]) 154 | return fig 155 | 156 | def visualize(): 157 | sess = tf.Session() 158 | saver = tf.train.Saver(tnet['weights']+tnet['qpi_vars']+tnet['pzx_vars']) 159 | saver.restore(sess, os.path.join(savedir, 'model')) 160 | _visualize(sess) 161 | plt.show() 162 | 163 | def record(): 164 | sess = tf.Session() 165 | saver = tf.train.Saver(tnet['weights']+tnet['qpi_vars']+tnet['pzx_vars']) 166 | saver.restore(sess, os.path.join(savedir, 'model')) 167 | logger = Accumulator('cent', 'acc') 168 | to_run = [tnet['cent'], tnet['acc']] + tnet['n_active'] 169 | np_n_active = [0]*n_drop 170 | for j in range(n_test_batches): 171 | bx, by = mnist.test.next_batch(batch_size) 172 | res = sess.run(to_run, {x:bx, y:by}) 173 | logger.accum(res[:-n_drop]) 174 | np_n_active = [a + b for a, b in zip(np_n_active, res[-n_drop:])] 175 | np_n_active = [int(a/n_test_batches) for a in np_n_active] 176 | 177 | if not os.path.isdir('../../records'): 178 | os.makedirs('../../records') 179 | csvfn = os.path.join('../../records', 180 | 'dbbdropout_lenet_conv.csv' if args.csvfn is None else args.csvfn) 181 | 182 | if csvfn is not None: 183 | flag = 'a' if os.path.exists(csvfn) else 'w' 184 | with open(csvfn, flag) as f: 185 | writer = csv.writer(f) 186 | if flag=='w': 187 | writer.writerow(['savedir', 'cent', 'acc', 'n_active']) 188 | line = [savedir] 189 | line.append('%.4f' % logger.get('cent')) 190 | line.append('%.4f' % logger.get('acc')) 191 | 
line.append('-'.join(str(x) for x in np_n_active)) 192 | writer.writerow(line) 193 | 194 | if __name__=='__main__': 195 | if args.mode == 'train': 196 | train() 197 | elif args.mode == 'test': 198 | test() 199 | elif args.mode == 'vis': 200 | visualize() 201 | elif args.mode == 'record': 202 | record() 203 | else: 204 | raise ValueError('Invalid mode %s' % args.mode) 205 | -------------------------------------------------------------------------------- /scripts/lenet_conv/model: -------------------------------------------------------------------------------- 1 | ../../model/ -------------------------------------------------------------------------------- /scripts/lenet_conv/pretrain.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import tensorflow as tf 3 | from tensorflow.examples.tutorials.mnist import input_data 4 | from model.lenet import lenet_conv 5 | from utils.accumulator import Accumulator 6 | from utils.train import * 7 | from utils.mnist import mnist_input 8 | import time 9 | import os 10 | import argparse 11 | 12 | parser = argparse.ArgumentParser() 13 | parser.add_argument('--batch_size', type=int, default=100) 14 | parser.add_argument('--n_epochs', type=int, default=200) 15 | parser.add_argument('--save_freq', type=int, default=20) 16 | parser.add_argument('--savedir', type=str, default=None) 17 | parser.add_argument('--mode', type=str, default='train') 18 | parser.add_argument('--gpu_num', type=int, default=0) 19 | args = parser.parse_args() 20 | 21 | os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1' 22 | os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu_num) 23 | 24 | savedir = './results/pretrained' if args.savedir is None else args.savedir 25 | if not os.path.isdir(savedir): 26 | os.makedirs(savedir) 27 | 28 | batch_size = args.batch_size 29 | mnist, n_train_batches, n_test_batches = mnist_input(batch_size) 30 | x = tf.placeholder(tf.float32, [None, 784]) 31 | y = tf.placeholder(tf.float32, [None, 10]) 32 | net = lenet_conv(x, y, True) 33 | tnet = lenet_conv(x, y, False, reuse=True) 34 | 35 | def train(): 36 | loss = net['cent'] + net['wd'] 37 | global_step = tf.train.get_or_create_global_step() 38 | lr = tf.train.piecewise_constant(tf.cast(global_step, tf.int32), 39 | [n_train_batches*args.n_epochs/2], [1e-4, 1e-5]) 40 | train_op = tf.train.AdamOptimizer(lr).minimize(loss, global_step=global_step) 41 | 42 | saver = tf.train.Saver(net['weights']) 43 | logfile = open(os.path.join(savedir, 'train.log'), 'w', 0) 44 | 45 | sess = tf.Session() 46 | sess.run(tf.global_variables_initializer()) 47 | 48 | train_logger = Accumulator('cent', 'acc') 49 | train_to_run = [train_op, net['cent'], net['acc']] 50 | test_logger = Accumulator('cent', 'acc') 51 | test_to_run = [tnet['cent'], tnet['acc']] 52 | for i in range(args.n_epochs): 53 | line = 'Epoch %d start, learning rate %f' % (i+1, sess.run(lr)) 54 | print(line) 55 | logfile.write(line + '\n') 56 | train_logger.clear() 57 | start = time.time() 58 | for j in range(n_train_batches): 59 | bx, by = mnist.train.next_batch(batch_size) 60 | train_logger.accum(sess.run(train_to_run, {x:bx, y:by})) 61 | train_logger.print_(header='train', epoch=i+1, 62 | time=time.time()-start, logfile=logfile) 63 | 64 | test_logger.clear() 65 | for j in range(n_test_batches): 66 | bx, by = mnist.test.next_batch(batch_size) 67 | test_logger.accum(sess.run(test_to_run, {x:bx, y:by})) 68 | test_logger.print_(header='test', epoch=i+1, 69 | time=time.time()-start, logfile=logfile) 
70 | print() 71 | logfile.write('\n') 72 | if (i+1)%args.save_freq == 0: 73 | saver.save(sess, os.path.join(savedir, 'model')) 74 | 75 | logfile.close() 76 | saver.save(sess, os.path.join(savedir, 'model')) 77 | 78 | def test(): 79 | sess = tf.Session() 80 | saver = tf.train.Saver(tnet['weights']) 81 | saver.restore(sess, os.path.join(savedir, 'model')) 82 | logger = Accumulator('cent', 'acc') 83 | to_run = [tnet['cent'], tnet['acc']] 84 | for j in range(n_test_batches): 85 | bx, by = mnist.test.next_batch(batch_size) 86 | logger.accum(sess.run(to_run, {x:bx, y:by})) 87 | logger.print_(header='test') 88 | 89 | if __name__=='__main__': 90 | if args.mode == 'train': 91 | train() 92 | elif args.mode == 'test': 93 | test() 94 | else: 95 | raise ValueError('Invalid mode %s' % args.mode) 96 | -------------------------------------------------------------------------------- /scripts/lenet_conv/sbpdropout.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import tensorflow as tf 3 | from tensorflow.examples.tutorials.mnist import input_data 4 | from model.lenet import lenet_conv 5 | from model.sbpdropout import sbpdropout 6 | from utils.accumulator import Accumulator 7 | from utils.train import * 8 | from utils.mnist import mnist_input 9 | import time 10 | import os 11 | import argparse 12 | import csv 13 | 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--batch_size', type=int, default=100) 16 | parser.add_argument('--n_epochs', type=int, default=200) 17 | parser.add_argument('--save_freq', type=int, default=20) 18 | parser.add_argument('--savedir', type=str, default=None) 19 | parser.add_argument('--pretraindir', type=str, default=None) 20 | parser.add_argument('--mode', type=str, default='train') 21 | parser.add_argument('--gpu_num', type=int, default=0) 22 | parser.add_argument('--csvfn', type=str, default=None) 23 | args = parser.parse_args() 24 | 25 | os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1' 26 | os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu_num) 27 | 28 | pretraindir = './results/pretrained' if args.pretraindir is None else args.pretraindir 29 | savedir = './results/bbdropout/sample_run' if args.savedir is None else args.savedir 30 | if not os.path.isdir(savedir): 31 | os.makedirs(savedir) 32 | 33 | batch_size = args.batch_size 34 | mnist, n_train_batches, n_test_batches = mnist_input(batch_size) 35 | x = tf.placeholder(tf.float32, [None, 784]) 36 | y = tf.placeholder(tf.float32, [None, 10]) 37 | N = mnist.train.num_examples 38 | scale_kl = 1e-2*N 39 | dropout = sbpdropout 40 | net = lenet_conv(x, y, True, dropout=dropout, scale_kl=scale_kl) 41 | tnet = lenet_conv(x, y, False, reuse=True, dropout=dropout, 42 | scale_kl=scale_kl) 43 | 44 | def train(): 45 | loss = net['cent'] + tf.add_n(net['kl'])/float(N) + net['wd'] 46 | global_step = tf.train.get_or_create_global_step() 47 | bdr = [int(n_train_batches*(args.n_epochs-1)*r) for r in [0.5, 0.75]] 48 | vals = [1e-2, 1e-3, 1e-4] 49 | lr = tf.train.piecewise_constant(tf.cast(global_step, tf.int32), bdr, vals) 50 | train_op1 = tf.train.AdamOptimizer(lr).minimize(loss, 51 | var_list=net['qpi_vars'], global_step=global_step) 52 | train_op2 = tf.train.AdamOptimizer(0.1*lr).minimize(loss, 53 | var_list=net['weights']) 54 | train_op = tf.group(train_op1, train_op2) 55 | 56 | pretrain_saver = tf.train.Saver(net['weights']) 57 | saver = tf.train.Saver(net['weights']+net['qpi_vars']) 58 | logfile = open(os.path.join(savedir, 'train.log'), 'w', 0) 
59 | 60 | sess = tf.Session() 61 | sess.run(tf.global_variables_initializer()) 62 | pretrain_saver.restore(sess, os.path.join(pretraindir, 'model')) 63 | 64 | train_logger = Accumulator('cent', 'acc') 65 | train_to_run = [train_op, net['cent'], net['acc']] 66 | test_logger = Accumulator('cent', 'acc') 67 | test_to_run = [tnet['cent'], tnet['acc']] 68 | for i in range(args.n_epochs): 69 | line = 'Epoch %d start, learning rate %f' % (i+1, sess.run(lr)) 70 | print(line) 71 | logfile.write(line + '\n') 72 | train_logger.clear() 73 | start = time.time() 74 | for j in range(n_train_batches): 75 | bx, by = mnist.train.next_batch(batch_size) 76 | train_logger.accum(sess.run(train_to_run, {x:bx, y:by})) 77 | train_logger.print_(header='train', epoch=i+1, 78 | time=time.time()-start, logfile=logfile) 79 | 80 | test_logger.clear() 81 | for j in range(n_test_batches): 82 | bx, by = mnist.test.next_batch(batch_size) 83 | test_logger.accum(sess.run(test_to_run, {x:bx, y:by})) 84 | test_logger.print_(header='test', epoch=i+1, 85 | time=time.time()-start, logfile=logfile) 86 | line = 'kl: ' + str(sess.run(tnet['kl'])) + '\n' 87 | line += 'n_active: ' + str(sess.run(tnet['n_active'])) + '\n' 88 | print(line) 89 | logfile.write(line+'\n') 90 | if (i+1)%args.save_freq == 0: 91 | saver.save(sess, os.path.join(savedir, 'model')) 92 | 93 | logfile.close() 94 | saver.save(sess, os.path.join(savedir, 'model')) 95 | 96 | def test(): 97 | sess = tf.Session() 98 | saver = tf.train.Saver(tnet['weights']+tnet['qpi_vars']+tnet['pzx_vars']) 99 | saver.restore(sess, os.path.join(savedir, 'model')) 100 | logger = Accumulator('cent', 'acc') 101 | to_run = [tnet['cent'], tnet['acc']] 102 | for j in range(n_test_batches): 103 | bx, by = mnist.test.next_batch(batch_size) 104 | logger.accum(sess.run(to_run, {x:bx, y:by})) 105 | logger.print_(header='test') 106 | line = 'kl: ' + str(sess.run(tnet['kl'])) + '\n' 107 | line += 'n_active: ' + str(sess.run(tnet['n_active'])) + '\n' 108 | print(line) 109 | 110 | def record(): 111 | sess = tf.Session() 112 | saver = tf.train.Saver(tnet['weights']+tnet['qpi_vars']) 113 | saver.restore(sess, os.path.join(savedir, 'model')) 114 | logger = Accumulator('cent', 'acc') 115 | to_run = [tnet['cent'], tnet['acc']] 116 | for j in range(n_test_batches): 117 | bx, by = mnist.test.next_batch(batch_size) 118 | logger.accum(sess.run(to_run, {x:bx, y:by})) 119 | np_n_active = sess.run(tnet['n_active']) 120 | 121 | if not os.path.isdir('../../records'): 122 | os.makedirs('../../records') 123 | csvfn = os.path.join('../../records', 124 | 'sbpdropout_lenet_conv.csv' if args.csvfn is None else args.csvfn) 125 | 126 | if csvfn is not None: 127 | flag = 'a' if os.path.exists(csvfn) else 'w' 128 | with open(csvfn, flag) as f: 129 | writer = csv.writer(f) 130 | if flag=='w': 131 | writer.writerow(['savedir', 'cent', 'acc', 'n_active']) 132 | line = [savedir] 133 | line.append('%.4f' % logger.get('cent')) 134 | line.append('%.4f' % logger.get('acc')) 135 | line.append('-'.join(str(x) for x in np_n_active)) 136 | writer.writerow(line) 137 | 138 | if __name__=='__main__': 139 | if args.mode == 'train': 140 | train() 141 | elif args.mode == 'test': 142 | test() 143 | elif args.mode == 'record': 144 | record() 145 | else: 146 | raise ValueError('Invalid mode %s' % args.mode) 147 | -------------------------------------------------------------------------------- /scripts/lenet_conv/utils: -------------------------------------------------------------------------------- 1 | ../../utils/ 
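A minimal usage sketch for the scripts/lenet_conv experiments, mirroring the lenet_dense instructions in the README and assuming the default --pretraindir/--savedir values defined in the scripts above (./results/pretrained and ./results/bbdropout/sample_run); pass your own directories to override them.

```bash
$ cd ~/[WORKING_DIR]/scripts/lenet_conv
$ python ./pretrain.py                # pretrains the LeNet conv model, saves to ./results/pretrained
$ python ./bbdropout.py               # restores the pretrained weights, saves to ./results/bbdropout/sample_run
$ python ./dbbdropout.py              # data-dependent variant, restores ./results/bbdropout/sample_run
$ python ./bbdropout.py --mode test   # other modes: vis, record
```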
-------------------------------------------------------------------------------- /scripts/lenet_dense/bbdropout.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from tensorflow.examples.tutorials.mnist import input_data 3 | from model.lenet import lenet_dense 4 | from model.bbdropout import bbdropout 5 | from utils.accumulator import Accumulator 6 | from utils.train import * 7 | from utils.mnist import mnist_input 8 | import time 9 | import os 10 | import argparse 11 | import csv 12 | from pylab import * 13 | 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--batch_size', type=int, default=100) 16 | parser.add_argument('--n_epochs', type=int, default=200) 17 | parser.add_argument('--save_freq', type=int, default=20) 18 | parser.add_argument('--savedir', type=str, default=None) 19 | parser.add_argument('--pretraindir', type=str, default=None) 20 | parser.add_argument('--mode', type=str, default='train') 21 | parser.add_argument('--gpu_num', type=int, default=0) 22 | parser.add_argument('--csvfn', type=str, default=None) 23 | args = parser.parse_args() 24 | 25 | os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1' 26 | os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu_num) 27 | 28 | pretraindir = './results/pretrained' if args.pretraindir is None else args.pretraindir 29 | savedir = './results/bbdropout/sample_run' if args.savedir is None else args.savedir 30 | if not os.path.isdir(savedir): 31 | os.makedirs(savedir) 32 | 33 | batch_size = args.batch_size 34 | mnist, n_train_batches, n_test_batches = mnist_input(batch_size) 35 | x = tf.placeholder(tf.float32, [None, 784]) 36 | y = tf.placeholder(tf.float32, [None, 10]) 37 | N = mnist.train.num_examples 38 | dropout = bbdropout 39 | net = lenet_dense(x, y, True, dropout=dropout) 40 | tnet = lenet_dense(x, y, False, reuse=True, dropout=dropout) 41 | 42 | def train(): 43 | loss = net['cent'] + tf.add_n(net['kl'])/float(N) + net['wd'] 44 | global_step = tf.train.get_or_create_global_step() 45 | bdr = [int(n_train_batches*(args.n_epochs-1)*r) for r in [0.5, 0.75]] 46 | vals = [1e-2, 1e-3, 1e-4] 47 | lr = tf.train.piecewise_constant(tf.cast(global_step, tf.int32), bdr, vals) 48 | train_op1 = tf.train.AdamOptimizer(lr).minimize(loss, 49 | var_list=net['qpi_vars'], global_step=global_step) 50 | train_op2 = tf.train.AdamOptimizer(0.1*lr).minimize(loss, 51 | var_list=net['weights']) 52 | train_op = tf.group(train_op1, train_op2) 53 | 54 | pretrain_saver = tf.train.Saver(net['weights']) 55 | saver = tf.train.Saver(net['weights']+net['qpi_vars']) 56 | logfile = open(os.path.join(savedir, 'train.log'), 'w', 0) 57 | 58 | sess = tf.Session() 59 | sess.run(tf.global_variables_initializer()) 60 | pretrain_saver.restore(sess, os.path.join(pretraindir, 'model')) 61 | 62 | train_logger = Accumulator('cent', 'acc') 63 | train_to_run = [train_op, net['cent'], net['acc']] 64 | test_logger = Accumulator('cent', 'acc') 65 | test_to_run = [tnet['cent'], tnet['acc']] 66 | for i in range(args.n_epochs): 67 | line = 'Epoch %d start, learning rate %f' % (i+1, sess.run(lr)) 68 | print(line) 69 | logfile.write(line + '\n') 70 | train_logger.clear() 71 | start = time.time() 72 | for j in range(n_train_batches): 73 | bx, by = mnist.train.next_batch(batch_size) 74 | train_logger.accum(sess.run(train_to_run, {x:bx, y:by})) 75 | train_logger.print_(header='train', epoch=i+1, 76 | time=time.time()-start, logfile=logfile) 77 | 78 | test_logger.clear() 79 | for j in range(n_test_batches): 
80 | bx, by = mnist.test.next_batch(batch_size) 81 | test_logger.accum(sess.run(test_to_run, {x:bx, y:by})) 82 | test_logger.print_(header='test', epoch=i+1, 83 | time=time.time()-start, logfile=logfile) 84 | line = 'kl: ' + str(sess.run(tnet['kl'])) + '\n' 85 | line += 'n_active: ' + str(sess.run(tnet['n_active'])) + '\n' 86 | print(line) 87 | logfile.write(line+'\n') 88 | 89 | if (i+1)%args.save_freq == 0: 90 | saver.save(sess, os.path.join(savedir, 'model')) 91 | 92 | logfile.close() 93 | saver.save(sess, os.path.join(savedir, 'model')) 94 | 95 | def test(): 96 | sess = tf.Session() 97 | saver = tf.train.Saver(tnet['weights']+tnet['qpi_vars']) 98 | saver.restore(sess, os.path.join(savedir, 'model')) 99 | logger = Accumulator('cent', 'acc') 100 | to_run = [tnet['cent'], tnet['acc']] 101 | for j in range(n_test_batches): 102 | bx, by = mnist.test.next_batch(batch_size) 103 | logger.accum(sess.run(to_run, {x:bx, y:by})) 104 | logger.print_(header='test') 105 | line = 'kl: ' + str(sess.run(tnet['kl'])) + '\n' 106 | line += 'n_active: ' + str(sess.run(tnet['n_active'])) + '\n' 107 | print(line) 108 | 109 | def visualize(): 110 | sess = tf.Session() 111 | saver = tf.train.Saver(tnet['weights']+tnet['qpi_vars']) 112 | saver.restore(sess, os.path.join(savedir, 'model')) 113 | 114 | n_drop = len(tnet['n_active']) 115 | fig = figure('pi') 116 | axarr = fig.subplots(n_drop) 117 | for i in range(n_drop): 118 | np_pi = sess.run(tnet['pi'][i]).reshape((1,-1)) 119 | im = axarr[i].imshow(np_pi, cmap='gray', aspect='auto') 120 | axarr[i].yaxis.set_visible(False) 121 | axarr[i].xaxis.set_major_locator(MaxNLocator(integer=True)) 122 | if i == n_drop-1: 123 | axarr[i].set_xlabel('neurons') 124 | fig.colorbar(im, ax=axarr[i]) 125 | show() 126 | 127 | def record(): 128 | sess = tf.Session() 129 | saver = tf.train.Saver(tnet['weights']+tnet['qpi_vars']) 130 | saver.restore(sess, os.path.join(savedir, 'model')) 131 | logger = Accumulator('cent', 'acc') 132 | to_run = [tnet['cent'], tnet['acc']] 133 | for j in range(n_test_batches): 134 | bx, by = mnist.test.next_batch(batch_size) 135 | logger.accum(sess.run(to_run, {x:bx, y:by})) 136 | np_n_active = sess.run(tnet['n_active']) 137 | 138 | if not os.path.isdir('../../records'): 139 | os.makedirs('../../records') 140 | csvfn = os.path.join('../../records', 141 | 'bbdropout_lenet_dense.csv' if args.csvfn is None else args.csvfn) 142 | 143 | if csvfn is not None: 144 | flag = 'a' if os.path.exists(csvfn) else 'w' 145 | with open(csvfn, flag) as f: 146 | writer = csv.writer(f) 147 | if flag=='w': 148 | writer.writerow(['savedir', 'cent', 'acc', 'n_active']) 149 | line = [savedir] 150 | line.append('%.4f' % logger.get('cent')) 151 | line.append('%.4f' % logger.get('acc')) 152 | line.append('-'.join(str(x) for x in np_n_active)) 153 | writer.writerow(line) 154 | 155 | if __name__=='__main__': 156 | if args.mode == 'train': 157 | train() 158 | elif args.mode == 'test': 159 | test() 160 | elif args.mode == 'vis': 161 | visualize() 162 | elif args.mode == 'record': 163 | record() 164 | else: 165 | raise ValueError('Invalid mode %s' % args.mode) 166 | -------------------------------------------------------------------------------- /scripts/lenet_dense/dbbdropout.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import tensorflow as tf 3 | from tensorflow.examples.tutorials.mnist import input_data 4 | from model.lenet import lenet_dense 5 | from model.bbdropout import bbdropout 6 | from 
utils.accumulator import Accumulator 7 | from utils.train import * 8 | from utils.mnist import mnist_input 9 | import time 10 | import os 11 | import argparse 12 | import csv 13 | import matplotlib 14 | matplotlib.use('Agg') 15 | import matplotlib.pyplot as plt 16 | 17 | parser = argparse.ArgumentParser() 18 | parser.add_argument('--batch_size', type=int, default=100) 19 | parser.add_argument('--n_epochs', type=int, default=200) 20 | parser.add_argument('--save_freq', type=int, default=20) 21 | parser.add_argument('--vis_freq', type=int, default=20) 22 | parser.add_argument('--center_init', type=float, default=1.0) 23 | parser.add_argument('--savedir', type=str, default=None) 24 | parser.add_argument('--pretraindir', type=str, default=None) 25 | parser.add_argument('--mode', type=str, default='train') 26 | parser.add_argument('--gpu_num', type=int, default=0) 27 | parser.add_argument('--csvfn', type=str, default=None) 28 | args = parser.parse_args() 29 | 30 | os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1' 31 | os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu_num) 32 | 33 | pretraindir = './results/bbdropout/sample_run' if args.pretraindir is None else args.pretraindir 34 | savedir = './results/dbbdropout/sample_run' if args.savedir is None else args.savedir 35 | if not os.path.isdir(savedir): 36 | os.makedirs(savedir) 37 | figdir = os.path.join(savedir, 'figs') 38 | if not os.path.isdir(figdir): 39 | os.makedirs(figdir) 40 | 41 | batch_size = args.batch_size 42 | mnist, n_train_batches, n_test_batches = mnist_input(batch_size) 43 | x = tf.placeholder(tf.float32, [None, 784]) 44 | y = tf.placeholder(tf.float32, [None, 10]) 45 | N = mnist.train.num_examples 46 | center_init = args.center_init 47 | net = lenet_dense(x, y, True, dropout=bbdropout, 48 | dep=True, center_init=center_init) 49 | tnet = lenet_dense(x, y, False, reuse=True, dropout=bbdropout, 50 | dep=True, center_init=center_init) 51 | n_drop = len(tnet['n_active']) 52 | 53 | def train(): 54 | loss = net['cent'] + tf.add_n(net['kl'])/float(N) + net['wd'] 55 | global_step = tf.train.get_or_create_global_step() 56 | bdr = [int(n_train_batches*(args.n_epochs-1)*r) for r in [0.5, 0.75]] 57 | vals = [1e-2, 1e-3, 1e-4] 58 | lr = tf.train.piecewise_constant(tf.cast(global_step, tf.int32), bdr, vals) 59 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) 60 | with tf.control_dependencies(update_ops): 61 | train_op1 = tf.train.AdamOptimizer(lr).minimize(loss, 62 | var_list=net['pzx_vars'], global_step=global_step) 63 | train_op2 = tf.train.AdamOptimizer(0.1*lr).minimize(loss, 64 | var_list=net['weights']) 65 | train_op = tf.group(train_op1, train_op2) 66 | 67 | pretrain_saver = tf.train.Saver(net['weights']+net['qpi_vars']) 68 | saver = tf.train.Saver(net['weights']+net['qpi_vars']+net['pzx_vars']) 69 | logfile = open(os.path.join(savedir, 'train.log'), 'w', 0) 70 | 71 | sess = tf.Session() 72 | sess.run(tf.global_variables_initializer()) 73 | pretrain_saver.restore(sess, os.path.join(pretraindir, 'model')) 74 | 75 | train_logger = Accumulator('cent', 'acc') 76 | train_to_run = [train_op, net['cent'], net['acc']] 77 | test_logger = Accumulator('cent', 'acc') 78 | test_to_run = [tnet['cent'], tnet['acc']] 79 | test_to_run += tnet['n_active'] 80 | for i in range(args.n_epochs): 81 | line = 'Epoch %d start, learning rate %f' % (i+1, sess.run(lr)) 82 | print(line) 83 | logfile.write(line + '\n') 84 | train_logger.clear() 85 | start = time.time() 86 | for j in range(n_train_batches): 87 | bx, by = mnist.train.next_batch(batch_size) 88 | 
train_logger.accum(sess.run(train_to_run, {x:bx, y:by})) 89 | train_logger.print_(header='train', epoch=i+1, 90 | time=time.time()-start, logfile=logfile) 91 | 92 | test_logger.clear() 93 | np_n_active = [0]*n_drop 94 | for j in range(n_test_batches): 95 | bx, by = mnist.test.next_batch(batch_size) 96 | res = sess.run(test_to_run, {x:bx, y:by}) 97 | test_logger.accum(res[:-n_drop]) 98 | np_n_active = [a + b for a, b in zip(np_n_active, res[-n_drop:])] 99 | test_logger.print_(header='test', epoch=i+1, 100 | time=time.time()-start, logfile=logfile) 101 | np_n_active = [int(a/n_test_batches) for a in np_n_active] 102 | line = 'kl: ' + str(sess.run(tnet['kl'])) + '\n' 103 | line += 'n_active: ' + str(np_n_active) + '\n' 104 | print(line) 105 | logfile.write(line+'\n') 106 | if (i+1)%args.save_freq == 0: 107 | saver.save(sess, os.path.join(savedir, 'model')) 108 | 109 | if (i+1)%args.vis_freq == 0: 110 | fig = _visualize(sess) 111 | fig.savefig(os.path.join(figdir, 'epoch%d.png'%(i+1)), dpi=200) 112 | 113 | logfile.close() 114 | saver.save(sess, os.path.join(savedir, 'model')) 115 | 116 | def test(): 117 | sess = tf.Session() 118 | saver = tf.train.Saver(tnet['weights']+tnet['qpi_vars']+tnet['pzx_vars']) 119 | saver.restore(sess, os.path.join(savedir, 'model')) 120 | logger = Accumulator('cent', 'acc') 121 | to_run = [tnet['cent'], tnet['acc']] + tnet['n_active'] 122 | np_n_active = [0]*n_drop 123 | for j in range(n_test_batches): 124 | bx, by = mnist.test.next_batch(batch_size) 125 | res = sess.run(to_run, {x:bx, y:by}) 126 | logger.accum(res[:-n_drop]) 127 | np_n_active = [a + b for a, b in zip(np_n_active, res[-n_drop:])] 128 | np_n_active = [int(a/n_test_batches) for a in np_n_active] 129 | logger.print_(header='test') 130 | line = 'kl: ' + str(sess.run(tnet['kl'])) + '\n' 131 | line += 'n_active:' + str(np_n_active) + '\n' 132 | print(line) 133 | 134 | def _visualize(sess): 135 | pi_csum = [tf.matmul(y, pi, transpose_a=True) for pi in tnet['pi']] 136 | csum = tf.expand_dims(tf.reduce_sum(y, 0), 1) 137 | 138 | np_pi_csum = [0]*n_drop 139 | np_csum = 0 140 | for j in range(n_test_batches): 141 | bx, by = mnist.test.next_batch(args.batch_size) 142 | A, B = sess.run([pi_csum, csum], {x:bx, y:by}) 143 | for k in range(len(pi_csum)): 144 | np_pi_csum[k] += A[k] 145 | np_csum += B 146 | 147 | fig = plt.figure('vis') 148 | axarr = fig.subplots(n_drop) 149 | for i in range(n_drop): 150 | im = axarr[i].imshow(np_pi_csum[i]/np_csum, cmap='gray', aspect='auto') 151 | fig.colorbar(im, ax=axarr[i]) 152 | return fig 153 | 154 | def visualize(): 155 | sess = tf.Session() 156 | saver = tf.train.Saver(tnet['weights']+tnet['qpi_vars']+tnet['pzx_vars']) 157 | saver.restore(sess, os.path.join(savedir, 'model')) 158 | _visualize(sess) 159 | plt.show() 160 | 161 | def record(): 162 | sess = tf.Session() 163 | saver = tf.train.Saver(tnet['weights']+tnet['qpi_vars']+tnet['pzx_vars']) 164 | saver.restore(sess, os.path.join(savedir, 'model')) 165 | logger = Accumulator('cent', 'acc') 166 | to_run = [tnet['cent'], tnet['acc']] + tnet['n_active'] 167 | np_n_active = [0]*n_drop 168 | for j in range(n_test_batches): 169 | bx, by = mnist.test.next_batch(batch_size) 170 | res = sess.run(to_run, {x:bx, y:by}) 171 | logger.accum(res[:-n_drop]) 172 | np_n_active = [a + b for a, b in zip(np_n_active, res[-n_drop:])] 173 | np_n_active = [int(a/n_test_batches) for a in np_n_active] 174 | 175 | if not os.path.isdir('../../records'): 176 | os.makedirs('../../records') 177 | csvfn = os.path.join('../../records', 178 | 
'dbbdropout_lenet_dense.csv' if args.csvfn is None else args.csvfn) 179 | 180 | if csvfn is not None: 181 | flag = 'a' if os.path.exists(csvfn) else 'w' 182 | with open(csvfn, flag) as f: 183 | writer = csv.writer(f) 184 | if flag=='w': 185 | writer.writerow(['savedir', 'cent', 'acc', 'n_active']) 186 | line = [savedir] 187 | line.append('%.4f' % logger.get('cent')) 188 | line.append('%.4f' % logger.get('acc')) 189 | line.append('-'.join(str(x) for x in np_n_active)) 190 | writer.writerow(line) 191 | 192 | if __name__=='__main__': 193 | if args.mode == 'train': 194 | train() 195 | elif args.mode == 'test': 196 | test() 197 | elif args.mode == 'vis': 198 | visualize() 199 | elif args.mode == 'record': 200 | record() 201 | else: 202 | raise ValueError('Invalid mode %s' % args.mode) 203 | -------------------------------------------------------------------------------- /scripts/lenet_dense/model: -------------------------------------------------------------------------------- 1 | ../../model -------------------------------------------------------------------------------- /scripts/lenet_dense/pretrain.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import tensorflow as tf 3 | from tensorflow.examples.tutorials.mnist import input_data 4 | from model.lenet import lenet_dense 5 | from utils.accumulator import Accumulator 6 | from utils.train import * 7 | from utils.mnist import mnist_input 8 | import time 9 | import os 10 | import argparse 11 | 12 | parser = argparse.ArgumentParser() 13 | parser.add_argument('--batch_size', type=int, default=100) 14 | parser.add_argument('--n_epochs', type=int, default=200) 15 | parser.add_argument('--save_freq', type=int, default=20) 16 | parser.add_argument('--savedir', type=str, default=None) 17 | parser.add_argument('--mode', type=str, default='train') 18 | parser.add_argument('--gpu_num', type=int, default=0) 19 | args = parser.parse_args() 20 | 21 | os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1' 22 | os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu_num) 23 | 24 | savedir = './results/pretrained' if args.savedir is None else args.savedir 25 | if not os.path.isdir(savedir): 26 | os.makedirs(savedir) 27 | 28 | batch_size = args.batch_size 29 | mnist, n_train_batches, n_test_batches = mnist_input(batch_size) 30 | x = tf.placeholder(tf.float32, [None, 784]) 31 | y = tf.placeholder(tf.float32, [None, 10]) 32 | net = lenet_dense(x, y, True) 33 | tnet = lenet_dense(x, y, False, reuse=True) 34 | 35 | def train(): 36 | loss = net['cent'] + net['wd'] 37 | global_step = tf.train.get_or_create_global_step() 38 | lr = tf.train.piecewise_constant(tf.cast(global_step, tf.int32), 39 | [n_train_batches*args.n_epochs/2], [1e-4, 1e-5]) 40 | train_op = tf.train.AdamOptimizer(lr).minimize(loss, global_step=global_step) 41 | 42 | saver = tf.train.Saver(net['weights']) 43 | logfile = open(os.path.join(savedir, 'train.log'), 'w', 0) 44 | 45 | sess = tf.Session() 46 | sess.run(tf.global_variables_initializer()) 47 | 48 | train_logger = Accumulator('cent', 'acc') 49 | train_to_run = [train_op, net['cent'], net['acc']] 50 | test_logger = Accumulator('cent', 'acc') 51 | test_to_run = [tnet['cent'], tnet['acc']] 52 | for i in range(args.n_epochs): 53 | line = 'Epoch %d start, learning rate %f' % (i+1, sess.run(lr)) 54 | print (line) 55 | logfile.write(line + '\n') 56 | train_logger.clear() 57 | start = time.time() 58 | for j in range(n_train_batches): 59 | bx, by = mnist.train.next_batch(batch_size) 60 | 
train_logger.accum(sess.run(train_to_run, {x:bx, y:by})) 61 | train_logger.print_(header='train', epoch=i+1, 62 | time=time.time()-start, logfile=logfile) 63 | 64 | test_logger.clear() 65 | for j in range(n_test_batches): 66 | bx, by = mnist.test.next_batch(batch_size) 67 | test_logger.accum(sess.run(test_to_run, {x:bx, y:by})) 68 | test_logger.print_(header='test', epoch=i+1, 69 | time=time.time()-start, logfile=logfile) 70 | 71 | print() 72 | logfile.write('\n') 73 | if (i+1)%args.save_freq == 0: 74 | saver.save(sess, os.path.join(savedir, 'model')) 75 | 76 | logfile.close() 77 | saver.save(sess, os.path.join(savedir, 'model')) 78 | 79 | def test(): 80 | sess = tf.Session() 81 | saver = tf.train.Saver(tnet['weights']) 82 | saver.restore(sess, os.path.join(savedir, 'model')) 83 | logger = Accumulator('cent', 'acc') 84 | to_run = [tnet['cent'], tnet['acc']] 85 | for j in range(n_test_batches): 86 | bx, by = mnist.test.next_batch(batch_size) 87 | logger.accum(sess.run(to_run, {x:bx, y:by})) 88 | logger.print_(header='test') 89 | 90 | if __name__=='__main__': 91 | if args.mode == 'train': 92 | train() 93 | elif args.mode == 'test': 94 | test() 95 | else: 96 | raise ValueError('Invalid mode %s' % args.mode) 97 | -------------------------------------------------------------------------------- /scripts/lenet_dense/sbpdropout.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import tensorflow as tf 3 | from tensorflow.examples.tutorials.mnist import input_data 4 | from model.lenet import lenet_dense 5 | from model.sbpdropout import sbpdropout 6 | from utils.accumulator import Accumulator 7 | from utils.train import * 8 | from utils.mnist import mnist_input 9 | import time 10 | import os 11 | import argparse 12 | import csv 13 | 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--batch_size', type=int, default=100) 16 | parser.add_argument('--n_epochs', type=int, default=200) 17 | parser.add_argument('--save_freq', type=int, default=20) 18 | parser.add_argument('--savedir', type=str, default=None) 19 | parser.add_argument('--pretraindir', type=str, default=None) 20 | parser.add_argument('--mode', type=str, default='train') 21 | parser.add_argument('--gpu_num', type=int, default=0) 22 | parser.add_argument('--csvfn', type=str, default=None) 23 | args = parser.parse_args() 24 | 25 | os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1' 26 | os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu_num) 27 | 28 | pretraindir = './results/pretrained' if args.pretraindir is None else args.pretraindir 29 | savedir = './results/sbpdropout/sample_run' if args.savedir is None else args.savedir 30 | if not os.path.isdir(savedir): 31 | os.makedirs(savedir) 32 | 33 | batch_size = args.batch_size 34 | mnist, n_train_batches, n_test_batches = mnist_input(batch_size) 35 | x = tf.placeholder(tf.float32, [None, 784]) 36 | y = tf.placeholder(tf.float32, [None, 10]) 37 | N = mnist.train.num_examples 38 | dropout = sbpdropout 39 | net = lenet_dense(x, y, True, dropout=dropout) 40 | tnet = lenet_dense(x, y, False, reuse=True, dropout=dropout) 41 | 42 | def train(): 43 | loss = net['cent'] + tf.add_n(net['kl'])/float(N) + net['wd'] 44 | global_step = tf.train.get_or_create_global_step() 45 | bdr = [int(n_train_batches*(args.n_epochs-1)*r) for r in [0.5, 0.75]] 46 | vals = [1e-2, 1e-3, 1e-4] 47 | lr = tf.train.piecewise_constant(tf.cast(global_step, tf.int32), bdr, vals) 48 | train_op1 = tf.train.AdamOptimizer(lr).minimize(loss, 49 | 
var_list=net['qpi_vars'], global_step=global_step) 50 | train_op2 = tf.train.AdamOptimizer(0.1*lr).minimize(loss, 51 | var_list=net['weights']) 52 | train_op = tf.group(train_op1, train_op2) 53 | 54 | pretrain_saver = tf.train.Saver(net['weights']) 55 | saver = tf.train.Saver(net['weights']+net['qpi_vars']) 56 | logfile = open(os.path.join(savedir, 'train.log'), 'w', 0) 57 | 58 | sess = tf.Session() 59 | sess.run(tf.global_variables_initializer()) 60 | pretrain_saver.restore(sess, os.path.join(pretraindir, 'model')) 61 | 62 | train_logger = Accumulator('cent', 'acc') 63 | train_to_run = [train_op, net['cent'], net['acc']] 64 | test_logger = Accumulator('cent', 'acc') 65 | test_to_run = [tnet['cent'], tnet['acc']] 66 | for i in range(args.n_epochs): 67 | line = 'Epoch %d start, learning rate %f' % (i+1, sess.run(lr)) 68 | print(line) 69 | logfile.write(line + '\n') 70 | train_logger.clear() 71 | start = time.time() 72 | for j in range(n_train_batches): 73 | bx, by = mnist.train.next_batch(batch_size) 74 | train_logger.accum(sess.run(train_to_run, {x:bx, y:by})) 75 | train_logger.print_(header='train', epoch=i+1, 76 | time=time.time()-start, logfile=logfile) 77 | 78 | test_logger.clear() 79 | for j in range(n_test_batches): 80 | bx, by = mnist.test.next_batch(batch_size) 81 | test_logger.accum(sess.run(test_to_run, {x:bx, y:by})) 82 | test_logger.print_(header='test', epoch=i+1, 83 | time=time.time()-start, logfile=logfile) 84 | line = 'kl: ' + str(sess.run(tnet['kl'])) + '\n' 85 | line += 'n_active: ' + str(sess.run(tnet['n_active'])) + '\n' 86 | print(line) 87 | logfile.write(line+'\n') 88 | if (i+1)%args.save_freq == 0: 89 | saver.save(sess, os.path.join(savedir, 'model')) 90 | 91 | logfile.close() 92 | saver.save(sess, os.path.join(savedir, 'model')) 93 | 94 | def test(): 95 | sess = tf.Session() 96 | saver = tf.train.Saver(tnet['weights']+tnet['qpi_vars']) 97 | saver.restore(sess, os.path.join(savedir, 'model')) 98 | logger = Accumulator('cent', 'acc') 99 | to_run = [tnet['cent'], tnet['acc']] 100 | for j in range(n_test_batches): 101 | bx, by = mnist.test.next_batch(batch_size) 102 | logger.accum(sess.run(to_run, {x:bx, y:by})) 103 | logger.print_(header='test') 104 | line = 'kl: ' + str(sess.run(tnet['kl'])) + '\n' 105 | line += 'n_active: ' + str(sess.run(tnet['n_active'])) + '\n' 106 | print(line) 107 | 108 | def record(): 109 | sess = tf.Session() 110 | saver = tf.train.Saver(tnet['weights']+tnet['qpi_vars']) 111 | saver.restore(sess, os.path.join(savedir, 'model')) 112 | logger = Accumulator('cent', 'acc') 113 | to_run = [tnet['cent'], tnet['acc']] 114 | for j in range(n_test_batches): 115 | bx, by = mnist.test.next_batch(batch_size) 116 | logger.accum(sess.run(to_run, {x:bx, y:by})) 117 | np_n_active = sess.run(tnet['n_active']) 118 | 119 | if not os.path.isdir('../../records'): 120 | os.makedirs('../../records') 121 | csvfn = os.path.join('../../records', 122 | 'sbpdropout_lenet_dense.csv' if args.csvfn is None else args.csvfn) 123 | 124 | if csvfn is not None: 125 | flag = 'a' if os.path.exists(csvfn) else 'w' 126 | with open(csvfn, flag) as f: 127 | writer = csv.writer(f) 128 | if flag=='w': 129 | writer.writerow(['savedir', 'cent', 'acc', 'n_active']) 130 | line = [savedir] 131 | line.append('%.4f' % logger.get('cent')) 132 | line.append('%.4f' % logger.get('acc')) 133 | line.append('-'.join(str(x) for x in np_n_active)) 134 | writer.writerow(line) 135 | 136 | if __name__=='__main__': 137 | if args.mode == 'train': 138 | train() 139 | elif args.mode == 'test': 140 | test() 
141 | elif args.mode == 'record': 142 | record() 143 | else: 144 | raise ValueError('Invalid mode %s' % args.mode) 145 | -------------------------------------------------------------------------------- /scripts/lenet_dense/utils: -------------------------------------------------------------------------------- 1 | ../../utils/ -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenXAIProject/Network-Structure-Dropout/723df2d2392ec16eca3452d4afb81d54c4a2f841/utils/__init__.py -------------------------------------------------------------------------------- /utils/accumulator.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | class Accumulator(): 4 | def __init__(self, *args): 5 | self.args = args 6 | self.argdict = {} 7 | for i, arg in enumerate(args): 8 | self.argdict[arg] = i 9 | self.sums = [0]*len(args) 10 | self.cnt = 0 11 | 12 | def accum(self, val): 13 | val = [val] if type(val) is not list else val 14 | val = [v for v in val if v is not None] 15 | assert(len(val) == len(self.args)) 16 | for i in range(len(val)): 17 | self.sums[i] += val[i] 18 | self.cnt += 1 19 | 20 | def clear(self): 21 | self.sums = [0]*len(self.args) 22 | self.cnt = 0 23 | 24 | def get(self, arg, avg=True): 25 | i = self.argdict.get(arg, -1) 26 | assert(i != -1) 27 | return (self.sums[i]/self.cnt if avg else self.sums[i]) 28 | 29 | def print_(self, header=None, epoch=None, it=None, time=None, 30 | logfile=None, do_not_print=[], as_int=[], 31 | avg=True): 32 | line = '' if header is None else header + ': ' 33 | if epoch is not None: 34 | line += ('epoch %d, ' % epoch) 35 | if it is not None: 36 | line += ('iter %d, ' % it) 37 | if time is not None: 38 | line += ('(%.3f secs), ' % time) 39 | 40 | args = [arg for arg in self.args if arg not in do_not_print] 41 | 42 | for arg in args[:-1]: 43 | val = self.sums[self.argdict[arg]] 44 | if avg: 45 | val /= self.cnt 46 | if arg in as_int: 47 | line += ('%s %d, ' % (arg, int(val))) 48 | else: 49 | line += ('%s %f, ' % (arg, val)) 50 | val = self.sums[self.argdict[args[-1]]] 51 | if avg: 52 | val /= self.cnt 53 | if args[-1] in as_int: 54 | line += ('%s %d' % (args[-1], int(val))) 55 | else: 56 | line += ('%s %f' % (args[-1], val)) 57 | print(line) 58 | 59 | if logfile is not None: 60 | logfile.write(line + '\n') 61 | -------------------------------------------------------------------------------- /utils/cifar10.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import os 4 | import sys 5 | from paths import CIFAR10_PATH 6 | 7 | HEIGHT = 32 8 | WIDTH = 32 9 | DEPTH = 3 10 | NUM_CLASSES = 10 11 | NUM_DATA_FILES = 5 12 | NUM_TRAIN = 10000 * NUM_DATA_FILES 13 | NUM_TEST = 10000 14 | 15 | def record_dataset(filenames): 16 | label_bytes = 1 17 | image_bytes = DEPTH * HEIGHT * WIDTH 18 | record_bytes = label_bytes + image_bytes 19 | return tf.data.FixedLengthRecordDataset(filenames, record_bytes) 20 | 21 | def get_filenames(training): 22 | data_dir = os.path.join(CIFAR10_PATH, 'cifar-10-batches-bin') 23 | if training: 24 | return [os.path.join(data_dir, 'data_batch_%d.bin' % i) 25 | for i in range(1, NUM_DATA_FILES+1)] 26 | else: 27 | return [os.path.join(data_dir, 'test_batch.bin')] 28 | 29 | def parse_record(raw_record): 30 | """Parse a CIFAR-10 
record from value.""" 31 | # Every record consists of a label followed by the image, with a fixed number 32 | # of bytes for each. 33 | label_offset = 0 34 | label_bytes = 1 35 | image_bytes = DEPTH * HEIGHT * WIDTH 36 | record_bytes = label_bytes + image_bytes 37 | 38 | # Convert from a string to a vector of uint8 that is record_bytes long. 39 | record_vector = tf.decode_raw(raw_record, tf.uint8) 40 | 41 | # The first byte represents the label, which we convert from uint8 to int32. 42 | label = tf.cast(record_vector[label_offset], tf.int32) 43 | label = tf.one_hot(label, NUM_CLASSES) 44 | 45 | # The remaining bytes after the label represent the image, which we reshape 46 | # from [depth * height * width] to [depth, height, width]. 47 | depth_major = tf.reshape( 48 | record_vector[label_offset+label_bytes:record_bytes], 49 | [DEPTH, HEIGHT, WIDTH]) 50 | 51 | # Convert from [depth, height, width] to [height, width, depth], and cast as 52 | # float32. 53 | image = tf.cast(tf.transpose(depth_major, [1, 2, 0]), tf.float32) 54 | return image, label 55 | 56 | def preprocess_image(image, training): 57 | """Preprocess a single image of layout [height, width, depth].""" 58 | if training: 59 | # Resize the image to add four extra pixels on each side. 60 | image = tf.image.resize_image_with_crop_or_pad( 61 | image, HEIGHT + 8, WIDTH + 8) 62 | 63 | # Randomly crop a [HEIGHT, WIDTH] section of the image. 64 | image = tf.random_crop(image, [HEIGHT, WIDTH, DEPTH]) 65 | 66 | # Randomly flip the image horizontally. 67 | image = tf.image.random_flip_left_right(image) 68 | 69 | # Subtract off the mean and divide by the variance of the pixels. 70 | image = tf.image.per_image_standardization(image) 71 | 72 | # transpose image back to depth major 73 | image = tf.transpose(image, [2, 1, 0]) 74 | 75 | return image 76 | 77 | def cifar10_input(batch_size, training): 78 | dataset = record_dataset(get_filenames(training)) 79 | 80 | if training: 81 | dataset = dataset.shuffle(buffer_size=NUM_TRAIN) 82 | 83 | dataset = dataset.map(parse_record) 84 | dataset = dataset.map( 85 | lambda image, label: (preprocess_image(image, training), label)) 86 | 87 | dataset = dataset.prefetch(8 * batch_size) 88 | dataset = dataset.repeat() 89 | dataset = dataset.batch(batch_size) 90 | iterator = dataset.make_one_shot_iterator() 91 | images, labels = iterator.get_next() 92 | 93 | return images, labels 94 | -------------------------------------------------------------------------------- /utils/cifar100.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import os 4 | import sys 5 | from paths import CIFAR100_PATH 6 | 7 | HEIGHT = 32 8 | WIDTH = 32 9 | DEPTH = 3 10 | NUM_SUPER_CLASSES = 20 11 | NUM_CLASSES = 100 12 | NUM_TRAIN = 50000 13 | NUM_TEST = 10000 14 | 15 | def record_dataset(filenames): 16 | label_bytes = 2 17 | image_bytes = DEPTH * HEIGHT * WIDTH 18 | record_bytes = label_bytes + image_bytes 19 | return tf.data.FixedLengthRecordDataset(filenames, record_bytes) 20 | 21 | def get_filenames(training): 22 | data_dir = os.path.join(CIFAR100_PATH, 'cifar-100-binary') 23 | if training: 24 | return [os.path.join(data_dir, 'train.bin')] 25 | else: 26 | return [os.path.join(data_dir, 'test.bin')] 27 | 28 | def parse_record(raw_record): 29 | """Parse a CIFAR-100 record from value.""" 30 | # Every record consists of a label followed by the image, with a fixed number 31 | # of bytes for each. 
32 | label_bytes = 2 33 | image_bytes = DEPTH * HEIGHT * WIDTH 34 | record_bytes = label_bytes + image_bytes 35 | 36 | # Convert from a string to a vector of uint8 that is record_bytes long. 37 | record_vector = tf.decode_raw(raw_record, tf.uint8) 38 | 39 | # The first and second bytes represent the super label and the label, 40 | # which we convert from uint8 to int32. 41 | slabel = tf.cast(record_vector[0], tf.int32) 42 | slabel = tf.one_hot(slabel, NUM_SUPER_CLASSES) 43 | label = tf.cast(record_vector[1], tf.int32) 44 | label = tf.one_hot(label, NUM_CLASSES) 45 | 46 | # The remaining bytes after the label represent the image, which we reshape 47 | # from [depth * height * width] to [depth, height, width]. 48 | depth_major = tf.reshape( 49 | record_vector[label_bytes:record_bytes], 50 | [DEPTH, HEIGHT, WIDTH]) 51 | 52 | # Convert from [depth, height, width] to [height, width, depth], and cast as 53 | # float32. 54 | image = tf.cast(tf.transpose(depth_major, [1, 2, 0]), tf.float32) 55 | return image, slabel, label 56 | 57 | def preprocess_image(image, training): 58 | """Preprocess a single image of layout [height, width, depth].""" 59 | if training: 60 | # Resize the image to add four extra pixels on each side. 61 | image = tf.image.resize_image_with_crop_or_pad( 62 | image, HEIGHT + 8, WIDTH + 8) 63 | 64 | # Randomly crop a [HEIGHT, WIDTH] section of the image. 65 | image = tf.random_crop(image, [HEIGHT, WIDTH, DEPTH]) 66 | 67 | # Randomly flip the image horizontally. 68 | image = tf.image.random_flip_left_right(image) 69 | 70 | # Subtract off the mean and divide by the variance of the pixels. 71 | image = tf.image.per_image_standardization(image) 72 | 73 | # transpose image back to depth major 74 | image = tf.transpose(image, [2, 1, 0]) 75 | 76 | return image 77 | 78 | def cifar100_input(batch_size, training): 79 | dataset = record_dataset(get_filenames(training)) 80 | 81 | if training: 82 | dataset = dataset.shuffle(buffer_size=NUM_TRAIN) 83 | 84 | dataset = dataset.map(parse_record) 85 | dataset = dataset.map( 86 | lambda image, slabel, label: \ 87 | (preprocess_image(image, training), slabel, label)) 88 | 89 | dataset = dataset.prefetch(8 * batch_size) 90 | dataset = dataset.repeat() 91 | dataset = dataset.batch(batch_size) 92 | iterator = dataset.make_one_shot_iterator() 93 | images, slabels, labels = iterator.get_next() 94 | 95 | return images, slabels, labels 96 | -------------------------------------------------------------------------------- /utils/mnist.py: -------------------------------------------------------------------------------- 1 | from tensorflow.examples.tutorials.mnist import input_data 2 | from paths import MNIST_PATH 3 | 4 | def mnist_input(batch_size): 5 | mnist = input_data.read_data_sets(MNIST_PATH, one_hot=True, validation_size=0) 6 | n_train_batches = mnist.train.num_examples//batch_size 7 | n_test_batches = mnist.test.num_examples//batch_size 8 | return mnist, n_train_batches, n_test_batches 9 | -------------------------------------------------------------------------------- /utils/train.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.python.client import device_lib 3 | 4 | def cross_entropy(logits, labels): 5 | return tf.losses.softmax_cross_entropy(logits=logits, onehot_labels=labels) 6 | 7 | def weight_decay(decay, var_list=None): 8 | var_list = tf.trainable_variables() if var_list is None else var_list 9 | return decay*tf.add_n([tf.nn.l2_loss(var) for var in var_list]) 10 
| 11 | def accuracy(logits, labels): 12 | correct = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1)) 13 | return tf.reduce_mean(tf.cast(correct, tf.float32)) 14 | 15 | def get_train_op(optim, loss, global_step=None, clip=None, var_list=None): 16 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) 17 | with tf.control_dependencies(update_ops): 18 | grad_and_vars = optim.compute_gradients(loss, var_list=var_list) 19 | if clip is not None: 20 | grad_and_vars = [((None if grad is None \ 21 | else tf.clip_by_norm(grad, clip)), var) \ 22 | for grad, var in grad_and_vars] 23 | train_op = optim.apply_gradients(grad_and_vars, global_step=global_step) 24 | return train_op 25 | 26 | # copied from https://stackoverflow.com/a/38580201 27 | def get_available_gpus(): 28 | local_device_protos = device_lib.list_local_devices() 29 | mem_thres = 0.3*max([x.memory_limit for x in local_device_protos \ 30 | if x.device_type=='GPU']) 31 | return [x.name for x in local_device_protos if x.device_type=='GPU' \ 32 | and x.memory_limit > mem_thres] 33 | 34 | def average_gradients(tower_grads): 35 | """Calculate the average gradient for each shared variable across all towers. 36 | 37 | Note that this function provides a synchronization point across all towers. 38 | 39 | Args: 40 | tower_grads: List of lists of (gradient, variable) tuples. The outer list 41 | is over individual gradients. The inner list is over the gradient 42 | calculation for each tower. 43 | Returns: 44 | List of pairs of (gradient, variable) where the gradient has been averaged 45 | across all towers. 46 | """ 47 | average_grads = [] 48 | for grad_and_vars in zip(*tower_grads): 49 | # Note that each grad_and_vars looks like the following: 50 | # ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN)) 51 | grads = [] 52 | for g, _ in grad_and_vars: 53 | # Add 0 dimension to the gradients to represent the tower. 54 | expanded_g = tf.expand_dims(g, 0) 55 | 56 | # Append on a 'tower' dimension which we will average over below. 57 | grads.append(expanded_g) 58 | 59 | # Average over the 'tower' dimension. 60 | grad = tf.concat(axis=0, values=grads) 61 | grad = tf.reduce_mean(grad, 0) 62 | 63 | # Keep in mind that the Variables are redundant because they are shared 64 | # across towers. So .. we will just return the first tower's pointer to 65 | # the Variable. 66 | v = grad_and_vars[0][1] 67 | grad_and_var = (grad, v) 68 | average_grads.append(grad_and_var) 69 | return average_grads 70 | --------------------------------------------------------------------------------
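Note on running the code above: utils/mnist.py, utils/cifar10.py and utils/cifar100.py import MNIST_PATH, CIFAR10_PATH and CIFAR100_PATH from a module named paths, but no utils/paths.py appears in this listing, so that file has to be created locally before any of the scripts will run. The snippet below is a hypothetical sketch of such a file, not part of the repository: only the three variable names are fixed (they are exactly what the loaders import), while the directory values are placeholders and should point at wherever the datasets are stored.

    # utils/paths.py -- hypothetical sketch; replace the placeholder paths with your own.
    MNIST_PATH = '/data/mnist'          # handed to input_data.read_data_sets()
    CIFAR10_PATH = '/data/cifar10'      # expected to contain cifar-10-batches-bin/
    CIFAR100_PATH = '/data/cifar100'    # expected to contain cifar-100-binary/

Judging from the default directories hard-coded in the scripts, the intended order for the lenet_dense experiments is: pretrain.py first (saving to ./results/pretrained), then bbdropout.py or sbpdropout.py (both restore that pretrained checkpoint), and finally dbbdropout.py, whose default pretraindir is ./results/bbdropout/sample_run, i.e. the default savedir of bbdropout.py.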