├── .gitignore ├── AUTHOR.txt ├── LICENSE ├── README.md ├── model ├── __init__.py ├── bbdropout.py ├── digamma.py ├── layers.py ├── lenet.py ├── sbpdropout.py └── utils ├── scripts ├── lenet_conv │ ├── bbdropout.py │ ├── dbbdropout.py │ ├── model │ ├── pretrain.py │ ├── sbpdropout.py │ └── utils └── lenet_dense │ ├── bbdropout.py │ ├── dbbdropout.py │ ├── model │ ├── pretrain.py │ ├── sbpdropout.py │ └── utils └── utils ├── __init__.py ├── accumulator.py ├── cifar10.py ├── cifar100.py ├── mnist.py └── train.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.log 3 | *.npy 4 | *.npz 5 | *.ckpt 6 | *.tar 7 | *.out 8 | *.sh 9 | utils/paths.py 10 | **/results/ 11 | /records/ 12 | -------------------------------------------------------------------------------- /AUTHOR.txt: -------------------------------------------------------------------------------- 1 | Copyright 2018 (Institution) under XAI Project supported by Ministry of Science and ICT, Korea 2 | 3 | # This is the list of (Institution) for copyright purposes. 4 | # This does not necessarily list everyone who has contributed code, since in 5 | # some cases, their employer may be the copyright holder. To see the full list 6 | # of contributors, see the revision history in source control 7 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Learning Network Structure with Dropout 2 | 3 | ### **CONTENT** 4 | > Data-dependent variational dropout for learning a network structure 5 | ### **How to Use** 6 | 7 | ```bash 8 | $ cd ~/[WORKING_DIR]/scripts/lenet_dense 9 | $ python ./pretrain.py 10 | $ python ./bbdropout.py 11 | ``` 12 | 13 | 14 | 15 | # XAI Project 16 | 17 | ### **Project Name** 18 | > A machine learning and statistical inference framework for explainable artificial intelligence (development of a human-level learning and inference framework that can explain the reasoning behind its decisions) 19 | ### **Managed by** 20 | > Ministry of Science and ICT/XAIC 21 | ### **Participating Affiliations** 22 | > UNIST, Korea Univ., Yonsei Univ., KAIST, AItrics 23 | ### **Web Site** 24 | > 25 | -------------------------------------------------------------------------------- /model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenXAIProject/Network-Structure-Dropout/723df2d2392ec16eca3452d4afb81d54c4a2f841/model/__init__.py -------------------------------------------------------------------------------- /model/bbdropout.py: -------------------------------------------------------------------------------- 1 | from layers import * 2 | import tensorflow as tf 3 | from tensorflow.contrib.distributions import RelaxedBernoulli 4 | import numpy as np 5 | 6 | digamma = tf.digamma 7 | from digamma import digamma_approx as digamma_approx 8 | lgamma = tf.lgamma 9 | Euler = 0.577215664901532 10 | 11 | def bbdropout(x, training, 12 | alpha=1e-4, thres=1e-2, a_init=-1., tau=1e-1, center_init=1.0, 13 | approx_digamma=True, scale_kl=None, dep=False, 14 | unit_scale=True, collect=True, 15 | name='bbdropout', reuse=None): 16 | 17 | N = tf.shape(x)[0] 18 | K = x.shape[1].value 19 | is_conv = len(x.shape)==4 20 | 21 | with tf.variable_scope(name+'/qpi_vars', reuse=reuse): 22 | with tf.device('/cpu:0'): 23 | a = softplus(tf.get_variable('a_uc', shape=[K], 24 | initializer=tf.constant_initializer(a_init))) 25 | b = softplus(tf.get_variable('b_uc', shape=[K])) 26 | 27 | _digamma = digamma_approx if approx_digamma else digamma 28 | kl = (a-alpha)/a * (-Euler - _digamma(b) - 1/b) \ 29 | + log(a*b) - log(alpha) - (b-1)/b 30 | pi = (1 - tf.random_uniform([K])**(1/b))**(1/a) if training else \ 31 | b*tf.exp(lgamma(1+1/a) + lgamma(b) - lgamma(1+1/a+b)) 32 | 33 | def hard_sigmoid(x): 34 | return tf.clip_by_value(x, thres, 1-thres) 35 | 36 | if dep: 37 | with tf.variable_scope(name+'/pzx_vars', reuse=reuse): 38 | hid = global_avg_pool(x) if is_conv else x 39 | hid = tf.stop_gradient(hid) 40 | with tf.device('/cpu:0'): 41 | hid = layer_norm(hid, scale=False, center=False) 42 | scale = tf.get_variable('scale', shape=[1 if unit_scale else K], 43 | initializer=tf.ones_initializer()) 44 | center = tf.get_variable('center', shape=[K], 45 | initializer=tf.constant_initializer(center_init)) 46 | hid = scale*hid + center 47 | if training: 48 | pi = pi * hard_sigmoid(hid + tf.random_normal(shape=tf.shape(hid))) 49 | z = RelaxedBernoulli(tau, logits=logit(pi)).sample() 50 | else: 51 | pi = pi * hard_sigmoid(hid) 52 | z = tf.where(tf.greater(pi, thres), pi, tf.zeros_like(pi)) 53 | #n_active = tf.reduce_mean( 54 | # tf.reduce_sum(tf.cast(tf.greater(pi, thres), tf.int32), 1)) 55 | n_active = tf.reduce_sum(tf.cast(tf.greater(pi, thres), tf.int32), 1) 56 | n_active = 
tf.reduce_sum(n_active)/N 57 | else: 58 | if training: 59 | z = RelaxedBernoulli(tau, logits=logit(pi)).sample(N) 60 | else: 61 | pi_ = tf.where(tf.greater(pi, thres), pi, tf.zeros_like(pi)) 62 | z = tf.tile(tf.expand_dims(pi_, 0), [N, 1]) 63 | n_active = tf.reduce_sum(tf.cast(tf.greater(pi, thres), tf.int32)) 64 | 65 | if scale_kl is None: 66 | kl = tf.reduce_sum(kl) 67 | else: 68 | kl = scale_kl * tf.reduce_mean(kl) 69 | 70 | if collect: 71 | if reuse is not True: 72 | tf.add_to_collection('kl', kl) 73 | prefix = 'train_' if training else 'test_' 74 | tf.add_to_collection(prefix+'pi', pi) 75 | tf.add_to_collection(prefix+'n_active', n_active) 76 | 77 | z = tf.reshape(z, ([-1, K, 1, 1] if is_conv else [-1, K])) 78 | return x*z 79 | -------------------------------------------------------------------------------- /model/digamma.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | # @MISC {1446110, 3 | # TITLE = {Approximating the Digamma function}, 4 | # AUTHOR = {njuffa (https://math.stackexchange.com/users/114200/njuffa)}, 5 | # HOWPUBLISHED = {Mathematics Stack Exchange}, 6 | # NOTE = {URL:https://math.stackexchange.com/q/1446110 (version: 2015-09-22)}, 7 | # EPRINT = {https://math.stackexchange.com/q/1446110}, 8 | # URL = {https://math.stackexchange.com/q/1446110}} 9 | 10 | def digamma_approx(x): 11 | def digamma_over_one(x): 12 | return tf.log(x + 0.4849142940227510) \ 13 | - 1/(1.0271785180163817*x) 14 | return digamma_over_one(x+1) - 1./x 15 | -------------------------------------------------------------------------------- /model/layers.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | 4 | exp = tf.exp 5 | log = lambda x: tf.log(x + 1e-20) 6 | logit = lambda x: log(x) - log(1-x) 7 | softplus = tf.nn.softplus 8 | softmax = tf.nn.softmax 9 | tanh = tf.nn.tanh 10 | relu = tf.nn.relu 11 | sigmoid = tf.nn.sigmoid 12 | 13 | dense = tf.layers.dense 14 | flatten = tf.contrib.layers.flatten 15 | 16 | def conv(x, filters, kernel_size=3, strides=1, **kwargs): 17 | return tf.layers.conv2d(x, filters, kernel_size, strides, 18 | data_format='channels_first', **kwargs) 19 | 20 | def pool(x, **kwargs): 21 | return tf.layers.max_pooling2d(x, 2, 2, 22 | data_format='channels_first', **kwargs) 23 | 24 | def global_avg_pool(x): 25 | return tf.reduce_mean(x, axis=[2, 3]) 26 | 27 | batch_norm = tf.layers.batch_normalization 28 | layer_norm = tf.contrib.layers.layer_norm 29 | -------------------------------------------------------------------------------- /model/lenet.py: -------------------------------------------------------------------------------- 1 | from layers import * 2 | from utils.train import * 3 | 4 | def lenet_dense(x, y, training, name='lenet', reuse=None, 5 | dropout=None, **dropout_kwargs): 6 | dropout_ = lambda x, subname: x if dropout is None else \ 7 | dropout(x, training, name=name+subname, reuse=reuse, 8 | **dropout_kwargs) 9 | x = dense(dropout_(x, '/dropout1'), 500, activation=relu, 10 | name=name+'/dense1', reuse=reuse) 11 | x = dense(dropout_(x, '/dropout2'), 300, activation=relu, 12 | name=name+'/dense2', reuse=reuse) 13 | x = dense(dropout_(x, '/dropout3'), 10, name=name+'/dense3', reuse=reuse) 14 | 15 | net = {} 16 | all_vars = tf.get_collection('variables', scope=name) 17 | net['qpi_vars'] = [v for v in all_vars if 'qpi_vars' in v.name] 18 | net['pzx_vars'] = [v for v in all_vars if 'pzx_vars' in v.name] 19 | net['weights'] = 
[v for v in all_vars \ 20 | if 'qpi_vars' not in v.name and 'pzx_vars' not in v.name] 21 | 22 | net['cent'] = cross_entropy(x, y) 23 | net['wd'] = weight_decay(1e-4, var_list=net['weights']) 24 | net['acc'] = accuracy(x, y) 25 | 26 | prefix = 'train_' if training else 'test_' 27 | net['kl'] = tf.get_collection('kl') 28 | net['pi'] = tf.get_collection(prefix+'pi') 29 | net['n_active'] = tf.get_collection(prefix+'n_active') 30 | 31 | return net 32 | 33 | def lenet_conv(x, y, training, name='lenet', reuse=None, 34 | dropout=None, **dropout_kwargs): 35 | dropout_ = lambda x, subname: x if dropout is None else \ 36 | dropout(x, training, name=name+subname, reuse=reuse, 37 | **dropout_kwargs) 38 | x = tf.reshape(x, [-1, 1, 28, 28]) 39 | x = conv(x, 20, 5, name=name+'/conv1', reuse=reuse) 40 | x = relu(dropout_(x, '/dropout1')) 41 | x = pool(x, name=name+'/pool1') 42 | x = conv(x, 50, 5, name=name+'/conv2', reuse=reuse) 43 | x = relu(dropout_(x, '/dropout2')) 44 | x = pool(x, name=name+'/pool2') 45 | x = flatten(x) 46 | x = dense(dropout_(x, '/dropout3'), 500, activation=relu, 47 | name=name+'/dense1', reuse=reuse) 48 | x = dense(dropout_(x, '/dropout4'), 10, name=name+'/dense2', reuse=reuse) 49 | 50 | net = {} 51 | all_vars = tf.get_collection('variables', scope=name) 52 | net['qpi_vars'] = [v for v in all_vars if 'qpi_vars' in v.name] 53 | net['pzx_vars'] = [v for v in all_vars if 'pzx_vars' in v.name] 54 | net['weights'] = [v for v in all_vars \ 55 | if 'qpi_vars' not in v.name and 'pzx_vars' not in v.name] 56 | 57 | net['cent'] = cross_entropy(x, y) 58 | net['wd'] = weight_decay(1e-4, var_list=net['weights']) 59 | net['acc'] = accuracy(x, y) 60 | 61 | prefix = 'train_' if training else 'test_' 62 | net['kl'] = tf.get_collection('kl') 63 | net['pi'] = tf.get_collection(prefix+'pi') 64 | net['n_active'] = tf.get_collection(prefix+'n_active') 65 | 66 | return net 67 | -------------------------------------------------------------------------------- /model/sbpdropout.py: -------------------------------------------------------------------------------- 1 | # copied from https://github.com/necludov/group-sparsity-sbp 2 | import tensorflow as tf 3 | from tensorflow.python.ops.distributions import special_math 4 | import numpy as np 5 | 6 | def phi(x): 7 | return 0.5*tf.erfc(-x/tf.sqrt(2.0)) 8 | 9 | def __erfinv(x): 10 | w = -tf.log((1.0-x)*(1.0+x)-1e-5) 11 | p_small = 2.81022636e-08*tf.ones_like(x) 12 | p_small = 3.43273939e-07 + p_small*(w-2.5) 13 | p_small = -3.5233877e-06 + p_small*(w-2.5) 14 | p_small = -4.39150654e-06 + p_small*(w-2.5) 15 | p_small = 0.00021858087 + p_small*(w-2.5) 16 | p_small = -0.00125372503 + p_small*(w-2.5) 17 | p_small = -0.00417768164 + p_small*(w-2.5) 18 | p_small = 0.246640727 + p_small*(w-2.5) 19 | p_small = 1.50140941 + p_small*(w-2.5) 20 | 21 | p_big = -0.000200214257*tf.ones_like(x) 22 | p_big = 0.000100950558 + p_big*(tf.sqrt(w) - 3.0) 23 | p_big = 0.00134934322 + p_big*(tf.sqrt(w) - 3.0) 24 | p_big = -0.00367342844 + p_big*(tf.sqrt(w) - 3.0) 25 | p_big = 0.00573950773 + p_big*(tf.sqrt(w) - 3.0) 26 | p_big = -0.0076224613 + p_big*(tf.sqrt(w) - 3.0) 27 | p_big = 0.00943887047 + p_big*(tf.sqrt(w) - 3.0) 28 | p_big = 1.00167406 + p_big*(tf.sqrt(w) - 3.0) 29 | p_big = 2.83297682 + p_big*(tf.sqrt(w) - 3.0) 30 | 31 | small_mask = tf.cast(tf.less(w, 5.0*tf.ones_like(w)), tf.float32) 32 | big_mask = tf.cast(tf.greater_equal(w, 5.0*tf.ones_like(w)), tf.float32) 33 | p = p_small*small_mask + p_big*big_mask 34 | return p*x 35 | 36 | def erfinv(x): 37 | return 
special_math.ndtri((x+1.)/2.0)/tf.sqrt(2.) 38 | 39 | def erfcx(x): 40 | """M. M. Shepherd and J. G. Laframboise, 41 | MATHEMATICS OF COMPUTATION 36, 249 (1981) 42 | """ 43 | K = 3.75 44 | y = (tf.abs(x)-K) / (tf.abs(x)+K) 45 | y2 = 2.0*y 46 | (d, dd) = (-0.4e-20, 0.0) 47 | (d, dd) = (y2 * d - dd + 0.3e-20, d) 48 | (d, dd) = (y2 * d - dd + 0.97e-19, d) 49 | (d, dd) = (y2 * d - dd + 0.27e-19, d) 50 | (d, dd) = (y2 * d - dd + -0.2187e-17, d) 51 | (d, dd) = (y2 * d - dd + -0.2237e-17, d) 52 | (d, dd) = (y2 * d - dd + 0.50681e-16, d) 53 | (d, dd) = (y2 * d - dd + 0.74182e-16, d) 54 | (d, dd) = (y2 * d - dd + -0.1250795e-14, d) 55 | (d, dd) = (y2 * d - dd + -0.1864563e-14, d) 56 | (d, dd) = (y2 * d - dd + 0.33478119e-13, d) 57 | (d, dd) = (y2 * d - dd + 0.32525481e-13, d) 58 | (d, dd) = (y2 * d - dd + -0.965469675e-12, d) 59 | (d, dd) = (y2 * d - dd + 0.194558685e-12, d) 60 | (d, dd) = (y2 * d - dd + 0.28687950109e-10, d) 61 | (d, dd) = (y2 * d - dd + -0.63180883409e-10, d) 62 | (d, dd) = (y2 * d - dd + -0.775440020883e-09, d) 63 | (d, dd) = (y2 * d - dd + 0.4521959811218e-08, d) 64 | (d, dd) = (y2 * d - dd + 0.10764999465671e-07, d) 65 | (d, dd) = (y2 * d - dd + -0.218864010492344e-06, d) 66 | (d, dd) = (y2 * d - dd + 0.774038306619849e-06, d) 67 | (d, dd) = (y2 * d - dd + 0.4139027986073010e-05, d) 68 | (d, dd) = (y2 * d - dd + -0.69169733025012064e-04, d) 69 | (d, dd) = (y2 * d - dd + 0.490775836525808632e-03, d) 70 | (d, dd) = (y2 * d - dd + -0.2413163540417608191e-02, d) 71 | (d, dd) = (y2 * d - dd + 0.9074997670705265094e-02, d) 72 | (d, dd) = (y2 * d - dd + -0.26658668435305752277e-01, d) 73 | (d, dd) = (y2 * d - dd + 0.59209939998191890498e-01, d) 74 | (d, dd) = (y2 * d - dd + -0.84249133366517915584e-01, d) 75 | (d, dd) = (y2 * d - dd + -0.4590054580646477331e-02, d) 76 | d = y * d - dd + 0.1177578934567401754080e+01 77 | result = d/(1.0+2.0*tf.abs(x)) 78 | result = tf.where(tf.is_nan(result), tf.ones_like(result), result) 79 | result = tf.where(tf.is_inf(result), tf.ones_like(result), result) 80 | 81 | negative_mask = tf.cast(tf.less(x, 0.0), tf.float32) 82 | positive_mask = tf.cast(tf.greater_equal(x, 0.0), tf.float32) 83 | negative_result = 2.0*tf.exp(x*x)-result 84 | negative_result = tf.where(tf.is_nan(negative_result), tf.ones_like(negative_result), negative_result) 85 | negative_result = tf.where(tf.is_inf(negative_result), tf.ones_like(negative_result), negative_result) 86 | result = negative_mask * negative_result + positive_mask * result 87 | return result 88 | 89 | def phi_inv(x): 90 | return tf.sqrt(2.0)*erfinv(2.0*x-1) 91 | 92 | def mean_truncated_log_normal_straight(mu, sigma, a, b): 93 | alpha = (a - mu)/sigma 94 | beta = (b - mu)/sigma 95 | z = phi(beta) - phi(alpha) 96 | mean = tf.exp(mu+sigma*sigma/2.0)/z*(phi(sigma-alpha) - phi(sigma-beta)) 97 | return mean 98 | 99 | def mean_truncated_log_normal_reduced(mu, sigma, a, b): 100 | alpha = (a - mu)/sigma 101 | beta = (b - mu)/sigma 102 | z = phi(beta) - phi(alpha) 103 | mean = erfcx((sigma-beta)/tf.sqrt(2.0))*tf.exp(b-beta*beta/2) 104 | mean = mean - erfcx((sigma-alpha)/tf.sqrt(2.0))*tf.exp(a-alpha*alpha/2) 105 | mean = mean/(2*z) 106 | return mean 107 | 108 | def mean_truncated_log_normal(mu, sigma, a, b): 109 | return mean_truncated_log_normal_reduced(mu, sigma, a, b) 110 | 111 | def median_truncated_log_normal(mu, sigma, a, b): 112 | alpha = (a - mu)/sigma 113 | beta = (b - mu)/sigma 114 | gamma = phi(alpha)+0.5*(phi(beta)-phi(alpha)) 115 | return tf.exp(phi_inv(gamma)*sigma+mu) 116 | 117 | def 
snr_truncated_log_normal(mu, sigma, a, b): 118 | alpha = (a - mu)/sigma 119 | beta = (b - mu)/sigma 120 | z = phi(beta) - phi(alpha) 121 | ratio = erfcx((sigma-beta)/tf.sqrt(2.0))*tf.exp((b-mu)-beta**2/2.0) 122 | ratio = ratio - erfcx((sigma-alpha)/tf.sqrt(2.0))*tf.exp((a-mu)-alpha**2/2.0) 123 | denominator = 2*z*erfcx((2.0*sigma-beta)/tf.sqrt(2.0))*tf.exp(2.0*(b-mu)-beta**2/2.0) 124 | denominator = denominator - 2*z*erfcx((2.0*sigma-alpha)/tf.sqrt(2.0))*tf.exp(2.0*(a-mu)-alpha**2/2.0) 125 | denominator = denominator - ratio**2 126 | ratio = ratio/tf.sqrt(denominator) 127 | return ratio 128 | 129 | def sample_truncated_normal(mu, sigma, a, b): 130 | alpha = (a - mu)/sigma 131 | beta = (b - mu)/sigma 132 | gamma = phi(alpha)+tf.random_uniform(mu.shape)*(phi(beta)-phi(alpha)) 133 | return tf.clip_by_value(phi_inv(tf.clip_by_value(gamma, 1e-5, 1.0-1e-5))*sigma+mu, a, b) 134 | 135 | def sbpdropout(x, training, 136 | thres=1.0, scale_kl=None, collect=True, 137 | name='sbpdropout', reuse=None): 138 | 139 | min_log = -20.0 140 | max_log = 0.0 141 | 142 | axis = 1 143 | 144 | params_shape = np.ones(x.get_shape().ndims) 145 | params_shape[axis] = x.get_shape()[axis].value 146 | 147 | with tf.variable_scope(name+'/qpi_vars', reuse=reuse): 148 | with tf.device('/cpu:0'): 149 | mu = tf.get_variable('mu', shape=params_shape.tolist(), 150 | initializer=tf.zeros_initializer()) 151 | log_sigma = tf.get_variable('log_sigma', shape=params_shape.tolist(), 152 | initializer=tf.constant_initializer(-5.0)) 153 | 154 | mu = tf.clip_by_value(mu, -20.0, 5.0) 155 | log_sigma = tf.clip_by_value(log_sigma, -20.0, 5.0) 156 | sigma = tf.exp(log_sigma) 157 | 158 | # adding loss 159 | alpha = (min_log-mu)/sigma 160 | beta = (max_log-mu)/sigma 161 | z = phi(beta) - phi(alpha) 162 | 163 | def pdf(x): 164 | return tf.exp(-x*x/2.0)/tf.sqrt(2.0*np.pi) 165 | kl = -log_sigma-tf.log(z)-(alpha*pdf(alpha)-beta*pdf(beta))/(2.0*z) 166 | kl = kl+tf.log(max_log-min_log)-tf.log(2.0*np.pi*np.e)/2.0 167 | if scale_kl is None: 168 | kl = tf.reduce_sum(kl) 169 | else: 170 | kl = scale_kl*tf.reduce_mean(kl) 171 | 172 | if training: 173 | z = tf.exp(sample_truncated_normal(mu, sigma, min_log, max_log)) 174 | else: 175 | z = mean_truncated_log_normal(mu, sigma, min_log, max_log) 176 | snr = snr_truncated_log_normal(mu, sigma, min_log, max_log) 177 | mask = tf.cast(tf.greater(snr, thres*tf.ones_like(snr)), tf.float32) 178 | 179 | n_active = tf.reduce_sum(tf.cast(mask, tf.int32)) 180 | 181 | if collect: 182 | if reuse is not True: 183 | tf.add_to_collection('kl', kl) 184 | prefix = 'train_' if training else 'test_' 185 | tf.add_to_collection(prefix+'p', snr) 186 | tf.add_to_collection(prefix+'n_active', n_active) 187 | 188 | if not training: 189 | z = mask*z 190 | 191 | return x*z 192 | -------------------------------------------------------------------------------- /model/utils: -------------------------------------------------------------------------------- 1 | ../utils/ -------------------------------------------------------------------------------- /scripts/lenet_conv/bbdropout.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import tensorflow as tf 3 | from tensorflow.examples.tutorials.mnist import input_data 4 | from model.lenet import lenet_conv 5 | from model.bbdropout import bbdropout 6 | from utils.accumulator import Accumulator 7 | from utils.train import * 8 | from utils.mnist import mnist_input 9 | import time 10 | import os 11 | import argparse 12 | 
import csv 13 | from pylab import * 14 | 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument('--batch_size', type=int, default=100) 17 | parser.add_argument('--n_epochs', type=int, default=200) 18 | parser.add_argument('--save_freq', type=int, default=20) 19 | parser.add_argument('--savedir', type=str, default=None) 20 | parser.add_argument('--pretraindir', type=str, default=None) 21 | parser.add_argument('--mode', type=str, default='train') 22 | parser.add_argument('--gpu_num', type=int, default=0) 23 | parser.add_argument('--csvfn', type=str, default=None) 24 | args = parser.parse_args() 25 | 26 | os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1' 27 | os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu_num) 28 | 29 | pretraindir = './results/pretrained' if args.pretraindir is None else args.pretraindir 30 | savedir = './results/bbdropout/sample_run' if args.savedir is None else args.savedir 31 | if not os.path.isdir(savedir): 32 | os.makedirs(savedir) 33 | 34 | batch_size = args.batch_size 35 | mnist, n_train_batches, n_test_batches = mnist_input(batch_size) 36 | x = tf.placeholder(tf.float32, [None, 784]) 37 | y = tf.placeholder(tf.float32, [None, 10]) 38 | N = mnist.train.num_examples 39 | scale_kl = 1e-2*N 40 | dropout = bbdropout 41 | net = lenet_conv(x, y, True, dropout=dropout, scale_kl=scale_kl) 42 | tnet = lenet_conv(x, y, False, reuse=True, dropout=dropout, 43 | scale_kl=scale_kl) 44 | 45 | def train(): 46 | loss = net['cent'] + tf.add_n(net['kl'])/float(N) + net['wd'] 47 | global_step = tf.train.get_or_create_global_step() 48 | bdr = [int(n_train_batches*(args.n_epochs-1)*r) for r in [0.5, 0.75]] 49 | vals = [1e-2, 1e-3, 1e-4] 50 | lr = tf.train.piecewise_constant(tf.cast(global_step, tf.int32), bdr, vals) 51 | train_op1 = tf.train.AdamOptimizer(lr).minimize(loss, 52 | var_list=net['qpi_vars'], global_step=global_step) 53 | train_op2 = tf.train.AdamOptimizer(0.1*lr).minimize(loss, 54 | var_list=net['weights']) 55 | train_op = tf.group(train_op1, train_op2) 56 | 57 | pretrain_saver = tf.train.Saver(net['weights']) 58 | saver = tf.train.Saver(net['weights']+net['qpi_vars']) 59 | logfile = open(os.path.join(savedir, 'train.log'), 'w', 0) 60 | 61 | sess = tf.Session() 62 | sess.run(tf.global_variables_initializer()) 63 | pretrain_saver.restore(sess, os.path.join(pretraindir, 'model')) 64 | 65 | train_logger = Accumulator('cent', 'acc') 66 | train_to_run = [train_op, net['cent'], net['acc']] 67 | test_logger = Accumulator('cent', 'acc') 68 | test_to_run = [tnet['cent'], tnet['acc']] 69 | for i in range(args.n_epochs): 70 | line = 'Epoch %d start, learning rate %f' % (i+1, sess.run(lr)) 71 | print(line) 72 | logfile.write(line + '\n') 73 | train_logger.clear() 74 | start = time.time() 75 | for j in range(n_train_batches): 76 | bx, by = mnist.train.next_batch(batch_size) 77 | train_logger.accum(sess.run(train_to_run, {x:bx, y:by})) 78 | train_logger.print_(header='train', epoch=i+1, 79 | time=time.time()-start, logfile=logfile) 80 | 81 | test_logger.clear() 82 | for j in range(n_test_batches): 83 | bx, by = mnist.test.next_batch(batch_size) 84 | test_logger.accum(sess.run(test_to_run, {x:bx, y:by})) 85 | test_logger.print_(header='test', epoch=i+1, 86 | time=time.time()-start, logfile=logfile) 87 | line = 'kl: ' + str(sess.run(tnet['kl'])) + '\n' 88 | line += 'n_active: ' + str(sess.run(tnet['n_active'])) + '\n' 89 | print(line) 90 | logfile.write(line+'\n') 91 | if (i+1) % args.save_freq == 0: 92 | saver.save(sess, os.path.join(savedir, 'model')) 93 | 94 | logfile.close() 95 
| saver.save(sess, os.path.join(savedir, 'model')) 96 | 97 | def test(): 98 | sess = tf.Session() 99 | saver = tf.train.Saver(tnet['weights']+tnet['qpi_vars']+tnet['pzx_vars']) 100 | saver.restore(sess, os.path.join(savedir, 'model')) 101 | logger = Accumulator('cent', 'acc') 102 | to_run = [tnet['cent'], tnet['acc']] 103 | for j in range(n_test_batches): 104 | bx, by = mnist.test.next_batch(batch_size) 105 | logger.accum(sess.run(to_run, {x:bx, y:by})) 106 | logger.print_(header='test') 107 | line = 'kl: ' + str(sess.run(tnet['kl'])) + '\n' 108 | line += 'n_active: ' + str(sess.run(tnet['n_active'])) + '\n' 109 | print(line) 110 | 111 | def visualize(): 112 | sess = tf.Session() 113 | saver = tf.train.Saver(tnet['weights']+tnet['qpi_vars']) 114 | saver.restore(sess, os.path.join(savedir, 'model')) 115 | 116 | n_drop = len(tnet['n_active']) 117 | fig = figure('pi') 118 | axarr = fig.subplots(n_drop) 119 | for i in range(n_drop): 120 | np_pi = sess.run(tnet['pi'][i]).reshape((1,-1)) 121 | im = axarr[i].imshow(np_pi, cmap='gray', aspect='auto') 122 | axarr[i].yaxis.set_visible(False) 123 | axarr[i].xaxis.set_major_locator(MaxNLocator(integer=True)) 124 | if i == n_drop-1: 125 | axarr[i].set_xlabel('neurons') 126 | fig.colorbar(im, ax=axarr[i]) 127 | show() 128 | 129 | def record(): 130 | sess = tf.Session() 131 | saver = tf.train.Saver(tnet['weights']+tnet['qpi_vars']) 132 | saver.restore(sess, os.path.join(savedir, 'model')) 133 | logger = Accumulator('cent', 'acc') 134 | to_run = [tnet['cent'], tnet['acc']] 135 | for j in range(n_test_batches): 136 | bx, by = mnist.test.next_batch(batch_size) 137 | logger.accum(sess.run(to_run, {x:bx, y:by})) 138 | np_n_active = sess.run(tnet['n_active']) 139 | 140 | if not os.path.isdir('../../records'): 141 | os.makedirs('../../records') 142 | csvfn = os.path.join('../../records', 143 | 'bbdropout_lenet_conv.csv' if args.csvfn is None else args.csvfn) 144 | 145 | if csvfn is not None: 146 | flag = 'a' if os.path.exists(csvfn) else 'w' 147 | with open(csvfn, flag) as f: 148 | writer = csv.writer(f) 149 | if flag=='w': 150 | writer.writerow(['savedir', 'cent', 'acc', 'n_active']) 151 | line = [savedir] 152 | line.append('%.4f' % logger.get('cent')) 153 | line.append('%.4f' % logger.get('acc')) 154 | line.append('-'.join(str(x) for x in np_n_active)) 155 | writer.writerow(line) 156 | 157 | if __name__=='__main__': 158 | if args.mode == 'train': 159 | train() 160 | elif args.mode == 'test': 161 | test() 162 | elif args.mode == 'vis': 163 | visualize() 164 | elif args.mode == 'record': 165 | record() 166 | else: 167 | raise ValueError('Invalid mode %s' % args.mode) 168 | -------------------------------------------------------------------------------- /scripts/lenet_conv/dbbdropout.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import tensorflow as tf 3 | from tensorflow.examples.tutorials.mnist import input_data 4 | from model.lenet import lenet_conv 5 | from model.bbdropout import bbdropout 6 | from utils.accumulator import Accumulator 7 | from utils.train import * 8 | from utils.mnist import mnist_input 9 | import time 10 | import os 11 | import argparse 12 | import csv 13 | import matplotlib 14 | matplotlib.use('Agg') 15 | import matplotlib.pyplot as plt 16 | 17 | parser = argparse.ArgumentParser() 18 | parser.add_argument('--batch_size', type=int, default=100) 19 | parser.add_argument('--n_epochs', type=int, default=200) 20 | parser.add_argument('--save_freq', type=int, 
default=20) 21 | parser.add_argument('--vis_freq', type=int, default=20) 22 | parser.add_argument('--center_init', type=float, default=1.0) 23 | parser.add_argument('--pretraindir', type=str, default=None) 24 | parser.add_argument('--savedir', type=str, default=None) 25 | parser.add_argument('--mode', type=str, default='train') 26 | parser.add_argument('--gpu_num', type=int, default=0) 27 | parser.add_argument('--csvfn', type=str, default=None) 28 | args = parser.parse_args() 29 | 30 | os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1' 31 | os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu_num) 32 | 33 | pretraindir = './results/bbdropout/sample_run' if args.pretraindir is None else args.pretraindir 34 | savedir = './results/dbbdropout/sample_run' if args.savedir is None else args.savedir 35 | if not os.path.isdir(savedir): 36 | os.makedirs(savedir) 37 | figdir = os.path.join(savedir, 'figs') 38 | if not os.path.isdir(figdir): 39 | os.makedirs(figdir) 40 | 41 | batch_size = args.batch_size 42 | mnist, n_train_batches, n_test_batches = mnist_input(batch_size) 43 | x = tf.placeholder(tf.float32, [None, 784]) 44 | y = tf.placeholder(tf.float32, [None, 10]) 45 | N = mnist.train.num_examples 46 | scale_kl = 1e-2*N 47 | center_init = args.center_init 48 | net = lenet_conv(x, y, True, dropout=bbdropout, scale_kl=scale_kl, 49 | dep=True, center_init=center_init) 50 | tnet = lenet_conv(x, y, False, reuse=True, 51 | dropout=bbdropout, scale_kl=scale_kl, 52 | dep=True, center_init=center_init) 53 | n_drop = len(tnet['n_active']) 54 | 55 | def train(): 56 | loss = net['cent'] + tf.add_n(net['kl'])/float(N) + net['wd'] 57 | global_step = tf.train.get_or_create_global_step() 58 | bdr = [int(n_train_batches*(args.n_epochs-1)*r) for r in [0.5, 0.75]] 59 | vals = [1e-2, 1e-3, 1e-4] 60 | lr = tf.train.piecewise_constant(tf.cast(global_step, tf.int32), bdr, vals) 61 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) 62 | with tf.control_dependencies(update_ops): 63 | train_op1 = tf.train.AdamOptimizer(lr).minimize(loss, 64 | var_list=net['pzx_vars'], global_step=global_step) 65 | train_op2 = tf.train.AdamOptimizer(0.1*lr).minimize(loss, 66 | var_list=net['weights']) 67 | train_op = tf.group(train_op1, train_op2) 68 | 69 | pretrain_saver = tf.train.Saver(net['weights']+net['qpi_vars']) 70 | saver = tf.train.Saver(net['weights']+net['qpi_vars']+net['pzx_vars']) 71 | logfile = open(os.path.join(savedir, 'train.log'), 'w', 0) 72 | 73 | sess = tf.Session() 74 | sess.run(tf.global_variables_initializer()) 75 | pretrain_saver.restore(sess, os.path.join(pretraindir, 'model')) 76 | 77 | train_logger = Accumulator('cent', 'acc') 78 | train_to_run = [train_op, net['cent'], net['acc']] 79 | test_logger = Accumulator('cent', 'acc') 80 | test_to_run = [tnet['cent'], tnet['acc']] + tnet['n_active'] 81 | for i in range(args.n_epochs): 82 | line = 'Epoch %d start, learning rate %f' % (i+1, sess.run(lr)) 83 | print(line) 84 | logfile.write(line + '\n') 85 | train_logger.clear() 86 | start = time.time() 87 | for j in range(n_train_batches): 88 | bx, by = mnist.train.next_batch(batch_size) 89 | train_logger.accum(sess.run(train_to_run, {x:bx, y:by})) 90 | train_logger.print_(header='train', epoch=i+1, 91 | time=time.time()-start, logfile=logfile) 92 | 93 | test_logger.clear() 94 | np_n_active = [0]*n_drop 95 | for j in range(n_test_batches): 96 | bx, by = mnist.test.next_batch(batch_size) 97 | res = sess.run(test_to_run, {x:bx, y:by}) 98 | test_logger.accum(res[:-n_drop]) 99 | np_n_active = [a + b for a, b in 
zip(np_n_active, res[-n_drop:])] 100 | test_logger.print_(header='test', epoch=i+1, 101 | time=time.time()-start, logfile=logfile) 102 | np_n_active = [int(a/n_test_batches) for a in np_n_active] 103 | line = 'kl: ' + str(sess.run(tnet['kl'])) + '\n' 104 | line += 'n_active: ' + str(np_n_active) + '\n' 105 | print(line) 106 | logfile.write(line+'\n') 107 | 108 | if (i+1) % args.save_freq == 0: 109 | saver.save(sess, os.path.join(savedir, 'model')) 110 | 111 | if (i+1)%args.vis_freq == 0: 112 | fig = _visualize(sess) 113 | fig.savefig(os.path.join(figdir, 'epoch%d.png'%(i+1)), dpi=200) 114 | 115 | saver.save(sess, os.path.join(savedir, 'model')) 116 | logfile.close() 117 | 118 | def test(): 119 | sess = tf.Session() 120 | saver = tf.train.Saver(tnet['weights']+tnet['qpi_vars']+tnet['pzx_vars']) 121 | saver.restore(sess, os.path.join(savedir, 'model')) 122 | logger = Accumulator('cent', 'acc') 123 | to_run = [tnet['cent'], tnet['acc']] + tnet['n_active'] 124 | np_n_active = [0]*n_drop 125 | for j in range(n_test_batches): 126 | bx, by = mnist.test.next_batch(batch_size) 127 | res = sess.run(to_run, {x:bx, y:by}) 128 | logger.accum(res[:-n_drop]) 129 | np_n_active = [a + b for a, b in zip(np_n_active, res[-n_drop:])] 130 | np_n_active = [int(a/n_test_batches) for a in np_n_active] 131 | logger.print_(header='test') 132 | line = 'kl: ' + str(sess.run(tnet['kl'])) + '\n' 133 | line += 'n_active:' + str(np_n_active) + '\n' 134 | print(line) 135 | 136 | def _visualize(sess): 137 | pi_csum = [tf.matmul(y, pi, transpose_a=True) for pi in tnet['pi']] 138 | csum = tf.expand_dims(tf.reduce_sum(y, 0), 1) 139 | 140 | np_pi_csum = [0]*n_drop 141 | np_csum = 0 142 | for j in range(n_test_batches): 143 | bx, by = mnist.test.next_batch(args.batch_size) 144 | A, B = sess.run([pi_csum, csum], {x:bx, y:by}) 145 | for k in range(len(pi_csum)): 146 | np_pi_csum[k] += A[k] 147 | np_csum += B 148 | 149 | fig = plt.figure('vis') 150 | axarr = fig.subplots(n_drop) 151 | for i in range(n_drop): 152 | im = axarr[i].imshow(np_pi_csum[i]/np_csum, cmap='gray', aspect='auto') 153 | fig.colorbar(im, ax=axarr[i]) 154 | return fig 155 | 156 | def visualize(): 157 | sess = tf.Session() 158 | saver = tf.train.Saver(tnet['weights']+tnet['qpi_vars']+tnet['pzx_vars']) 159 | saver.restore(sess, os.path.join(savedir, 'model')) 160 | _visualize(sess) 161 | plt.show() 162 | 163 | def record(): 164 | sess = tf.Session() 165 | saver = tf.train.Saver(tnet['weights']+tnet['qpi_vars']+tnet['pzx_vars']) 166 | saver.restore(sess, os.path.join(savedir, 'model')) 167 | logger = Accumulator('cent', 'acc') 168 | to_run = [tnet['cent'], tnet['acc']] + tnet['n_active'] 169 | np_n_active = [0]*n_drop 170 | for j in range(n_test_batches): 171 | bx, by = mnist.test.next_batch(batch_size) 172 | res = sess.run(to_run, {x:bx, y:by}) 173 | logger.accum(res[:-n_drop]) 174 | np_n_active = [a + b for a, b in zip(np_n_active, res[-n_drop:])] 175 | np_n_active = [int(a/n_test_batches) for a in np_n_active] 176 | 177 | if not os.path.isdir('../../records'): 178 | os.makedirs('../../records') 179 | csvfn = os.path.join('../../records', 180 | 'dbbdropout_lenet_conv.csv' if args.csvfn is None else args.csvfn) 181 | 182 | if csvfn is not None: 183 | flag = 'a' if os.path.exists(csvfn) else 'w' 184 | with open(csvfn, flag) as f: 185 | writer = csv.writer(f) 186 | if flag=='w': 187 | writer.writerow(['savedir', 'cent', 'acc', 'n_active']) 188 | line = [savedir] 189 | line.append('%.4f' % logger.get('cent')) 190 | line.append('%.4f' % logger.get('acc')) 191 | 
line.append('-'.join(str(x) for x in np_n_active)) 192 | writer.writerow(line) 193 | 194 | if __name__=='__main__': 195 | if args.mode == 'train': 196 | train() 197 | elif args.mode == 'test': 198 | test() 199 | elif args.mode == 'vis': 200 | visualize() 201 | elif args.mode == 'record': 202 | record() 203 | else: 204 | raise ValueError('Invalid mode %s' % args.mode) 205 | -------------------------------------------------------------------------------- /scripts/lenet_conv/model: -------------------------------------------------------------------------------- 1 | ../../model/ -------------------------------------------------------------------------------- /scripts/lenet_conv/pretrain.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import tensorflow as tf 3 | from tensorflow.examples.tutorials.mnist import input_data 4 | from model.lenet import lenet_conv 5 | from utils.accumulator import Accumulator 6 | from utils.train import * 7 | from utils.mnist import mnist_input 8 | import time 9 | import os 10 | import argparse 11 | 12 | parser = argparse.ArgumentParser() 13 | parser.add_argument('--batch_size', type=int, default=100) 14 | parser.add_argument('--n_epochs', type=int, default=200) 15 | parser.add_argument('--save_freq', type=int, default=20) 16 | parser.add_argument('--savedir', type=str, default=None) 17 | parser.add_argument('--mode', type=str, default='train') 18 | parser.add_argument('--gpu_num', type=int, default=0) 19 | args = parser.parse_args() 20 | 21 | os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1' 22 | os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu_num) 23 | 24 | savedir = './results/pretrained' if args.savedir is None else args.savedir 25 | if not os.path.isdir(savedir): 26 | os.makedirs(savedir) 27 | 28 | batch_size = args.batch_size 29 | mnist, n_train_batches, n_test_batches = mnist_input(batch_size) 30 | x = tf.placeholder(tf.float32, [None, 784]) 31 | y = tf.placeholder(tf.float32, [None, 10]) 32 | net = lenet_conv(x, y, True) 33 | tnet = lenet_conv(x, y, False, reuse=True) 34 | 35 | def train(): 36 | loss = net['cent'] + net['wd'] 37 | global_step = tf.train.get_or_create_global_step() 38 | lr = tf.train.piecewise_constant(tf.cast(global_step, tf.int32), 39 | [n_train_batches*args.n_epochs/2], [1e-4, 1e-5]) 40 | train_op = tf.train.AdamOptimizer(lr).minimize(loss, global_step=global_step) 41 | 42 | saver = tf.train.Saver(net['weights']) 43 | logfile = open(os.path.join(savedir, 'train.log'), 'w', 0) 44 | 45 | sess = tf.Session() 46 | sess.run(tf.global_variables_initializer()) 47 | 48 | train_logger = Accumulator('cent', 'acc') 49 | train_to_run = [train_op, net['cent'], net['acc']] 50 | test_logger = Accumulator('cent', 'acc') 51 | test_to_run = [tnet['cent'], tnet['acc']] 52 | for i in range(args.n_epochs): 53 | line = 'Epoch %d start, learning rate %f' % (i+1, sess.run(lr)) 54 | print(line) 55 | logfile.write(line + '\n') 56 | train_logger.clear() 57 | start = time.time() 58 | for j in range(n_train_batches): 59 | bx, by = mnist.train.next_batch(batch_size) 60 | train_logger.accum(sess.run(train_to_run, {x:bx, y:by})) 61 | train_logger.print_(header='train', epoch=i+1, 62 | time=time.time()-start, logfile=logfile) 63 | 64 | test_logger.clear() 65 | for j in range(n_test_batches): 66 | bx, by = mnist.test.next_batch(batch_size) 67 | test_logger.accum(sess.run(test_to_run, {x:bx, y:by})) 68 | test_logger.print_(header='test', epoch=i+1, 69 | time=time.time()-start, logfile=logfile) 
70 | print() 71 | logfile.write('\n') 72 | if (i+1)%args.save_freq == 0: 73 | saver.save(sess, os.path.join(savedir, 'model')) 74 | 75 | logfile.close() 76 | saver.save(sess, os.path.join(savedir, 'model')) 77 | 78 | def test(): 79 | sess = tf.Session() 80 | saver = tf.train.Saver(tnet['weights']) 81 | saver.restore(sess, os.path.join(savedir, 'model')) 82 | logger = Accumulator('cent', 'acc') 83 | to_run = [tnet['cent'], tnet['acc']] 84 | for j in range(n_test_batches): 85 | bx, by = mnist.test.next_batch(batch_size) 86 | logger.accum(sess.run(to_run, {x:bx, y:by})) 87 | logger.print_(header='test') 88 | 89 | if __name__=='__main__': 90 | if args.mode == 'train': 91 | train() 92 | elif args.mode == 'test': 93 | test() 94 | else: 95 | raise ValueError('Invalid mode %s' % args.mode) 96 | -------------------------------------------------------------------------------- /scripts/lenet_conv/sbpdropout.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import tensorflow as tf 3 | from tensorflow.examples.tutorials.mnist import input_data 4 | from model.lenet import lenet_conv 5 | from model.sbpdropout import sbpdropout 6 | from utils.accumulator import Accumulator 7 | from utils.train import * 8 | from utils.mnist import mnist_input 9 | import time 10 | import os 11 | import argparse 12 | import csv 13 | 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--batch_size', type=int, default=100) 16 | parser.add_argument('--n_epochs', type=int, default=200) 17 | parser.add_argument('--save_freq', type=int, default=20) 18 | parser.add_argument('--savedir', type=str, default=None) 19 | parser.add_argument('--pretraindir', type=str, default=None) 20 | parser.add_argument('--mode', type=str, default='train') 21 | parser.add_argument('--gpu_num', type=int, default=0) 22 | parser.add_argument('--csvfn', type=str, default=None) 23 | args = parser.parse_args() 24 | 25 | os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1' 26 | os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu_num) 27 | 28 | pretraindir = './results/pretrained' if args.pretraindir is None else args.pretraindir 29 | savedir = './results/bbdropout/sample_run' if args.savedir is None else args.savedir 30 | if not os.path.isdir(savedir): 31 | os.makedirs(savedir) 32 | 33 | batch_size = args.batch_size 34 | mnist, n_train_batches, n_test_batches = mnist_input(batch_size) 35 | x = tf.placeholder(tf.float32, [None, 784]) 36 | y = tf.placeholder(tf.float32, [None, 10]) 37 | N = mnist.train.num_examples 38 | scale_kl = 1e-2*N 39 | dropout = sbpdropout 40 | net = lenet_conv(x, y, True, dropout=dropout, scale_kl=scale_kl) 41 | tnet = lenet_conv(x, y, False, reuse=True, dropout=dropout, 42 | scale_kl=scale_kl) 43 | 44 | def train(): 45 | loss = net['cent'] + tf.add_n(net['kl'])/float(N) + net['wd'] 46 | global_step = tf.train.get_or_create_global_step() 47 | bdr = [int(n_train_batches*(args.n_epochs-1)*r) for r in [0.5, 0.75]] 48 | vals = [1e-2, 1e-3, 1e-4] 49 | lr = tf.train.piecewise_constant(tf.cast(global_step, tf.int32), bdr, vals) 50 | train_op1 = tf.train.AdamOptimizer(lr).minimize(loss, 51 | var_list=net['qpi_vars'], global_step=global_step) 52 | train_op2 = tf.train.AdamOptimizer(0.1*lr).minimize(loss, 53 | var_list=net['weights']) 54 | train_op = tf.group(train_op1, train_op2) 55 | 56 | pretrain_saver = tf.train.Saver(net['weights']) 57 | saver = tf.train.Saver(net['weights']+net['qpi_vars']) 58 | logfile = open(os.path.join(savedir, 'train.log'), 'w', 0) 
59 | 60 | sess = tf.Session() 61 | sess.run(tf.global_variables_initializer()) 62 | pretrain_saver.restore(sess, os.path.join(pretraindir, 'model')) 63 | 64 | train_logger = Accumulator('cent', 'acc') 65 | train_to_run = [train_op, net['cent'], net['acc']] 66 | test_logger = Accumulator('cent', 'acc') 67 | test_to_run = [tnet['cent'], tnet['acc']] 68 | for i in range(args.n_epochs): 69 | line = 'Epoch %d start, learning rate %f' % (i+1, sess.run(lr)) 70 | print(line) 71 | logfile.write(line + '\n') 72 | train_logger.clear() 73 | start = time.time() 74 | for j in range(n_train_batches): 75 | bx, by = mnist.train.next_batch(batch_size) 76 | train_logger.accum(sess.run(train_to_run, {x:bx, y:by})) 77 | train_logger.print_(header='train', epoch=i+1, 78 | time=time.time()-start, logfile=logfile) 79 | 80 | test_logger.clear() 81 | for j in range(n_test_batches): 82 | bx, by = mnist.test.next_batch(batch_size) 83 | test_logger.accum(sess.run(test_to_run, {x:bx, y:by})) 84 | test_logger.print_(header='test', epoch=i+1, 85 | time=time.time()-start, logfile=logfile) 86 | line = 'kl: ' + str(sess.run(tnet['kl'])) + '\n' 87 | line += 'n_active: ' + str(sess.run(tnet['n_active'])) + '\n' 88 | print(line) 89 | logfile.write(line+'\n') 90 | if (i+1)%args.save_freq == 0: 91 | saver.save(sess, os.path.join(savedir, 'model')) 92 | 93 | logfile.close() 94 | saver.save(sess, os.path.join(savedir, 'model')) 95 | 96 | def test(): 97 | sess = tf.Session() 98 | saver = tf.train.Saver(tnet['weights']+tnet['qpi_vars']+tnet['pzx_vars']) 99 | saver.restore(sess, os.path.join(savedir, 'model')) 100 | logger = Accumulator('cent', 'acc') 101 | to_run = [tnet['cent'], tnet['acc']] 102 | for j in range(n_test_batches): 103 | bx, by = mnist.test.next_batch(batch_size) 104 | logger.accum(sess.run(to_run, {x:bx, y:by})) 105 | logger.print_(header='test') 106 | line = 'kl: ' + str(sess.run(tnet['kl'])) + '\n' 107 | line += 'n_active: ' + str(sess.run(tnet['n_active'])) + '\n' 108 | print(line) 109 | 110 | def record(): 111 | sess = tf.Session() 112 | saver = tf.train.Saver(tnet['weights']+tnet['qpi_vars']) 113 | saver.restore(sess, os.path.join(savedir, 'model')) 114 | logger = Accumulator('cent', 'acc') 115 | to_run = [tnet['cent'], tnet['acc']] 116 | for j in range(n_test_batches): 117 | bx, by = mnist.test.next_batch(batch_size) 118 | logger.accum(sess.run(to_run, {x:bx, y:by})) 119 | np_n_active = sess.run(tnet['n_active']) 120 | 121 | if not os.path.isdir('../../records'): 122 | os.makedirs('../../records') 123 | csvfn = os.path.join('../../records', 124 | 'sbpdropout_lenet_conv.csv' if args.csvfn is None else args.csvfn) 125 | 126 | if csvfn is not None: 127 | flag = 'a' if os.path.exists(csvfn) else 'w' 128 | with open(csvfn, flag) as f: 129 | writer = csv.writer(f) 130 | if flag=='w': 131 | writer.writerow(['savedir', 'cent', 'acc', 'n_active']) 132 | line = [savedir] 133 | line.append('%.4f' % logger.get('cent')) 134 | line.append('%.4f' % logger.get('acc')) 135 | line.append('-'.join(str(x) for x in np_n_active)) 136 | writer.writerow(line) 137 | 138 | if __name__=='__main__': 139 | if args.mode == 'train': 140 | train() 141 | elif args.mode == 'test': 142 | test() 143 | elif args.mode == 'record': 144 | record() 145 | else: 146 | raise ValueError('Invalid mode %s' % args.mode) 147 | -------------------------------------------------------------------------------- /scripts/lenet_conv/utils: -------------------------------------------------------------------------------- 1 | ../../utils/ 
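A minimal usage sketch for the scripts/lenet_conv experiments, mirroring the lenet_dense instructions in the README and assuming the default --pretraindir/--savedir values defined in the scripts above (./results/pretrained and ./results/bbdropout/sample_run); pass your own directories to override them.

```bash
$ cd ~/[WORKING_DIR]/scripts/lenet_conv
$ python ./pretrain.py                # pretrains the LeNet conv model, saves to ./results/pretrained
$ python ./bbdropout.py               # restores the pretrained weights, saves to ./results/bbdropout/sample_run
$ python ./dbbdropout.py              # data-dependent variant, restores ./results/bbdropout/sample_run
$ python ./bbdropout.py --mode test   # other modes: vis, record
```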
-------------------------------------------------------------------------------- /scripts/lenet_dense/bbdropout.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from tensorflow.examples.tutorials.mnist import input_data 3 | from model.lenet import lenet_dense 4 | from model.bbdropout import bbdropout 5 | from utils.accumulator import Accumulator 6 | from utils.train import * 7 | from utils.mnist import mnist_input 8 | import time 9 | import os 10 | import argparse 11 | import csv 12 | from pylab import * 13 | 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--batch_size', type=int, default=100) 16 | parser.add_argument('--n_epochs', type=int, default=200) 17 | parser.add_argument('--save_freq', type=int, default=20) 18 | parser.add_argument('--savedir', type=str, default=None) 19 | parser.add_argument('--pretraindir', type=str, default=None) 20 | parser.add_argument('--mode', type=str, default='train') 21 | parser.add_argument('--gpu_num', type=int, default=0) 22 | parser.add_argument('--csvfn', type=str, default=None) 23 | args = parser.parse_args() 24 | 25 | os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1' 26 | os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu_num) 27 | 28 | pretraindir = './results/pretrained' if args.pretraindir is None else args.pretraindir 29 | savedir = './results/bbdropout/sample_run' if args.savedir is None else args.savedir 30 | if not os.path.isdir(savedir): 31 | os.makedirs(savedir) 32 | 33 | batch_size = args.batch_size 34 | mnist, n_train_batches, n_test_batches = mnist_input(batch_size) 35 | x = tf.placeholder(tf.float32, [None, 784]) 36 | y = tf.placeholder(tf.float32, [None, 10]) 37 | N = mnist.train.num_examples 38 | dropout = bbdropout 39 | net = lenet_dense(x, y, True, dropout=dropout) 40 | tnet = lenet_dense(x, y, False, reuse=True, dropout=dropout) 41 | 42 | def train(): 43 | loss = net['cent'] + tf.add_n(net['kl'])/float(N) + net['wd'] 44 | global_step = tf.train.get_or_create_global_step() 45 | bdr = [int(n_train_batches*(args.n_epochs-1)*r) for r in [0.5, 0.75]] 46 | vals = [1e-2, 1e-3, 1e-4] 47 | lr = tf.train.piecewise_constant(tf.cast(global_step, tf.int32), bdr, vals) 48 | train_op1 = tf.train.AdamOptimizer(lr).minimize(loss, 49 | var_list=net['qpi_vars'], global_step=global_step) 50 | train_op2 = tf.train.AdamOptimizer(0.1*lr).minimize(loss, 51 | var_list=net['weights']) 52 | train_op = tf.group(train_op1, train_op2) 53 | 54 | pretrain_saver = tf.train.Saver(net['weights']) 55 | saver = tf.train.Saver(net['weights']+net['qpi_vars']) 56 | logfile = open(os.path.join(savedir, 'train.log'), 'w', 0) 57 | 58 | sess = tf.Session() 59 | sess.run(tf.global_variables_initializer()) 60 | pretrain_saver.restore(sess, os.path.join(pretraindir, 'model')) 61 | 62 | train_logger = Accumulator('cent', 'acc') 63 | train_to_run = [train_op, net['cent'], net['acc']] 64 | test_logger = Accumulator('cent', 'acc') 65 | test_to_run = [tnet['cent'], tnet['acc']] 66 | for i in range(args.n_epochs): 67 | line = 'Epoch %d start, learning rate %f' % (i+1, sess.run(lr)) 68 | print(line) 69 | logfile.write(line + '\n') 70 | train_logger.clear() 71 | start = time.time() 72 | for j in range(n_train_batches): 73 | bx, by = mnist.train.next_batch(batch_size) 74 | train_logger.accum(sess.run(train_to_run, {x:bx, y:by})) 75 | train_logger.print_(header='train', epoch=i+1, 76 | time=time.time()-start, logfile=logfile) 77 | 78 | test_logger.clear() 79 | for j in range(n_test_batches): 
80 | bx, by = mnist.test.next_batch(batch_size) 81 | test_logger.accum(sess.run(test_to_run, {x:bx, y:by})) 82 | test_logger.print_(header='test', epoch=i+1, 83 | time=time.time()-start, logfile=logfile) 84 | line = 'kl: ' + str(sess.run(tnet['kl'])) + '\n' 85 | line += 'n_active: ' + str(sess.run(tnet['n_active'])) + '\n' 86 | print(line) 87 | logfile.write(line+'\n') 88 | 89 | if (i+1)%args.save_freq == 0: 90 | saver.save(sess, os.path.join(savedir, 'model')) 91 | 92 | logfile.close() 93 | saver.save(sess, os.path.join(savedir, 'model')) 94 | 95 | def test(): 96 | sess = tf.Session() 97 | saver = tf.train.Saver(tnet['weights']+tnet['qpi_vars']) 98 | saver.restore(sess, os.path.join(savedir, 'model')) 99 | logger = Accumulator('cent', 'acc') 100 | to_run = [tnet['cent'], tnet['acc']] 101 | for j in range(n_test_batches): 102 | bx, by = mnist.test.next_batch(batch_size) 103 | logger.accum(sess.run(to_run, {x:bx, y:by})) 104 | logger.print_(header='test') 105 | line = 'kl: ' + str(sess.run(tnet['kl'])) + '\n' 106 | line += 'n_active: ' + str(sess.run(tnet['n_active'])) + '\n' 107 | print(line) 108 | 109 | def visualize(): 110 | sess = tf.Session() 111 | saver = tf.train.Saver(tnet['weights']+tnet['qpi_vars']) 112 | saver.restore(sess, os.path.join(savedir, 'model')) 113 | 114 | n_drop = len(tnet['n_active']) 115 | fig = figure('pi') 116 | axarr = fig.subplots(n_drop) 117 | for i in range(n_drop): 118 | np_pi = sess.run(tnet['pi'][i]).reshape((1,-1)) 119 | im = axarr[i].imshow(np_pi, cmap='gray', aspect='auto') 120 | axarr[i].yaxis.set_visible(False) 121 | axarr[i].xaxis.set_major_locator(MaxNLocator(integer=True)) 122 | if i == n_drop-1: 123 | axarr[i].set_xlabel('neurons') 124 | fig.colorbar(im, ax=axarr[i]) 125 | show() 126 | 127 | def record(): 128 | sess = tf.Session() 129 | saver = tf.train.Saver(tnet['weights']+tnet['qpi_vars']) 130 | saver.restore(sess, os.path.join(savedir, 'model')) 131 | logger = Accumulator('cent', 'acc') 132 | to_run = [tnet['cent'], tnet['acc']] 133 | for j in range(n_test_batches): 134 | bx, by = mnist.test.next_batch(batch_size) 135 | logger.accum(sess.run(to_run, {x:bx, y:by})) 136 | np_n_active = sess.run(tnet['n_active']) 137 | 138 | if not os.path.isdir('../../records'): 139 | os.makedirs('../../records') 140 | csvfn = os.path.join('../../records', 141 | 'bbdropout_lenet_dense.csv' if args.csvfn is None else args.csvfn) 142 | 143 | if csvfn is not None: 144 | flag = 'a' if os.path.exists(csvfn) else 'w' 145 | with open(csvfn, flag) as f: 146 | writer = csv.writer(f) 147 | if flag=='w': 148 | writer.writerow(['savedir', 'cent', 'acc', 'n_active']) 149 | line = [savedir] 150 | line.append('%.4f' % logger.get('cent')) 151 | line.append('%.4f' % logger.get('acc')) 152 | line.append('-'.join(str(x) for x in np_n_active)) 153 | writer.writerow(line) 154 | 155 | if __name__=='__main__': 156 | if args.mode == 'train': 157 | train() 158 | elif args.mode == 'test': 159 | test() 160 | elif args.mode == 'vis': 161 | visualize() 162 | elif args.mode == 'record': 163 | record() 164 | else: 165 | raise ValueError('Invalid mode %s' % args.mode) 166 | -------------------------------------------------------------------------------- /scripts/lenet_dense/dbbdropout.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import tensorflow as tf 3 | from tensorflow.examples.tutorials.mnist import input_data 4 | from model.lenet import lenet_dense 5 | from model.bbdropout import bbdropout 6 | from 
utils.accumulator import Accumulator 7 | from utils.train import * 8 | from utils.mnist import mnist_input 9 | import time 10 | import os 11 | import argparse 12 | import csv 13 | import matplotlib 14 | matplotlib.use('Agg') 15 | import matplotlib.pyplot as plt 16 | 17 | parser = argparse.ArgumentParser() 18 | parser.add_argument('--batch_size', type=int, default=100) 19 | parser.add_argument('--n_epochs', type=int, default=200) 20 | parser.add_argument('--save_freq', type=int, default=20) 21 | parser.add_argument('--vis_freq', type=int, default=20) 22 | parser.add_argument('--center_init', type=float, default=1.0) 23 | parser.add_argument('--savedir', type=str, default=None) 24 | parser.add_argument('--pretraindir', type=str, default=None) 25 | parser.add_argument('--mode', type=str, default='train') 26 | parser.add_argument('--gpu_num', type=int, default=0) 27 | parser.add_argument('--csvfn', type=str, default=None) 28 | args = parser.parse_args() 29 | 30 | os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1' 31 | os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu_num) 32 | 33 | pretraindir = './results/bbdropout/sample_run' if args.pretraindir is None else args.pretraindir 34 | savedir = './results/dbbdropout/sample_run' if args.savedir is None else args.savedir 35 | if not os.path.isdir(savedir): 36 | os.makedirs(savedir) 37 | figdir = os.path.join(savedir, 'figs') 38 | if not os.path.isdir(figdir): 39 | os.makedirs(figdir) 40 | 41 | batch_size = args.batch_size 42 | mnist, n_train_batches, n_test_batches = mnist_input(batch_size) 43 | x = tf.placeholder(tf.float32, [None, 784]) 44 | y = tf.placeholder(tf.float32, [None, 10]) 45 | N = mnist.train.num_examples 46 | center_init = args.center_init 47 | net = lenet_dense(x, y, True, dropout=bbdropout, 48 | dep=True, center_init=center_init) 49 | tnet = lenet_dense(x, y, False, reuse=True, dropout=bbdropout, 50 | dep=True, center_init=center_init) 51 | n_drop = len(tnet['n_active']) 52 | 53 | def train(): 54 | loss = net['cent'] + tf.add_n(net['kl'])/float(N) + net['wd'] 55 | global_step = tf.train.get_or_create_global_step() 56 | bdr = [int(n_train_batches*(args.n_epochs-1)*r) for r in [0.5, 0.75]] 57 | vals = [1e-2, 1e-3, 1e-4] 58 | lr = tf.train.piecewise_constant(tf.cast(global_step, tf.int32), bdr, vals) 59 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) 60 | with tf.control_dependencies(update_ops): 61 | train_op1 = tf.train.AdamOptimizer(lr).minimize(loss, 62 | var_list=net['pzx_vars'], global_step=global_step) 63 | train_op2 = tf.train.AdamOptimizer(0.1*lr).minimize(loss, 64 | var_list=net['weights']) 65 | train_op = tf.group(train_op1, train_op2) 66 | 67 | pretrain_saver = tf.train.Saver(net['weights']+net['qpi_vars']) 68 | saver = tf.train.Saver(net['weights']+net['qpi_vars']+net['pzx_vars']) 69 | logfile = open(os.path.join(savedir, 'train.log'), 'w', 0) 70 | 71 | sess = tf.Session() 72 | sess.run(tf.global_variables_initializer()) 73 | pretrain_saver.restore(sess, os.path.join(pretraindir, 'model')) 74 | 75 | train_logger = Accumulator('cent', 'acc') 76 | train_to_run = [train_op, net['cent'], net['acc']] 77 | test_logger = Accumulator('cent', 'acc') 78 | test_to_run = [tnet['cent'], tnet['acc']] 79 | test_to_run += tnet['n_active'] 80 | for i in range(args.n_epochs): 81 | line = 'Epoch %d start, learning rate %f' % (i+1, sess.run(lr)) 82 | print(line) 83 | logfile.write(line + '\n') 84 | train_logger.clear() 85 | start = time.time() 86 | for j in range(n_train_batches): 87 | bx, by = mnist.train.next_batch(batch_size) 88 | 
train_logger.accum(sess.run(train_to_run, {x:bx, y:by})) 89 | train_logger.print_(header='train', epoch=i+1, 90 | time=time.time()-start, logfile=logfile) 91 | 92 | test_logger.clear() 93 | np_n_active = [0]*n_drop 94 | for j in range(n_test_batches): 95 | bx, by = mnist.test.next_batch(batch_size) 96 | res = sess.run(test_to_run, {x:bx, y:by}) 97 | test_logger.accum(res[:-n_drop]) 98 | np_n_active = [a + b for a, b in zip(np_n_active, res[-n_drop:])] 99 | test_logger.print_(header='test', epoch=i+1, 100 | time=time.time()-start, logfile=logfile) 101 | np_n_active = [int(a/n_test_batches) for a in np_n_active] 102 | line = 'kl: ' + str(sess.run(tnet['kl'])) + '\n' 103 | line += 'n_active: ' + str(np_n_active) + '\n' 104 | print(line) 105 | logfile.write(line+'\n') 106 | if (i+1)%args.save_freq == 0: 107 | saver.save(sess, os.path.join(savedir, 'model')) 108 | 109 | if (i+1)%args.vis_freq == 0: 110 | fig = _visualize(sess) 111 | fig.savefig(os.path.join(figdir, 'epoch%d.png'%(i+1)), dpi=200) 112 | 113 | logfile.close() 114 | saver.save(sess, os.path.join(savedir, 'model')) 115 | 116 | def test(): 117 | sess = tf.Session() 118 | saver = tf.train.Saver(tnet['weights']+tnet['qpi_vars']+tnet['pzx_vars']) 119 | saver.restore(sess, os.path.join(savedir, 'model')) 120 | logger = Accumulator('cent', 'acc') 121 | to_run = [tnet['cent'], tnet['acc']] + tnet['n_active'] 122 | np_n_active = [0]*n_drop 123 | for j in range(n_test_batches): 124 | bx, by = mnist.test.next_batch(batch_size) 125 | res = sess.run(to_run, {x:bx, y:by}) 126 | logger.accum(res[:-n_drop]) 127 | np_n_active = [a + b for a, b in zip(np_n_active, res[-n_drop:])] 128 | np_n_active = [int(a/n_test_batches) for a in np_n_active] 129 | logger.print_(header='test') 130 | line = 'kl: ' + str(sess.run(tnet['kl'])) + '\n' 131 | line += 'n_active:' + str(np_n_active) + '\n' 132 | print(line) 133 | 134 | def _visualize(sess): 135 | pi_csum = [tf.matmul(y, pi, transpose_a=True) for pi in tnet['pi']] 136 | csum = tf.expand_dims(tf.reduce_sum(y, 0), 1) 137 | 138 | np_pi_csum = [0]*n_drop 139 | np_csum = 0 140 | for j in range(n_test_batches): 141 | bx, by = mnist.test.next_batch(args.batch_size) 142 | A, B = sess.run([pi_csum, csum], {x:bx, y:by}) 143 | for k in range(len(pi_csum)): 144 | np_pi_csum[k] += A[k] 145 | np_csum += B 146 | 147 | fig = plt.figure('vis') 148 | axarr = fig.subplots(n_drop) 149 | for i in range(n_drop): 150 | im = axarr[i].imshow(np_pi_csum[i]/np_csum, cmap='gray', aspect='auto') 151 | fig.colorbar(im, ax=axarr[i]) 152 | return fig 153 | 154 | def visualize(): 155 | sess = tf.Session() 156 | saver = tf.train.Saver(tnet['weights']+tnet['qpi_vars']+tnet['pzx_vars']) 157 | saver.restore(sess, os.path.join(savedir, 'model')) 158 | _visualize(sess) 159 | plt.show() 160 | 161 | def record(): 162 | sess = tf.Session() 163 | saver = tf.train.Saver(tnet['weights']+tnet['qpi_vars']+tnet['pzx_vars']) 164 | saver.restore(sess, os.path.join(savedir, 'model')) 165 | logger = Accumulator('cent', 'acc') 166 | to_run = [tnet['cent'], tnet['acc']] + tnet['n_active'] 167 | np_n_active = [0]*n_drop 168 | for j in range(n_test_batches): 169 | bx, by = mnist.test.next_batch(batch_size) 170 | res = sess.run(to_run, {x:bx, y:by}) 171 | logger.accum(res[:-n_drop]) 172 | np_n_active = [a + b for a, b in zip(np_n_active, res[-n_drop:])] 173 | np_n_active = [int(a/n_test_batches) for a in np_n_active] 174 | 175 | if not os.path.isdir('../../records'): 176 | os.makedirs('../../records') 177 | csvfn = os.path.join('../../records', 178 | 
'dbbdropout_lenet_dense.csv' if args.csvfn is None else args.csvfn) 179 | 180 | if csvfn is not None: 181 | flag = 'a' if os.path.exists(csvfn) else 'w' 182 | with open(csvfn, flag) as f: 183 | writer = csv.writer(f) 184 | if flag=='w': 185 | writer.writerow(['savedir', 'cent', 'acc', 'n_active']) 186 | line = [savedir] 187 | line.append('%.4f' % logger.get('cent')) 188 | line.append('%.4f' % logger.get('acc')) 189 | line.append('-'.join(str(x) for x in np_n_active)) 190 | writer.writerow(line) 191 | 192 | if __name__=='__main__': 193 | if args.mode == 'train': 194 | train() 195 | elif args.mode == 'test': 196 | test() 197 | elif args.mode == 'vis': 198 | visualize() 199 | elif args.mode == 'record': 200 | record() 201 | else: 202 | raise ValueError('Invalid mode %s' % args.mode) 203 | -------------------------------------------------------------------------------- /scripts/lenet_dense/model: -------------------------------------------------------------------------------- 1 | ../../model -------------------------------------------------------------------------------- /scripts/lenet_dense/pretrain.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import tensorflow as tf 3 | from tensorflow.examples.tutorials.mnist import input_data 4 | from model.lenet import lenet_dense 5 | from utils.accumulator import Accumulator 6 | from utils.train import * 7 | from utils.mnist import mnist_input 8 | import time 9 | import os 10 | import argparse 11 | 12 | parser = argparse.ArgumentParser() 13 | parser.add_argument('--batch_size', type=int, default=100) 14 | parser.add_argument('--n_epochs', type=int, default=200) 15 | parser.add_argument('--save_freq', type=int, default=20) 16 | parser.add_argument('--savedir', type=str, default=None) 17 | parser.add_argument('--mode', type=str, default='train') 18 | parser.add_argument('--gpu_num', type=int, default=0) 19 | args = parser.parse_args() 20 | 21 | os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1' 22 | os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu_num) 23 | 24 | savedir = './results/pretrained' if args.savedir is None else args.savedir 25 | if not os.path.isdir(savedir): 26 | os.makedirs(savedir) 27 | 28 | batch_size = args.batch_size 29 | mnist, n_train_batches, n_test_batches = mnist_input(batch_size) 30 | x = tf.placeholder(tf.float32, [None, 784]) 31 | y = tf.placeholder(tf.float32, [None, 10]) 32 | net = lenet_dense(x, y, True) 33 | tnet = lenet_dense(x, y, False, reuse=True) 34 | 35 | def train(): 36 | loss = net['cent'] + net['wd'] 37 | global_step = tf.train.get_or_create_global_step() 38 | lr = tf.train.piecewise_constant(tf.cast(global_step, tf.int32), 39 | [n_train_batches*args.n_epochs/2], [1e-4, 1e-5]) 40 | train_op = tf.train.AdamOptimizer(lr).minimize(loss, global_step=global_step) 41 | 42 | saver = tf.train.Saver(net['weights']) 43 | logfile = open(os.path.join(savedir, 'train.log'), 'w', 0) 44 | 45 | sess = tf.Session() 46 | sess.run(tf.global_variables_initializer()) 47 | 48 | train_logger = Accumulator('cent', 'acc') 49 | train_to_run = [train_op, net['cent'], net['acc']] 50 | test_logger = Accumulator('cent', 'acc') 51 | test_to_run = [tnet['cent'], tnet['acc']] 52 | for i in range(args.n_epochs): 53 | line = 'Epoch %d start, learning rate %f' % (i+1, sess.run(lr)) 54 | print (line) 55 | logfile.write(line + '\n') 56 | train_logger.clear() 57 | start = time.time() 58 | for j in range(n_train_batches): 59 | bx, by = mnist.train.next_batch(batch_size) 60 | 
train_logger.accum(sess.run(train_to_run, {x:bx, y:by})) 61 | train_logger.print_(header='train', epoch=i+1, 62 | time=time.time()-start, logfile=logfile) 63 | 64 | test_logger.clear() 65 | for j in range(n_test_batches): 66 | bx, by = mnist.test.next_batch(batch_size) 67 | test_logger.accum(sess.run(test_to_run, {x:bx, y:by})) 68 | test_logger.print_(header='test', epoch=i+1, 69 | time=time.time()-start, logfile=logfile) 70 | 71 | print() 72 | logfile.write('\n') 73 | if (i+1)%args.save_freq == 0: 74 | saver.save(sess, os.path.join(savedir, 'model')) 75 | 76 | logfile.close() 77 | saver.save(sess, os.path.join(savedir, 'model')) 78 | 79 | def test(): 80 | sess = tf.Session() 81 | saver = tf.train.Saver(tnet['weights']) 82 | saver.restore(sess, os.path.join(savedir, 'model')) 83 | logger = Accumulator('cent', 'acc') 84 | to_run = [tnet['cent'], tnet['acc']] 85 | for j in range(n_test_batches): 86 | bx, by = mnist.test.next_batch(batch_size) 87 | logger.accum(sess.run(to_run, {x:bx, y:by})) 88 | logger.print_(header='test') 89 | 90 | if __name__=='__main__': 91 | if args.mode == 'train': 92 | train() 93 | elif args.mode == 'test': 94 | test() 95 | else: 96 | raise ValueError('Invalid mode %s' % args.mode) 97 | -------------------------------------------------------------------------------- /scripts/lenet_dense/sbpdropout.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import tensorflow as tf 3 | from tensorflow.examples.tutorials.mnist import input_data 4 | from model.lenet import lenet_dense 5 | from model.sbpdropout import sbpdropout 6 | from utils.accumulator import Accumulator 7 | from utils.train import * 8 | from utils.mnist import mnist_input 9 | import time 10 | import os 11 | import argparse 12 | import csv 13 | 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--batch_size', type=int, default=100) 16 | parser.add_argument('--n_epochs', type=int, default=200) 17 | parser.add_argument('--save_freq', type=int, default=20) 18 | parser.add_argument('--savedir', type=str, default=None) 19 | parser.add_argument('--pretraindir', type=str, default=None) 20 | parser.add_argument('--mode', type=str, default='train') 21 | parser.add_argument('--gpu_num', type=int, default=0) 22 | parser.add_argument('--csvfn', type=str, default=None) 23 | args = parser.parse_args() 24 | 25 | os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1' 26 | os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu_num) 27 | 28 | pretraindir = './results/pretrained' if args.pretraindir is None else args.pretraindir 29 | savedir = './results/sbpdropout/sample_run' if args.savedir is None else args.savedir 30 | if not os.path.isdir(savedir): 31 | os.makedirs(savedir) 32 | 33 | batch_size = args.batch_size 34 | mnist, n_train_batches, n_test_batches = mnist_input(batch_size) 35 | x = tf.placeholder(tf.float32, [None, 784]) 36 | y = tf.placeholder(tf.float32, [None, 10]) 37 | N = mnist.train.num_examples 38 | dropout = sbpdropout 39 | net = lenet_dense(x, y, True, dropout=dropout) 40 | tnet = lenet_dense(x, y, False, reuse=True, dropout=dropout) 41 | 42 | def train(): 43 | loss = net['cent'] + tf.add_n(net['kl'])/float(N) + net['wd'] 44 | global_step = tf.train.get_or_create_global_step() 45 | bdr = [int(n_train_batches*(args.n_epochs-1)*r) for r in [0.5, 0.75]] 46 | vals = [1e-2, 1e-3, 1e-4] 47 | lr = tf.train.piecewise_constant(tf.cast(global_step, tf.int32), bdr, vals) 48 | train_op1 = tf.train.AdamOptimizer(lr).minimize(loss, 49 | 
var_list=net['qpi_vars'], global_step=global_step) 50 | train_op2 = tf.train.AdamOptimizer(0.1*lr).minimize(loss, 51 | var_list=net['weights']) 52 | train_op = tf.group(train_op1, train_op2) 53 | 54 | pretrain_saver = tf.train.Saver(net['weights']) 55 | saver = tf.train.Saver(net['weights']+net['qpi_vars']) 56 | logfile = open(os.path.join(savedir, 'train.log'), 'w', 0) 57 | 58 | sess = tf.Session() 59 | sess.run(tf.global_variables_initializer()) 60 | pretrain_saver.restore(sess, os.path.join(pretraindir, 'model')) 61 | 62 | train_logger = Accumulator('cent', 'acc') 63 | train_to_run = [train_op, net['cent'], net['acc']] 64 | test_logger = Accumulator('cent', 'acc') 65 | test_to_run = [tnet['cent'], tnet['acc']] 66 | for i in range(args.n_epochs): 67 | line = 'Epoch %d start, learning rate %f' % (i+1, sess.run(lr)) 68 | print(line) 69 | logfile.write(line + '\n') 70 | train_logger.clear() 71 | start = time.time() 72 | for j in range(n_train_batches): 73 | bx, by = mnist.train.next_batch(batch_size) 74 | train_logger.accum(sess.run(train_to_run, {x:bx, y:by})) 75 | train_logger.print_(header='train', epoch=i+1, 76 | time=time.time()-start, logfile=logfile) 77 | 78 | test_logger.clear() 79 | for j in range(n_test_batches): 80 | bx, by = mnist.test.next_batch(batch_size) 81 | test_logger.accum(sess.run(test_to_run, {x:bx, y:by})) 82 | test_logger.print_(header='test', epoch=i+1, 83 | time=time.time()-start, logfile=logfile) 84 | line = 'kl: ' + str(sess.run(tnet['kl'])) + '\n' 85 | line += 'n_active: ' + str(sess.run(tnet['n_active'])) + '\n' 86 | print(line) 87 | logfile.write(line+'\n') 88 | if (i+1)%args.save_freq == 0: 89 | saver.save(sess, os.path.join(savedir, 'model')) 90 | 91 | logfile.close() 92 | saver.save(sess, os.path.join(savedir, 'model')) 93 | 94 | def test(): 95 | sess = tf.Session() 96 | saver = tf.train.Saver(tnet['weights']+tnet['qpi_vars']) 97 | saver.restore(sess, os.path.join(savedir, 'model')) 98 | logger = Accumulator('cent', 'acc') 99 | to_run = [tnet['cent'], tnet['acc']] 100 | for j in range(n_test_batches): 101 | bx, by = mnist.test.next_batch(batch_size) 102 | logger.accum(sess.run(to_run, {x:bx, y:by})) 103 | logger.print_(header='test') 104 | line = 'kl: ' + str(sess.run(tnet['kl'])) + '\n' 105 | line += 'n_active: ' + str(sess.run(tnet['n_active'])) + '\n' 106 | print(line) 107 | 108 | def record(): 109 | sess = tf.Session() 110 | saver = tf.train.Saver(tnet['weights']+tnet['qpi_vars']) 111 | saver.restore(sess, os.path.join(savedir, 'model')) 112 | logger = Accumulator('cent', 'acc') 113 | to_run = [tnet['cent'], tnet['acc']] 114 | for j in range(n_test_batches): 115 | bx, by = mnist.test.next_batch(batch_size) 116 | logger.accum(sess.run(to_run, {x:bx, y:by})) 117 | np_n_active = sess.run(tnet['n_active']) 118 | 119 | if not os.path.isdir('../../records'): 120 | os.makedirs('../../records') 121 | csvfn = os.path.join('../../records', 122 | 'sbpdropout_lenet_dense.csv' if args.csvfn is None else args.csvfn) 123 | 124 | if csvfn is not None: 125 | flag = 'a' if os.path.exists(csvfn) else 'w' 126 | with open(csvfn, flag) as f: 127 | writer = csv.writer(f) 128 | if flag=='w': 129 | writer.writerow(['savedir', 'cent', 'acc', 'n_active']) 130 | line = [savedir] 131 | line.append('%.4f' % logger.get('cent')) 132 | line.append('%.4f' % logger.get('acc')) 133 | line.append('-'.join(str(x) for x in np_n_active)) 134 | writer.writerow(line) 135 | 136 | if __name__=='__main__': 137 | if args.mode == 'train': 138 | train() 139 | elif args.mode == 'test': 140 | test() 
141 | elif args.mode == 'record': 142 | record() 143 | else: 144 | raise ValueError('Invalid mode %s' % args.mode) 145 | -------------------------------------------------------------------------------- /scripts/lenet_dense/utils: -------------------------------------------------------------------------------- 1 | ../../utils/ -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenXAIProject/Network-Structure-Dropout/723df2d2392ec16eca3452d4afb81d54c4a2f841/utils/__init__.py -------------------------------------------------------------------------------- /utils/accumulator.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | class Accumulator(): 4 | def __init__(self, *args): 5 | self.args = args 6 | self.argdict = {} 7 | for i, arg in enumerate(args): 8 | self.argdict[arg] = i 9 | self.sums = [0]*len(args) 10 | self.cnt = 0 11 | 12 | def accum(self, val): 13 | val = [val] if type(val) is not list else val 14 | val = [v for v in val if v is not None] 15 | assert(len(val) == len(self.args)) 16 | for i in range(len(val)): 17 | self.sums[i] += val[i] 18 | self.cnt += 1 19 | 20 | def clear(self): 21 | self.sums = [0]*len(self.args) 22 | self.cnt = 0 23 | 24 | def get(self, arg, avg=True): 25 | i = self.argdict.get(arg, -1) 26 | assert(i != -1) 27 | return (self.sums[i]/self.cnt if avg else self.sums[i]) 28 | 29 | def print_(self, header=None, epoch=None, it=None, time=None, 30 | logfile=None, do_not_print=[], as_int=[], 31 | avg=True): 32 | line = '' if header is None else header + ': ' 33 | if epoch is not None: 34 | line += ('epoch %d, ' % epoch) 35 | if it is not None: 36 | line += ('iter %d, ' % it) 37 | if time is not None: 38 | line += ('(%.3f secs), ' % time) 39 | 40 | args = [arg for arg in self.args if arg not in do_not_print] 41 | 42 | for arg in args[:-1]: 43 | val = self.sums[self.argdict[arg]] 44 | if avg: 45 | val /= self.cnt 46 | if arg in as_int: 47 | line += ('%s %d, ' % (arg, int(val))) 48 | else: 49 | line += ('%s %f, ' % (arg, val)) 50 | val = self.sums[self.argdict[args[-1]]] 51 | if avg: 52 | val /= self.cnt 53 | if args[-1] in as_int: 54 | line += ('%s %d' % (args[-1], int(val))) 55 | else: 56 | line += ('%s %f' % (args[-1], val)) 57 | print(line) 58 | 59 | if logfile is not None: 60 | logfile.write(line + '\n') 61 | -------------------------------------------------------------------------------- /utils/cifar10.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import os 4 | import sys 5 | from paths import CIFAR10_PATH 6 | 7 | HEIGHT = 32 8 | WIDTH = 32 9 | DEPTH = 3 10 | NUM_CLASSES = 10 11 | NUM_DATA_FILES = 5 12 | NUM_TRAIN = 10000 * NUM_DATA_FILES 13 | NUM_TEST = 10000 14 | 15 | def record_dataset(filenames): 16 | label_bytes = 1 17 | image_bytes = DEPTH * HEIGHT * WIDTH 18 | record_bytes = label_bytes + image_bytes 19 | return tf.data.FixedLengthRecordDataset(filenames, record_bytes) 20 | 21 | def get_filenames(training): 22 | data_dir = os.path.join(CIFAR10_PATH, 'cifar-10-batches-bin') 23 | if training: 24 | return [os.path.join(data_dir, 'data_batch_%d.bin' % i) 25 | for i in range(1, NUM_DATA_FILES+1)] 26 | else: 27 | return [os.path.join(data_dir, 'test_batch.bin')] 28 | 29 | def parse_record(raw_record): 30 | """Parse a CIFAR-10 
record from value.""" 31 | # Every record consists of a label followed by the image, with a fixed number 32 | # of bytes for each. 33 | label_offset = 0 34 | label_bytes = 1 35 | image_bytes = DEPTH * HEIGHT * WIDTH 36 | record_bytes = label_bytes + image_bytes 37 | 38 | # Convert from a string to a vector of uint8 that is record_bytes long. 39 | record_vector = tf.decode_raw(raw_record, tf.uint8) 40 | 41 | # The first byte represents the label, which we convert from uint8 to int32. 42 | label = tf.cast(record_vector[label_offset], tf.int32) 43 | label = tf.one_hot(label, NUM_CLASSES) 44 | 45 | # The remaining bytes after the label represent the image, which we reshape 46 | # from [depth * height * width] to [depth, height, width]. 47 | depth_major = tf.reshape( 48 | record_vector[label_offset+label_bytes:record_bytes], 49 | [DEPTH, HEIGHT, WIDTH]) 50 | 51 | # Convert from [depth, height, width] to [height, width, depth], and cast as 52 | # float32. 53 | image = tf.cast(tf.transpose(depth_major, [1, 2, 0]), tf.float32) 54 | return image, label 55 | 56 | def preprocess_image(image, training): 57 | """Preprocess a single image of layout [height, width, depth].""" 58 | if training: 59 | # Resize the image to add four extra pixels on each side. 60 | image = tf.image.resize_image_with_crop_or_pad( 61 | image, HEIGHT + 8, WIDTH + 8) 62 | 63 | # Randomly crop a [HEIGHT, WIDTH] section of the image. 64 | image = tf.random_crop(image, [HEIGHT, WIDTH, DEPTH]) 65 | 66 | # Randomly flip the image horizontally. 67 | image = tf.image.random_flip_left_right(image) 68 | 69 | # Subtract off the mean and divide by the variance of the pixels. 70 | image = tf.image.per_image_standardization(image) 71 | 72 | # transpose image back to depth major 73 | image = tf.transpose(image, [2, 1, 0]) 74 | 75 | return image 76 | 77 | def cifar10_input(batch_size, training): 78 | dataset = record_dataset(get_filenames(training)) 79 | 80 | if training: 81 | dataset = dataset.shuffle(buffer_size=NUM_TRAIN) 82 | 83 | dataset = dataset.map(parse_record) 84 | dataset = dataset.map( 85 | lambda image, label: (preprocess_image(image, training), label)) 86 | 87 | dataset = dataset.prefetch(8 * batch_size) 88 | dataset = dataset.repeat() 89 | dataset = dataset.batch(batch_size) 90 | iterator = dataset.make_one_shot_iterator() 91 | images, labels = iterator.get_next() 92 | 93 | return images, labels 94 | -------------------------------------------------------------------------------- /utils/cifar100.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import os 4 | import sys 5 | from paths import CIFAR100_PATH 6 | 7 | HEIGHT = 32 8 | WIDTH = 32 9 | DEPTH = 3 10 | NUM_SUPER_CLASSES = 20 11 | NUM_CLASSES = 100 12 | NUM_TRAIN = 50000 13 | NUM_TEST = 10000 14 | 15 | def record_dataset(filenames): 16 | label_bytes = 2 17 | image_bytes = DEPTH * HEIGHT * WIDTH 18 | record_bytes = label_bytes + image_bytes 19 | return tf.data.FixedLengthRecordDataset(filenames, record_bytes) 20 | 21 | def get_filenames(training): 22 | data_dir = os.path.join(CIFAR100_PATH, 'cifar-100-binary') 23 | if training: 24 | return [os.path.join(data_dir, 'train.bin')] 25 | else: 26 | return [os.path.join(data_dir, 'test.bin')] 27 | 28 | def parse_record(raw_record): 29 | """Parse a CIFAR-100 record from value.""" 30 | # Every record consists of a label followed by the image, with a fixed number 31 | # of bytes for each. 
32 | label_bytes = 2 33 | image_bytes = DEPTH * HEIGHT * WIDTH 34 | record_bytes = label_bytes + image_bytes 35 | 36 | # Convert from a string to a vector of uint8 that is record_bytes long. 37 | record_vector = tf.decode_raw(raw_record, tf.uint8) 38 | 39 | # The first and second bytes represent the super label and the label, 40 | # which we convert from uint8 to int32. 41 | slabel = tf.cast(record_vector[0], tf.int32) 42 | slabel = tf.one_hot(slabel, NUM_SUPER_CLASSES) 43 | label = tf.cast(record_vector[1], tf.int32) 44 | label = tf.one_hot(label, NUM_CLASSES) 45 | 46 | # The remaining bytes after the label represent the image, which we reshape 47 | # from [depth * height * width] to [depth, height, width]. 48 | depth_major = tf.reshape( 49 | record_vector[label_bytes:record_bytes], 50 | [DEPTH, HEIGHT, WIDTH]) 51 | 52 | # Convert from [depth, height, width] to [height, width, depth], and cast as 53 | # float32. 54 | image = tf.cast(tf.transpose(depth_major, [1, 2, 0]), tf.float32) 55 | return image, slabel, label 56 | 57 | def preprocess_image(image, training): 58 | """Preprocess a single image of layout [height, width, depth].""" 59 | if training: 60 | # Resize the image to add four extra pixels on each side. 61 | image = tf.image.resize_image_with_crop_or_pad( 62 | image, HEIGHT + 8, WIDTH + 8) 63 | 64 | # Randomly crop a [HEIGHT, WIDTH] section of the image. 65 | image = tf.random_crop(image, [HEIGHT, WIDTH, DEPTH]) 66 | 67 | # Randomly flip the image horizontally. 68 | image = tf.image.random_flip_left_right(image) 69 | 70 | # Subtract off the mean and divide by the variance of the pixels. 71 | image = tf.image.per_image_standardization(image) 72 | 73 | # transpose image back to depth major 74 | image = tf.transpose(image, [2, 1, 0]) 75 | 76 | return image 77 | 78 | def cifar100_input(batch_size, training): 79 | dataset = record_dataset(get_filenames(training)) 80 | 81 | if training: 82 | dataset = dataset.shuffle(buffer_size=NUM_TRAIN) 83 | 84 | dataset = dataset.map(parse_record) 85 | dataset = dataset.map( 86 | lambda image, slabel, label: \ 87 | (preprocess_image(image, training), slabel, label)) 88 | 89 | dataset = dataset.prefetch(8 * batch_size) 90 | dataset = dataset.repeat() 91 | dataset = dataset.batch(batch_size) 92 | iterator = dataset.make_one_shot_iterator() 93 | images, slabels, labels = iterator.get_next() 94 | 95 | return images, slabels, labels 96 | -------------------------------------------------------------------------------- /utils/mnist.py: -------------------------------------------------------------------------------- 1 | from tensorflow.examples.tutorials.mnist import input_data 2 | from paths import MNIST_PATH 3 | 4 | def mnist_input(batch_size): 5 | mnist = input_data.read_data_sets(MNIST_PATH, one_hot=True, validation_size=0) 6 | n_train_batches = mnist.train.num_examples//batch_size 7 | n_test_batches = mnist.test.num_examples//batch_size 8 | return mnist, n_train_batches, n_test_batches 9 | -------------------------------------------------------------------------------- /utils/train.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.python.client import device_lib 3 | 4 | def cross_entropy(logits, labels): 5 | return tf.losses.softmax_cross_entropy(logits=logits, onehot_labels=labels) 6 | 7 | def weight_decay(decay, var_list=None): 8 | var_list = tf.trainable_variables() if var_list is None else var_list 9 | return decay*tf.add_n([tf.nn.l2_loss(var) for var in var_list]) 10 
| 11 | def accuracy(logits, labels): 12 | correct = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1)) 13 | return tf.reduce_mean(tf.cast(correct, tf.float32)) 14 | 15 | def get_train_op(optim, loss, global_step=None, clip=None, var_list=None): 16 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) 17 | with tf.control_dependencies(update_ops): 18 | grad_and_vars = optim.compute_gradients(loss, var_list=var_list) 19 | if clip is not None: 20 | grad_and_vars = [((None if grad is None \ 21 | else tf.clip_by_norm(grad, clip)), var) \ 22 | for grad, var in grad_and_vars] 23 | train_op = optim.apply_gradients(grad_and_vars, global_step=global_step) 24 | return train_op 25 | 26 | # copied from https://stackoverflow.com/a/38580201 27 | def get_available_gpus(): 28 | local_device_protos = device_lib.list_local_devices() 29 | mem_thres = 0.3*max([x.memory_limit for x in local_device_protos \ 30 | if x.device_type=='GPU']) 31 | return [x.name for x in local_device_protos if x.device_type=='GPU' \ 32 | and x.memory_limit > mem_thres] 33 | 34 | def average_gradients(tower_grads): 35 | """Calculate the average gradient for each shared variable across all towers. 36 | 37 | Note that this function provides a synchronization point across all towers. 38 | 39 | Args: 40 | tower_grads: List of lists of (gradient, variable) tuples. The outer list 41 | is over individual gradients. The inner list is over the gradient 42 | calculation for each tower. 43 | Returns: 44 | List of pairs of (gradient, variable) where the gradient has been averaged 45 | across all towers. 46 | """ 47 | average_grads = [] 48 | for grad_and_vars in zip(*tower_grads): 49 | # Note that each grad_and_vars looks like the following: 50 | # ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN)) 51 | grads = [] 52 | for g, _ in grad_and_vars: 53 | # Add 0 dimension to the gradients to represent the tower. 54 | expanded_g = tf.expand_dims(g, 0) 55 | 56 | # Append on a 'tower' dimension which we will average over below. 57 | grads.append(expanded_g) 58 | 59 | # Average over the 'tower' dimension. 60 | grad = tf.concat(axis=0, values=grads) 61 | grad = tf.reduce_mean(grad, 0) 62 | 63 | # Keep in mind that the Variables are redundant because they are shared 64 | # across towers. So .. we will just return the first tower's pointer to 65 | # the Variable. 66 | v = grad_and_vars[0][1] 67 | grad_and_var = (grad, v) 68 | average_grads.append(grad_and_var) 69 | return average_grads 70 | --------------------------------------------------------------------------------
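Note on running the code above: utils/mnist.py, utils/cifar10.py and utils/cifar100.py import MNIST_PATH, CIFAR10_PATH and CIFAR100_PATH from a module named paths, but no utils/paths.py appears in this listing, so that file has to be created locally before any of the scripts will run. The snippet below is a hypothetical sketch of such a file, not part of the repository: only the three variable names are fixed (they are exactly what the loaders import), while the directory values are placeholders and should point at wherever the datasets are stored.

    # utils/paths.py -- hypothetical sketch; replace the placeholder paths with your own.
    MNIST_PATH = '/data/mnist'          # handed to input_data.read_data_sets()
    CIFAR10_PATH = '/data/cifar10'      # expected to contain cifar-10-batches-bin/
    CIFAR100_PATH = '/data/cifar100'    # expected to contain cifar-100-binary/

Judging from the default directories hard-coded in the scripts, the intended order for the lenet_dense experiments is: pretrain.py first (saving to ./results/pretrained), then bbdropout.py or sbpdropout.py (both restore that pretrained checkpoint), and finally dbbdropout.py, whose default pretraindir is ./results/bbdropout/sample_run, i.e. the default savedir of bbdropout.py.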