├── .gitignore
├── AUTHOR.txt
├── LICENSE
├── README.md
├── model
│   ├── __init__.py
│   ├── bbdropout.py
│   ├── digamma.py
│   ├── layers.py
│   ├── lenet.py
│   ├── sbpdropout.py
│   └── utils
├── scripts
│   ├── lenet_conv
│   │   ├── bbdropout.py
│   │   ├── dbbdropout.py
│   │   ├── model
│   │   ├── pretrain.py
│   │   ├── sbpdropout.py
│   │   └── utils
│   └── lenet_dense
│       ├── bbdropout.py
│       ├── dbbdropout.py
│       ├── model
│       ├── pretrain.py
│       ├── sbpdropout.py
│       └── utils
└── utils
    ├── __init__.py
    ├── accumulator.py
    ├── cifar10.py
    ├── cifar100.py
    ├── mnist.py
    └── train.py
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | *.log
3 | *.npy
4 | *.npz
5 | *.ckpt
6 | *.tar
7 | *.out
8 | *.sh
9 | utils/paths.py
10 | **/results/
11 | /records/
12 |
--------------------------------------------------------------------------------
/AUTHOR.txt:
--------------------------------------------------------------------------------
1 | Copyright 2018 (Institution) under XAI Project supported by Ministry of Science and ICT, Korea
2 |
3 | # This is the list of (Institution) for copyright purposes.
4 | # This does not necessarily list everyone who has contributed code, since in
5 | # some cases, their employer may be the copyright holder. To see the full list
6 | # of contributors, see the revision history in source control.
7 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Learning Network Structure with Dropout
2 |
3 | ### **CONTENT**
4 | > Data-dependent variational dropout for learning a network structure
5 | ### **How to Use**
6 |
7 | ```bash
8 | $ cd ~/[WORKING_DIR]/scripts/lenet_dense
9 | $ python ./pretrain.py
10 | $ python ./bbdropout.py
11 | ```
12 |
13 |
14 |
15 | # XAI Project
16 |
17 | ### **Project Name**
18 | > A machine learning and statistical inference framework for explainable artificial intelligence (development of a human-level learning and reasoning framework capable of explaining the rationale behind its decisions)
19 | ### **Managed by**
20 | > Ministry of Science and ICT/XAIC
21 | ### **Participating Affiliations**
22 | > UNIST, Korea Univ., Yonsei Univ., KAIST, AItrics
23 | ### **Web Site**
24 | >
25 |
--------------------------------------------------------------------------------
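The scripts under `scripts/lenet_conv` and `scripts/lenet_dense` follow the same pattern. `pretrain.py` trains the plain LeNet and saves its weights to `./results/pretrained`; `bbdropout.py` and `sbpdropout.py` restore those weights (`--pretraindir`) and learn the dropout parameters on top of them; `dbbdropout.py` restores a finished `bbdropout.py` run (default `./results/bbdropout/sample_run`) and trains the data-dependent gates. `pretrain.py` supports `--mode train|test`; the dropout scripts additionally support `record` (appending results to a CSV under `records/` at the repository root, see `--csvfn`) and, for the `bbdropout`/`dbbdropout` variants, `vis`. Common flags include `--batch_size`, `--n_epochs`, `--save_freq`, `--savedir`, and `--gpu_num`.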
/model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenXAIProject/Network-Structure-Dropout/723df2d2392ec16eca3452d4afb81d54c4a2f841/model/__init__.py
--------------------------------------------------------------------------------
/model/bbdropout.py:
--------------------------------------------------------------------------------
1 | from layers import *
2 | import tensorflow as tf
3 | from tensorflow.contrib.distributions import RelaxedBernoulli
4 | import numpy as np
5 |
6 | digamma = tf.digamma
7 | from digamma import digamma_approx as digamma_approx
8 | lgamma = tf.lgamma
9 | Euler = 0.577215664901532
10 |
11 | def bbdropout(x, training,
12 | alpha=1e-4, thres=1e-2, a_init=-1., tau=1e-1, center_init=1.0,
13 | approx_digamma=True, scale_kl=None, dep=False,
14 | unit_scale=True, collect=True,
15 | name='bbdropout', reuse=None):
16 |
17 | N = tf.shape(x)[0]
18 | K = x.shape[1].value
19 | is_conv = len(x.shape)==4
20 |
21 | with tf.variable_scope(name+'/qpi_vars', reuse=reuse):
22 | with tf.device('/cpu:0'):
23 | a = softplus(tf.get_variable('a_uc', shape=[K],
24 | initializer=tf.constant_initializer(a_init)))
25 | b = softplus(tf.get_variable('b_uc', shape=[K]))
26 |
27 | _digamma = digamma_approx if approx_digamma else digamma
28 | kl = (a-alpha)/a * (-Euler - _digamma(b) - 1/b) \
29 | + log(a*b) - log(alpha) - (b-1)/b
30 | pi = (1 - tf.random_uniform([K])**(1/b))**(1/a) if training else \
31 | b*tf.exp(lgamma(1+1/a) + lgamma(b) - lgamma(1+1/a+b))
32 |
33 | def hard_sigmoid(x):
34 | return tf.clip_by_value(x, thres, 1-thres)
35 |
36 | if dep:
37 | with tf.variable_scope(name+'/pzx_vars', reuse=reuse):
38 | hid = global_avg_pool(x) if is_conv else x
39 | hid = tf.stop_gradient(hid)
40 | with tf.device('/cpu:0'):
41 | hid = layer_norm(hid, scale=False, center=False)
42 | scale = tf.get_variable('scale', shape=[1 if unit_scale else K],
43 | initializer=tf.ones_initializer())
44 | center = tf.get_variable('center', shape=[K],
45 | initializer=tf.constant_initializer(center_init))
46 | hid = scale*hid + center
47 | if training:
48 | pi = pi * hard_sigmoid(hid + tf.random_normal(shape=tf.shape(hid)))
49 | z = RelaxedBernoulli(tau, logits=logit(pi)).sample()
50 | else:
51 | pi = pi * hard_sigmoid(hid)
52 | z = tf.where(tf.greater(pi, thres), pi, tf.zeros_like(pi))
53 | #n_active = tf.reduce_mean(
54 | # tf.reduce_sum(tf.cast(tf.greater(pi, thres), tf.int32), 1))
55 | n_active = tf.reduce_sum(tf.cast(tf.greater(pi, thres), tf.int32), 1)
56 | n_active = tf.reduce_sum(n_active)/N
57 | else:
58 | if training:
59 | z = RelaxedBernoulli(tau, logits=logit(pi)).sample(N)
60 | else:
61 | pi_ = tf.where(tf.greater(pi, thres), pi, tf.zeros_like(pi))
62 | z = tf.tile(tf.expand_dims(pi_, 0), [N, 1])
63 | n_active = tf.reduce_sum(tf.cast(tf.greater(pi, thres), tf.int32))
64 |
65 | if scale_kl is None:
66 | kl = tf.reduce_sum(kl)
67 | else:
68 | kl = scale_kl * tf.reduce_mean(kl)
69 |
70 | if collect:
71 | if reuse is not True:
72 | tf.add_to_collection('kl', kl)
73 | prefix = 'train_' if training else 'test_'
74 | tf.add_to_collection(prefix+'pi', pi)
75 | tf.add_to_collection(prefix+'n_active', n_active)
76 |
77 | z = tf.reshape(z, ([-1, K, 1, 1] if is_conv else [-1, K]))
78 | return x*z
79 |
--------------------------------------------------------------------------------
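Read off the code above: the training-time sample of each keep probability π_k is an inverse-CDF sample of a Kumaraswamy(a_k, b_k) distribution, its test-time replacement is the Kumaraswamy mean, and the per-unit KL term is the closed form against a Beta(α, 1) prior (γ is the Euler-Mascheroni constant, ψ the digamma function):

```latex
\pi_k \;=\; \bigl(1 - u_k^{1/b_k}\bigr)^{1/a_k}, \quad u_k \sim \mathrm{Uniform}(0,1)
\qquad\text{(training)}

\mathbb{E}[\pi_k] \;=\; b_k \exp\!\Bigl(\log\Gamma\bigl(1+\tfrac{1}{a_k}\bigr) + \log\Gamma(b_k) - \log\Gamma\bigl(1+\tfrac{1}{a_k}+b_k\bigr)\Bigr)
\qquad\text{(test)}

\mathrm{KL}_k \;=\; \frac{a_k-\alpha}{a_k}\Bigl(-\gamma - \psi(b_k) - \frac{1}{b_k}\Bigr) \;+\; \log(a_k b_k) \;-\; \log\alpha \;-\; \frac{b_k-1}{b_k}
```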
/model/digamma.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | # @MISC {1446110,
3 | # TITLE = {Approximating the Digamma function},
4 | # AUTHOR = {njuffa (https://math.stackexchange.com/users/114200/njuffa)},
5 | # HOWPUBLISHED = {Mathematics Stack Exchange},
6 | # NOTE = {URL:https://math.stackexchange.com/q/1446110 (version: 2015-09-22)},
7 | # EPRINT = {https://math.stackexchange.com/q/1446110},
8 | # URL = {https://math.stackexchange.com/q/1446110}}
9 |
10 | def digamma_approx(x):
11 | def digamma_over_one(x):
12 | return tf.log(x + 0.4849142940227510) \
13 | - 1/(1.0271785180163817*x)
14 | return digamma_over_one(x+1) - 1./x
15 |
--------------------------------------------------------------------------------
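Written out, the helper above shifts the argument with the recurrence ψ(x) = ψ(x + 1) − 1/x and then applies a two-constant approximation for arguments above one:

```latex
\psi(x) \;=\; \psi(x+1) - \frac{1}{x},
\qquad
\psi(y) \;\approx\; \ln\!\bigl(y + 0.4849142940227510\bigr) \;-\; \frac{1}{1.0271785180163817\, y}
\quad (y > 1)
```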
/model/layers.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 |
4 | exp = tf.exp
5 | log = lambda x: tf.log(x + 1e-20)
6 | logit = lambda x: log(x) - log(1-x)
7 | softplus = tf.nn.softplus
8 | softmax = tf.nn.softmax
9 | tanh = tf.nn.tanh
10 | relu = tf.nn.relu
11 | sigmoid = tf.nn.sigmoid
12 |
13 | dense = tf.layers.dense
14 | flatten = tf.contrib.layers.flatten
15 |
16 | def conv(x, filters, kernel_size=3, strides=1, **kwargs):
17 | return tf.layers.conv2d(x, filters, kernel_size, strides,
18 | data_format='channels_first', **kwargs)
19 |
20 | def pool(x, **kwargs):
21 | return tf.layers.max_pooling2d(x, 2, 2,
22 | data_format='channels_first', **kwargs)
23 |
24 | def global_avg_pool(x):
25 | return tf.reduce_mean(x, axis=[2, 3])
26 |
27 | batch_norm = tf.layers.batch_normalization
28 | layer_norm = tf.contrib.layers.layer_norm
29 |
--------------------------------------------------------------------------------
/model/lenet.py:
--------------------------------------------------------------------------------
1 | from layers import *
2 | from utils.train import *
3 |
4 | def lenet_dense(x, y, training, name='lenet', reuse=None,
5 | dropout=None, **dropout_kwargs):
6 | dropout_ = lambda x, subname: x if dropout is None else \
7 | dropout(x, training, name=name+subname, reuse=reuse,
8 | **dropout_kwargs)
9 | x = dense(dropout_(x, '/dropout1'), 500, activation=relu,
10 | name=name+'/dense1', reuse=reuse)
11 | x = dense(dropout_(x, '/dropout2'), 300, activation=relu,
12 | name=name+'/dense2', reuse=reuse)
13 | x = dense(dropout_(x, '/dropout3'), 10, name=name+'/dense3', reuse=reuse)
14 |
15 | net = {}
16 | all_vars = tf.get_collection('variables', scope=name)
17 | net['qpi_vars'] = [v for v in all_vars if 'qpi_vars' in v.name]
18 | net['pzx_vars'] = [v for v in all_vars if 'pzx_vars' in v.name]
19 | net['weights'] = [v for v in all_vars \
20 | if 'qpi_vars' not in v.name and 'pzx_vars' not in v.name]
21 |
22 | net['cent'] = cross_entropy(x, y)
23 | net['wd'] = weight_decay(1e-4, var_list=net['weights'])
24 | net['acc'] = accuracy(x, y)
25 |
26 | prefix = 'train_' if training else 'test_'
27 | net['kl'] = tf.get_collection('kl')
28 | net['pi'] = tf.get_collection(prefix+'pi')
29 | net['n_active'] = tf.get_collection(prefix+'n_active')
30 |
31 | return net
32 |
33 | def lenet_conv(x, y, training, name='lenet', reuse=None,
34 | dropout=None, **dropout_kwargs):
35 | dropout_ = lambda x, subname: x if dropout is None else \
36 | dropout(x, training, name=name+subname, reuse=reuse,
37 | **dropout_kwargs)
38 | x = tf.reshape(x, [-1, 1, 28, 28])
39 | x = conv(x, 20, 5, name=name+'/conv1', reuse=reuse)
40 | x = relu(dropout_(x, '/dropout1'))
41 | x = pool(x, name=name+'/pool1')
42 | x = conv(x, 50, 5, name=name+'/conv2', reuse=reuse)
43 | x = relu(dropout_(x, '/dropout2'))
44 | x = pool(x, name=name+'/pool2')
45 | x = flatten(x)
46 | x = dense(dropout_(x, '/dropout3'), 500, activation=relu,
47 | name=name+'/dense1', reuse=reuse)
48 | x = dense(dropout_(x, '/dropout4'), 10, name=name+'/dense2', reuse=reuse)
49 |
50 | net = {}
51 | all_vars = tf.get_collection('variables', scope=name)
52 | net['qpi_vars'] = [v for v in all_vars if 'qpi_vars' in v.name]
53 | net['pzx_vars'] = [v for v in all_vars if 'pzx_vars' in v.name]
54 | net['weights'] = [v for v in all_vars \
55 | if 'qpi_vars' not in v.name and 'pzx_vars' not in v.name]
56 |
57 | net['cent'] = cross_entropy(x, y)
58 | net['wd'] = weight_decay(1e-4, var_list=net['weights'])
59 | net['acc'] = accuracy(x, y)
60 |
61 | prefix = 'train_' if training else 'test_'
62 | net['kl'] = tf.get_collection('kl')
63 | net['pi'] = tf.get_collection(prefix+'pi')
64 | net['n_active'] = tf.get_collection(prefix+'n_active')
65 |
66 | return net
67 |
--------------------------------------------------------------------------------
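As a quick orientation, here is a minimal sketch of how the two builders above are meant to be called; it mirrors `scripts/lenet_dense/bbdropout.py` (run from a directory containing the `model` package, as the scripts do via their symlinks) and is not an additional file in the repository:

```python
import tensorflow as tf
from model.lenet import lenet_dense
from model.bbdropout import bbdropout

x = tf.placeholder(tf.float32, [None, 784])   # flattened MNIST images
y = tf.placeholder(tf.float32, [None, 10])    # one-hot labels

# Training and test graphs; extra keyword arguments (e.g. dep=True, scale_kl=...)
# are forwarded to every bbdropout call inside lenet_dense.
net = lenet_dense(x, y, True, dropout=bbdropout)
tnet = lenet_dense(x, y, False, dropout=bbdropout, reuse=True)

# 'cent', 'acc', 'wd' are scalar tensors; 'kl', 'pi', 'n_active' are per-layer
# lists gathered from the collections populated by bbdropout.
N = 55000  # mnist.train.num_examples in the training scripts
loss = net['cent'] + tf.add_n(net['kl']) / float(N) + net['wd']
```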
/model/sbpdropout.py:
--------------------------------------------------------------------------------
1 | # copied from https://github.com/necludov/group-sparsity-sbp
2 | import tensorflow as tf
3 | from tensorflow.python.ops.distributions import special_math
4 | import numpy as np
5 |
6 | def phi(x):
7 | return 0.5*tf.erfc(-x/tf.sqrt(2.0))
8 |
9 | def __erfinv(x):
10 | w = -tf.log((1.0-x)*(1.0+x)-1e-5)
11 | p_small = 2.81022636e-08*tf.ones_like(x)
12 | p_small = 3.43273939e-07 + p_small*(w-2.5)
13 | p_small = -3.5233877e-06 + p_small*(w-2.5)
14 | p_small = -4.39150654e-06 + p_small*(w-2.5)
15 | p_small = 0.00021858087 + p_small*(w-2.5)
16 | p_small = -0.00125372503 + p_small*(w-2.5)
17 | p_small = -0.00417768164 + p_small*(w-2.5)
18 | p_small = 0.246640727 + p_small*(w-2.5)
19 | p_small = 1.50140941 + p_small*(w-2.5)
20 |
21 | p_big = -0.000200214257*tf.ones_like(x)
22 | p_big = 0.000100950558 + p_big*(tf.sqrt(w) - 3.0)
23 | p_big = 0.00134934322 + p_big*(tf.sqrt(w) - 3.0)
24 | p_big = -0.00367342844 + p_big*(tf.sqrt(w) - 3.0)
25 | p_big = 0.00573950773 + p_big*(tf.sqrt(w) - 3.0)
26 | p_big = -0.0076224613 + p_big*(tf.sqrt(w) - 3.0)
27 | p_big = 0.00943887047 + p_big*(tf.sqrt(w) - 3.0)
28 | p_big = 1.00167406 + p_big*(tf.sqrt(w) - 3.0)
29 | p_big = 2.83297682 + p_big*(tf.sqrt(w) - 3.0)
30 |
31 | small_mask = tf.cast(tf.less(w, 5.0*tf.ones_like(w)), tf.float32)
32 | big_mask = tf.cast(tf.greater_equal(w, 5.0*tf.ones_like(w)), tf.float32)
33 | p = p_small*small_mask + p_big*big_mask
34 | return p*x
35 |
36 | def erfinv(x):
37 | return special_math.ndtri((x+1.)/2.0)/tf.sqrt(2.)
38 |
39 | def erfcx(x):
40 | """M. M. Shepherd and J. G. Laframboise,
41 | MATHEMATICS OF COMPUTATION 36, 249 (1981)
42 | """
43 | K = 3.75
44 | y = (tf.abs(x)-K) / (tf.abs(x)+K)
45 | y2 = 2.0*y
46 | (d, dd) = (-0.4e-20, 0.0)
47 | (d, dd) = (y2 * d - dd + 0.3e-20, d)
48 | (d, dd) = (y2 * d - dd + 0.97e-19, d)
49 | (d, dd) = (y2 * d - dd + 0.27e-19, d)
50 | (d, dd) = (y2 * d - dd + -0.2187e-17, d)
51 | (d, dd) = (y2 * d - dd + -0.2237e-17, d)
52 | (d, dd) = (y2 * d - dd + 0.50681e-16, d)
53 | (d, dd) = (y2 * d - dd + 0.74182e-16, d)
54 | (d, dd) = (y2 * d - dd + -0.1250795e-14, d)
55 | (d, dd) = (y2 * d - dd + -0.1864563e-14, d)
56 | (d, dd) = (y2 * d - dd + 0.33478119e-13, d)
57 | (d, dd) = (y2 * d - dd + 0.32525481e-13, d)
58 | (d, dd) = (y2 * d - dd + -0.965469675e-12, d)
59 | (d, dd) = (y2 * d - dd + 0.194558685e-12, d)
60 | (d, dd) = (y2 * d - dd + 0.28687950109e-10, d)
61 | (d, dd) = (y2 * d - dd + -0.63180883409e-10, d)
62 | (d, dd) = (y2 * d - dd + -0.775440020883e-09, d)
63 | (d, dd) = (y2 * d - dd + 0.4521959811218e-08, d)
64 | (d, dd) = (y2 * d - dd + 0.10764999465671e-07, d)
65 | (d, dd) = (y2 * d - dd + -0.218864010492344e-06, d)
66 | (d, dd) = (y2 * d - dd + 0.774038306619849e-06, d)
67 | (d, dd) = (y2 * d - dd + 0.4139027986073010e-05, d)
68 | (d, dd) = (y2 * d - dd + -0.69169733025012064e-04, d)
69 | (d, dd) = (y2 * d - dd + 0.490775836525808632e-03, d)
70 | (d, dd) = (y2 * d - dd + -0.2413163540417608191e-02, d)
71 | (d, dd) = (y2 * d - dd + 0.9074997670705265094e-02, d)
72 | (d, dd) = (y2 * d - dd + -0.26658668435305752277e-01, d)
73 | (d, dd) = (y2 * d - dd + 0.59209939998191890498e-01, d)
74 | (d, dd) = (y2 * d - dd + -0.84249133366517915584e-01, d)
75 | (d, dd) = (y2 * d - dd + -0.4590054580646477331e-02, d)
76 | d = y * d - dd + 0.1177578934567401754080e+01
77 | result = d/(1.0+2.0*tf.abs(x))
78 | result = tf.where(tf.is_nan(result), tf.ones_like(result), result)
79 | result = tf.where(tf.is_inf(result), tf.ones_like(result), result)
80 |
81 | negative_mask = tf.cast(tf.less(x, 0.0), tf.float32)
82 | positive_mask = tf.cast(tf.greater_equal(x, 0.0), tf.float32)
83 | negative_result = 2.0*tf.exp(x*x)-result
84 | negative_result = tf.where(tf.is_nan(negative_result), tf.ones_like(negative_result), negative_result)
85 | negative_result = tf.where(tf.is_inf(negative_result), tf.ones_like(negative_result), negative_result)
86 | result = negative_mask * negative_result + positive_mask * result
87 | return result
88 |
89 | def phi_inv(x):
90 | return tf.sqrt(2.0)*erfinv(2.0*x-1)
91 |
92 | def mean_truncated_log_normal_straight(mu, sigma, a, b):
93 | alpha = (a - mu)/sigma
94 | beta = (b - mu)/sigma
95 | z = phi(beta) - phi(alpha)
96 | mean = tf.exp(mu+sigma*sigma/2.0)/z*(phi(sigma-alpha) - phi(sigma-beta))
97 | return mean
98 |
99 | def mean_truncated_log_normal_reduced(mu, sigma, a, b):
100 | alpha = (a - mu)/sigma
101 | beta = (b - mu)/sigma
102 | z = phi(beta) - phi(alpha)
103 | mean = erfcx((sigma-beta)/tf.sqrt(2.0))*tf.exp(b-beta*beta/2)
104 | mean = mean - erfcx((sigma-alpha)/tf.sqrt(2.0))*tf.exp(a-alpha*alpha/2)
105 | mean = mean/(2*z)
106 | return mean
107 |
108 | def mean_truncated_log_normal(mu, sigma, a, b):
109 | return mean_truncated_log_normal_reduced(mu, sigma, a, b)
110 |
111 | def median_truncated_log_normal(mu, sigma, a, b):
112 | alpha = (a - mu)/sigma
113 | beta = (b - mu)/sigma
114 | gamma = phi(alpha)+0.5*(phi(beta)-phi(alpha))
115 | return tf.exp(phi_inv(gamma)*sigma+mu)
116 |
117 | def snr_truncated_log_normal(mu, sigma, a, b):
118 | alpha = (a - mu)/sigma
119 | beta = (b - mu)/sigma
120 | z = phi(beta) - phi(alpha)
121 | ratio = erfcx((sigma-beta)/tf.sqrt(2.0))*tf.exp((b-mu)-beta**2/2.0)
122 | ratio = ratio - erfcx((sigma-alpha)/tf.sqrt(2.0))*tf.exp((a-mu)-alpha**2/2.0)
123 | denominator = 2*z*erfcx((2.0*sigma-beta)/tf.sqrt(2.0))*tf.exp(2.0*(b-mu)-beta**2/2.0)
124 | denominator = denominator - 2*z*erfcx((2.0*sigma-alpha)/tf.sqrt(2.0))*tf.exp(2.0*(a-mu)-alpha**2/2.0)
125 | denominator = denominator - ratio**2
126 | ratio = ratio/tf.sqrt(denominator)
127 | return ratio
128 |
129 | def sample_truncated_normal(mu, sigma, a, b):
130 | alpha = (a - mu)/sigma
131 | beta = (b - mu)/sigma
132 | gamma = phi(alpha)+tf.random_uniform(mu.shape)*(phi(beta)-phi(alpha))
133 | return tf.clip_by_value(phi_inv(tf.clip_by_value(gamma, 1e-5, 1.0-1e-5))*sigma+mu, a, b)
134 |
135 | def sbpdropout(x, training,
136 | thres=1.0, scale_kl=None, collect=True,
137 | name='sbpdropout', reuse=None):
138 |
139 | min_log = -20.0
140 | max_log = 0.0
141 |
142 | axis = 1
143 |
144 | params_shape = np.ones(x.get_shape().ndims)
145 | params_shape[axis] = x.get_shape()[axis].value
146 |
147 | with tf.variable_scope(name+'/qpi_vars', reuse=reuse):
148 | with tf.device('/cpu:0'):
149 | mu = tf.get_variable('mu', shape=params_shape.tolist(),
150 | initializer=tf.zeros_initializer())
151 | log_sigma = tf.get_variable('log_sigma', shape=params_shape.tolist(),
152 | initializer=tf.constant_initializer(-5.0))
153 |
154 | mu = tf.clip_by_value(mu, -20.0, 5.0)
155 | log_sigma = tf.clip_by_value(log_sigma, -20.0, 5.0)
156 | sigma = tf.exp(log_sigma)
157 |
158 | # adding loss
159 | alpha = (min_log-mu)/sigma
160 | beta = (max_log-mu)/sigma
161 | z = phi(beta) - phi(alpha)
162 |
163 | def pdf(x):
164 | return tf.exp(-x*x/2.0)/tf.sqrt(2.0*np.pi)
165 | kl = -log_sigma-tf.log(z)-(alpha*pdf(alpha)-beta*pdf(beta))/(2.0*z)
166 | kl = kl+tf.log(max_log-min_log)-tf.log(2.0*np.pi*np.e)/2.0
167 | if scale_kl is None:
168 | kl = tf.reduce_sum(kl)
169 | else:
170 | kl = scale_kl*tf.reduce_mean(kl)
171 |
172 | if training:
173 | z = tf.exp(sample_truncated_normal(mu, sigma, min_log, max_log))
174 | else:
175 | z = mean_truncated_log_normal(mu, sigma, min_log, max_log)
176 | snr = snr_truncated_log_normal(mu, sigma, min_log, max_log)
177 | mask = tf.cast(tf.greater(snr, thres*tf.ones_like(snr)), tf.float32)
178 |
179 | n_active = tf.reduce_sum(tf.cast(mask, tf.int32))
180 |
181 | if collect:
182 | if reuse is not True:
183 | tf.add_to_collection('kl', kl)
184 | prefix = 'train_' if training else 'test_'
185 | tf.add_to_collection(prefix+'p', snr)
186 | tf.add_to_collection(prefix+'n_active', n_active)
187 |
188 | if not training:
189 | z = mask*z
190 |
191 | return x*z
192 |
--------------------------------------------------------------------------------
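In the notation of the code above (a = min_log, b = max_log, α = (a − μ)/σ, β = (b − μ)/σ, Z = Φ(β) − Φ(α)), the multiplicative noise is z = e^s with s a normal variable truncated to [a, b]; its test-time replacement is the truncated log-normal mean, which `mean_truncated_log_normal_reduced` evaluates in an equivalent erfcx-based form for numerical stability:

```latex
\mathbb{E}[z] \;=\; \frac{e^{\mu + \sigma^{2}/2}}{Z}\,\bigl(\Phi(\sigma-\alpha) - \Phi(\sigma-\beta)\bigr)
```

At test time a unit is kept only when its signal-to-noise ratio (`snr_truncated_log_normal`) exceeds `thres` (default 1.0); otherwise its multiplier is masked to zero.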
/model/utils:
--------------------------------------------------------------------------------
1 | ../utils/
--------------------------------------------------------------------------------
/scripts/lenet_conv/bbdropout.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import tensorflow as tf
3 | from tensorflow.examples.tutorials.mnist import input_data
4 | from model.lenet import lenet_conv
5 | from model.bbdropout import bbdropout
6 | from utils.accumulator import Accumulator
7 | from utils.train import *
8 | from utils.mnist import mnist_input
9 | import time
10 | import os
11 | import argparse
12 | import csv
13 | from pylab import *
14 |
15 | parser = argparse.ArgumentParser()
16 | parser.add_argument('--batch_size', type=int, default=100)
17 | parser.add_argument('--n_epochs', type=int, default=200)
18 | parser.add_argument('--save_freq', type=int, default=20)
19 | parser.add_argument('--savedir', type=str, default=None)
20 | parser.add_argument('--pretraindir', type=str, default=None)
21 | parser.add_argument('--mode', type=str, default='train')
22 | parser.add_argument('--gpu_num', type=int, default=0)
23 | parser.add_argument('--csvfn', type=str, default=None)
24 | args = parser.parse_args()
25 |
26 | os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'
27 | os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu_num)
28 |
29 | pretraindir = './results/pretrained' if args.pretraindir is None else args.pretraindir
30 | savedir = './results/bbdropout/sample_run' if args.savedir is None else args.savedir
31 | if not os.path.isdir(savedir):
32 | os.makedirs(savedir)
33 |
34 | batch_size = args.batch_size
35 | mnist, n_train_batches, n_test_batches = mnist_input(batch_size)
36 | x = tf.placeholder(tf.float32, [None, 784])
37 | y = tf.placeholder(tf.float32, [None, 10])
38 | N = mnist.train.num_examples
39 | scale_kl = 1e-2*N
40 | dropout = bbdropout
41 | net = lenet_conv(x, y, True, dropout=dropout, scale_kl=scale_kl)
42 | tnet = lenet_conv(x, y, False, reuse=True, dropout=dropout,
43 | scale_kl=scale_kl)
44 |
45 | def train():
46 | loss = net['cent'] + tf.add_n(net['kl'])/float(N) + net['wd']
47 | global_step = tf.train.get_or_create_global_step()
48 | bdr = [int(n_train_batches*(args.n_epochs-1)*r) for r in [0.5, 0.75]]
49 | vals = [1e-2, 1e-3, 1e-4]
50 | lr = tf.train.piecewise_constant(tf.cast(global_step, tf.int32), bdr, vals)
51 | train_op1 = tf.train.AdamOptimizer(lr).minimize(loss,
52 | var_list=net['qpi_vars'], global_step=global_step)
53 | train_op2 = tf.train.AdamOptimizer(0.1*lr).minimize(loss,
54 | var_list=net['weights'])
55 | train_op = tf.group(train_op1, train_op2)
56 |
57 | pretrain_saver = tf.train.Saver(net['weights'])
58 | saver = tf.train.Saver(net['weights']+net['qpi_vars'])
59 | logfile = open(os.path.join(savedir, 'train.log'), 'w', 0)
60 |
61 | sess = tf.Session()
62 | sess.run(tf.global_variables_initializer())
63 | pretrain_saver.restore(sess, os.path.join(pretraindir, 'model'))
64 |
65 | train_logger = Accumulator('cent', 'acc')
66 | train_to_run = [train_op, net['cent'], net['acc']]
67 | test_logger = Accumulator('cent', 'acc')
68 | test_to_run = [tnet['cent'], tnet['acc']]
69 | for i in range(args.n_epochs):
70 | line = 'Epoch %d start, learning rate %f' % (i+1, sess.run(lr))
71 | print(line)
72 | logfile.write(line + '\n')
73 | train_logger.clear()
74 | start = time.time()
75 | for j in range(n_train_batches):
76 | bx, by = mnist.train.next_batch(batch_size)
77 | train_logger.accum(sess.run(train_to_run, {x:bx, y:by}))
78 | train_logger.print_(header='train', epoch=i+1,
79 | time=time.time()-start, logfile=logfile)
80 |
81 | test_logger.clear()
82 | for j in range(n_test_batches):
83 | bx, by = mnist.test.next_batch(batch_size)
84 | test_logger.accum(sess.run(test_to_run, {x:bx, y:by}))
85 | test_logger.print_(header='test', epoch=i+1,
86 | time=time.time()-start, logfile=logfile)
87 | line = 'kl: ' + str(sess.run(tnet['kl'])) + '\n'
88 | line += 'n_active: ' + str(sess.run(tnet['n_active'])) + '\n'
89 | print(line)
90 | logfile.write(line+'\n')
91 | if (i+1) % args.save_freq == 0:
92 | saver.save(sess, os.path.join(savedir, 'model'))
93 |
94 | logfile.close()
95 | saver.save(sess, os.path.join(savedir, 'model'))
96 |
97 | def test():
98 | sess = tf.Session()
99 | saver = tf.train.Saver(tnet['weights']+tnet['qpi_vars']+tnet['pzx_vars'])
100 | saver.restore(sess, os.path.join(savedir, 'model'))
101 | logger = Accumulator('cent', 'acc')
102 | to_run = [tnet['cent'], tnet['acc']]
103 | for j in range(n_test_batches):
104 | bx, by = mnist.test.next_batch(batch_size)
105 | logger.accum(sess.run(to_run, {x:bx, y:by}))
106 | logger.print_(header='test')
107 | line = 'kl: ' + str(sess.run(tnet['kl'])) + '\n'
108 | line += 'n_active: ' + str(sess.run(tnet['n_active'])) + '\n'
109 | print(line)
110 |
111 | def visualize():
112 | sess = tf.Session()
113 | saver = tf.train.Saver(tnet['weights']+tnet['qpi_vars'])
114 | saver.restore(sess, os.path.join(savedir, 'model'))
115 |
116 | n_drop = len(tnet['n_active'])
117 | fig = figure('pi')
118 | axarr = fig.subplots(n_drop)
119 | for i in range(n_drop):
120 | np_pi = sess.run(tnet['pi'][i]).reshape((1,-1))
121 | im = axarr[i].imshow(np_pi, cmap='gray', aspect='auto')
122 | axarr[i].yaxis.set_visible(False)
123 | axarr[i].xaxis.set_major_locator(MaxNLocator(integer=True))
124 | if i == n_drop-1:
125 | axarr[i].set_xlabel('neurons')
126 | fig.colorbar(im, ax=axarr[i])
127 | show()
128 |
129 | def record():
130 | sess = tf.Session()
131 | saver = tf.train.Saver(tnet['weights']+tnet['qpi_vars'])
132 | saver.restore(sess, os.path.join(savedir, 'model'))
133 | logger = Accumulator('cent', 'acc')
134 | to_run = [tnet['cent'], tnet['acc']]
135 | for j in range(n_test_batches):
136 | bx, by = mnist.test.next_batch(batch_size)
137 | logger.accum(sess.run(to_run, {x:bx, y:by}))
138 | np_n_active = sess.run(tnet['n_active'])
139 |
140 | if not os.path.isdir('../../records'):
141 | os.makedirs('../../records')
142 | csvfn = os.path.join('../../records',
143 | 'bbdropout_lenet_conv.csv' if args.csvfn is None else args.csvfn)
144 |
145 | if csvfn is not None:
146 | flag = 'a' if os.path.exists(csvfn) else 'w'
147 | with open(csvfn, flag) as f:
148 | writer = csv.writer(f)
149 | if flag=='w':
150 | writer.writerow(['savedir', 'cent', 'acc', 'n_active'])
151 | line = [savedir]
152 | line.append('%.4f' % logger.get('cent'))
153 | line.append('%.4f' % logger.get('acc'))
154 | line.append('-'.join(str(x) for x in np_n_active))
155 | writer.writerow(line)
156 |
157 | if __name__=='__main__':
158 | if args.mode == 'train':
159 | train()
160 | elif args.mode == 'test':
161 | test()
162 | elif args.mode == 'vis':
163 | visualize()
164 | elif args.mode == 'record':
165 | record()
166 | else:
167 | raise ValueError('Invalid mode %s' % args.mode)
168 |
--------------------------------------------------------------------------------
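The loss assembled in `train()` is the cross-entropy plus the collected KL terms scaled by 1/N plus the weight-decay term `net['wd']`. Because `scale_kl = 1e-2*N` and `bbdropout` returns `scale_kl * reduce_mean(kl)`, each dropout layer ℓ (with K_ℓ units) effectively contributes 10⁻² times its mean per-unit KL:

```latex
\mathcal{L} \;=\; \mathrm{CE}(x, y) \;+\; \frac{1}{N}\sum_{\ell}\mathrm{KL}_{\ell} \;+\; \mathrm{wd},
\qquad
\frac{\mathrm{KL}_{\ell}}{N} \;=\; 10^{-2}\cdot\frac{1}{K_{\ell}}\sum_{k=1}^{K_{\ell}}\mathrm{KL}_{\ell,k}
```

Two Adam optimizers are grouped: the variational parameters `qpi_vars` follow the piecewise-constant rate (1e-2 → 1e-3 → 1e-4), and the pretrained weights are updated at one tenth of that rate.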
/scripts/lenet_conv/dbbdropout.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import tensorflow as tf
3 | from tensorflow.examples.tutorials.mnist import input_data
4 | from model.lenet import lenet_conv
5 | from model.bbdropout import bbdropout
6 | from utils.accumulator import Accumulator
7 | from utils.train import *
8 | from utils.mnist import mnist_input
9 | import time
10 | import os
11 | import argparse
12 | import csv
13 | import matplotlib
14 | matplotlib.use('Agg')
15 | import matplotlib.pyplot as plt
16 |
17 | parser = argparse.ArgumentParser()
18 | parser.add_argument('--batch_size', type=int, default=100)
19 | parser.add_argument('--n_epochs', type=int, default=200)
20 | parser.add_argument('--save_freq', type=int, default=20)
21 | parser.add_argument('--vis_freq', type=int, default=20)
22 | parser.add_argument('--center_init', type=float, default=1.0)
23 | parser.add_argument('--pretraindir', type=str, default=None)
24 | parser.add_argument('--savedir', type=str, default=None)
25 | parser.add_argument('--mode', type=str, default='train')
26 | parser.add_argument('--gpu_num', type=int, default=0)
27 | parser.add_argument('--csvfn', type=str, default=None)
28 | args = parser.parse_args()
29 |
30 | os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'
31 | os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu_num)
32 |
33 | pretraindir = './results/bbdropout/sample_run' if args.pretraindir is None else args.pretraindir
34 | savedir = './results/dbbdropout/sample_run' if args.savedir is None else args.savedir
35 | if not os.path.isdir(savedir):
36 | os.makedirs(savedir)
37 | figdir = os.path.join(savedir, 'figs')
38 | if not os.path.isdir(figdir):
39 | os.makedirs(figdir)
40 |
41 | batch_size = args.batch_size
42 | mnist, n_train_batches, n_test_batches = mnist_input(batch_size)
43 | x = tf.placeholder(tf.float32, [None, 784])
44 | y = tf.placeholder(tf.float32, [None, 10])
45 | N = mnist.train.num_examples
46 | scale_kl = 1e-2*N
47 | center_init = args.center_init
48 | net = lenet_conv(x, y, True, dropout=bbdropout, scale_kl=scale_kl,
49 | dep=True, center_init=center_init)
50 | tnet = lenet_conv(x, y, False, reuse=True,
51 | dropout=bbdropout, scale_kl=scale_kl,
52 | dep=True, center_init=center_init)
53 | n_drop = len(tnet['n_active'])
54 |
55 | def train():
56 | loss = net['cent'] + tf.add_n(net['kl'])/float(N) + net['wd']
57 | global_step = tf.train.get_or_create_global_step()
58 | bdr = [int(n_train_batches*(args.n_epochs-1)*r) for r in [0.5, 0.75]]
59 | vals = [1e-2, 1e-3, 1e-4]
60 | lr = tf.train.piecewise_constant(tf.cast(global_step, tf.int32), bdr, vals)
61 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
62 | with tf.control_dependencies(update_ops):
63 | train_op1 = tf.train.AdamOptimizer(lr).minimize(loss,
64 | var_list=net['pzx_vars'], global_step=global_step)
65 | train_op2 = tf.train.AdamOptimizer(0.1*lr).minimize(loss,
66 | var_list=net['weights'])
67 | train_op = tf.group(train_op1, train_op2)
68 |
69 | pretrain_saver = tf.train.Saver(net['weights']+net['qpi_vars'])
70 | saver = tf.train.Saver(net['weights']+net['qpi_vars']+net['pzx_vars'])
71 | logfile = open(os.path.join(savedir, 'train.log'), 'w', 0)
72 |
73 | sess = tf.Session()
74 | sess.run(tf.global_variables_initializer())
75 | pretrain_saver.restore(sess, os.path.join(pretraindir, 'model'))
76 |
77 | train_logger = Accumulator('cent', 'acc')
78 | train_to_run = [train_op, net['cent'], net['acc']]
79 | test_logger = Accumulator('cent', 'acc')
80 | test_to_run = [tnet['cent'], tnet['acc']] + tnet['n_active']
81 | for i in range(args.n_epochs):
82 | line = 'Epoch %d start, learning rate %f' % (i+1, sess.run(lr))
83 | print(line)
84 | logfile.write(line + '\n')
85 | train_logger.clear()
86 | start = time.time()
87 | for j in range(n_train_batches):
88 | bx, by = mnist.train.next_batch(batch_size)
89 | train_logger.accum(sess.run(train_to_run, {x:bx, y:by}))
90 | train_logger.print_(header='train', epoch=i+1,
91 | time=time.time()-start, logfile=logfile)
92 |
93 | test_logger.clear()
94 | np_n_active = [0]*n_drop
95 | for j in range(n_test_batches):
96 | bx, by = mnist.test.next_batch(batch_size)
97 | res = sess.run(test_to_run, {x:bx, y:by})
98 | test_logger.accum(res[:-n_drop])
99 | np_n_active = [a + b for a, b in zip(np_n_active, res[-n_drop:])]
100 | test_logger.print_(header='test', epoch=i+1,
101 | time=time.time()-start, logfile=logfile)
102 | np_n_active = [int(a/n_test_batches) for a in np_n_active]
103 | line = 'kl: ' + str(sess.run(tnet['kl'])) + '\n'
104 | line += 'n_active: ' + str(np_n_active) + '\n'
105 | print(line)
106 | logfile.write(line+'\n')
107 |
108 | if (i+1) % args.save_freq == 0:
109 | saver.save(sess, os.path.join(savedir, 'model'))
110 |
111 | if (i+1)%args.vis_freq == 0:
112 | fig = _visualize(sess)
113 | fig.savefig(os.path.join(figdir, 'epoch%d.png'%(i+1)), dpi=200)
114 |
115 | saver.save(sess, os.path.join(savedir, 'model'))
116 | logfile.close()
117 |
118 | def test():
119 | sess = tf.Session()
120 | saver = tf.train.Saver(tnet['weights']+tnet['qpi_vars']+tnet['pzx_vars'])
121 | saver.restore(sess, os.path.join(savedir, 'model'))
122 | logger = Accumulator('cent', 'acc')
123 | to_run = [tnet['cent'], tnet['acc']] + tnet['n_active']
124 | np_n_active = [0]*n_drop
125 | for j in range(n_test_batches):
126 | bx, by = mnist.test.next_batch(batch_size)
127 | res = sess.run(to_run, {x:bx, y:by})
128 | logger.accum(res[:-n_drop])
129 | np_n_active = [a + b for a, b in zip(np_n_active, res[-n_drop:])]
130 | np_n_active = [int(a/n_test_batches) for a in np_n_active]
131 | logger.print_(header='test')
132 | line = 'kl: ' + str(sess.run(tnet['kl'])) + '\n'
133 | line += 'n_active: ' + str(np_n_active) + '\n'
134 | print(line)
135 |
136 | def _visualize(sess):
137 | pi_csum = [tf.matmul(y, pi, transpose_a=True) for pi in tnet['pi']]
138 | csum = tf.expand_dims(tf.reduce_sum(y, 0), 1)
139 |
140 | np_pi_csum = [0]*n_drop
141 | np_csum = 0
142 | for j in range(n_test_batches):
143 | bx, by = mnist.test.next_batch(args.batch_size)
144 | A, B = sess.run([pi_csum, csum], {x:bx, y:by})
145 | for k in range(len(pi_csum)):
146 | np_pi_csum[k] += A[k]
147 | np_csum += B
148 |
149 | fig = plt.figure('vis')
150 | axarr = fig.subplots(n_drop)
151 | for i in range(n_drop):
152 | im = axarr[i].imshow(np_pi_csum[i]/np_csum, cmap='gray', aspect='auto')
153 | fig.colorbar(im, ax=axarr[i])
154 | return fig
155 |
156 | def visualize():
157 | sess = tf.Session()
158 | saver = tf.train.Saver(tnet['weights']+tnet['qpi_vars']+tnet['pzx_vars'])
159 | saver.restore(sess, os.path.join(savedir, 'model'))
160 | _visualize(sess)
161 | plt.show()
162 |
163 | def record():
164 | sess = tf.Session()
165 | saver = tf.train.Saver(tnet['weights']+tnet['qpi_vars']+tnet['pzx_vars'])
166 | saver.restore(sess, os.path.join(savedir, 'model'))
167 | logger = Accumulator('cent', 'acc')
168 | to_run = [tnet['cent'], tnet['acc']] + tnet['n_active']
169 | np_n_active = [0]*n_drop
170 | for j in range(n_test_batches):
171 | bx, by = mnist.test.next_batch(batch_size)
172 | res = sess.run(to_run, {x:bx, y:by})
173 | logger.accum(res[:-n_drop])
174 | np_n_active = [a + b for a, b in zip(np_n_active, res[-n_drop:])]
175 | np_n_active = [int(a/n_test_batches) for a in np_n_active]
176 |
177 | if not os.path.isdir('../../records'):
178 | os.makedirs('../../records')
179 | csvfn = os.path.join('../../records',
180 | 'dbbdropout_lenet_conv.csv' if args.csvfn is None else args.csvfn)
181 |
182 | if csvfn is not None:
183 | flag = 'a' if os.path.exists(csvfn) else 'w'
184 | with open(csvfn, flag) as f:
185 | writer = csv.writer(f)
186 | if flag=='w':
187 | writer.writerow(['savedir', 'cent', 'acc', 'n_active'])
188 | line = [savedir]
189 | line.append('%.4f' % logger.get('cent'))
190 | line.append('%.4f' % logger.get('acc'))
191 | line.append('-'.join(str(x) for x in np_n_active))
192 | writer.writerow(line)
193 |
194 | if __name__=='__main__':
195 | if args.mode == 'train':
196 | train()
197 | elif args.mode == 'test':
198 | test()
199 | elif args.mode == 'vis':
200 | visualize()
201 | elif args.mode == 'record':
202 | record()
203 | else:
204 | raise ValueError('Invalid mode %s' % args.mode)
205 |
--------------------------------------------------------------------------------
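This script differs from `bbdropout.py` mainly in that it builds the graph with `dep=True`, which enables the `pzx_vars` branch of `model/bbdropout.py`: the layer input (global-average-pooled for conv layers) is layer-normalized, passed through a learned scale and center, clipped by the `hard_sigmoid` helper (with Gaussian noise added during training), and multiplied into π, giving a per-example gate. Training restores both the pretrained weights and the `qpi_vars` from a previous `bbdropout.py` run (default `./results/bbdropout/sample_run`), keeps `qpi_vars` fixed, and optimizes only `pzx_vars` (at `lr`) and the network weights (at `0.1*lr`).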
/scripts/lenet_conv/model:
--------------------------------------------------------------------------------
1 | ../../model/
--------------------------------------------------------------------------------
/scripts/lenet_conv/pretrain.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import tensorflow as tf
3 | from tensorflow.examples.tutorials.mnist import input_data
4 | from model.lenet import lenet_conv
5 | from utils.accumulator import Accumulator
6 | from utils.train import *
7 | from utils.mnist import mnist_input
8 | import time
9 | import os
10 | import argparse
11 |
12 | parser = argparse.ArgumentParser()
13 | parser.add_argument('--batch_size', type=int, default=100)
14 | parser.add_argument('--n_epochs', type=int, default=200)
15 | parser.add_argument('--save_freq', type=int, default=20)
16 | parser.add_argument('--savedir', type=str, default=None)
17 | parser.add_argument('--mode', type=str, default='train')
18 | parser.add_argument('--gpu_num', type=int, default=0)
19 | args = parser.parse_args()
20 |
21 | os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'
22 | os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu_num)
23 |
24 | savedir = './results/pretrained' if args.savedir is None else args.savedir
25 | if not os.path.isdir(savedir):
26 | os.makedirs(savedir)
27 |
28 | batch_size = args.batch_size
29 | mnist, n_train_batches, n_test_batches = mnist_input(batch_size)
30 | x = tf.placeholder(tf.float32, [None, 784])
31 | y = tf.placeholder(tf.float32, [None, 10])
32 | net = lenet_conv(x, y, True)
33 | tnet = lenet_conv(x, y, False, reuse=True)
34 |
35 | def train():
36 | loss = net['cent'] + net['wd']
37 | global_step = tf.train.get_or_create_global_step()
38 | lr = tf.train.piecewise_constant(tf.cast(global_step, tf.int32),
39 | [n_train_batches*args.n_epochs/2], [1e-4, 1e-5])
40 | train_op = tf.train.AdamOptimizer(lr).minimize(loss, global_step=global_step)
41 |
42 | saver = tf.train.Saver(net['weights'])
43 | logfile = open(os.path.join(savedir, 'train.log'), 'w', 0)
44 |
45 | sess = tf.Session()
46 | sess.run(tf.global_variables_initializer())
47 |
48 | train_logger = Accumulator('cent', 'acc')
49 | train_to_run = [train_op, net['cent'], net['acc']]
50 | test_logger = Accumulator('cent', 'acc')
51 | test_to_run = [tnet['cent'], tnet['acc']]
52 | for i in range(args.n_epochs):
53 | line = 'Epoch %d start, learning rate %f' % (i+1, sess.run(lr))
54 | print(line)
55 | logfile.write(line + '\n')
56 | train_logger.clear()
57 | start = time.time()
58 | for j in range(n_train_batches):
59 | bx, by = mnist.train.next_batch(batch_size)
60 | train_logger.accum(sess.run(train_to_run, {x:bx, y:by}))
61 | train_logger.print_(header='train', epoch=i+1,
62 | time=time.time()-start, logfile=logfile)
63 |
64 | test_logger.clear()
65 | for j in range(n_test_batches):
66 | bx, by = mnist.test.next_batch(batch_size)
67 | test_logger.accum(sess.run(test_to_run, {x:bx, y:by}))
68 | test_logger.print_(header='test', epoch=i+1,
69 | time=time.time()-start, logfile=logfile)
70 | print()
71 | logfile.write('\n')
72 | if (i+1)%args.save_freq == 0:
73 | saver.save(sess, os.path.join(savedir, 'model'))
74 |
75 | logfile.close()
76 | saver.save(sess, os.path.join(savedir, 'model'))
77 |
78 | def test():
79 | sess = tf.Session()
80 | saver = tf.train.Saver(tnet['weights'])
81 | saver.restore(sess, os.path.join(savedir, 'model'))
82 | logger = Accumulator('cent', 'acc')
83 | to_run = [tnet['cent'], tnet['acc']]
84 | for j in range(n_test_batches):
85 | bx, by = mnist.test.next_batch(batch_size)
86 | logger.accum(sess.run(to_run, {x:bx, y:by}))
87 | logger.print_(header='test')
88 |
89 | if __name__=='__main__':
90 | if args.mode == 'train':
91 | train()
92 | elif args.mode == 'test':
93 | test()
94 | else:
95 | raise ValueError('Invalid mode %s' % args.mode)
96 |
--------------------------------------------------------------------------------
/scripts/lenet_conv/sbpdropout.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import tensorflow as tf
3 | from tensorflow.examples.tutorials.mnist import input_data
4 | from model.lenet import lenet_conv
5 | from model.sbpdropout import sbpdropout
6 | from utils.accumulator import Accumulator
7 | from utils.train import *
8 | from utils.mnist import mnist_input
9 | import time
10 | import os
11 | import argparse
12 | import csv
13 |
14 | parser = argparse.ArgumentParser()
15 | parser.add_argument('--batch_size', type=int, default=100)
16 | parser.add_argument('--n_epochs', type=int, default=200)
17 | parser.add_argument('--save_freq', type=int, default=20)
18 | parser.add_argument('--savedir', type=str, default=None)
19 | parser.add_argument('--pretraindir', type=str, default=None)
20 | parser.add_argument('--mode', type=str, default='train')
21 | parser.add_argument('--gpu_num', type=int, default=0)
22 | parser.add_argument('--csvfn', type=str, default=None)
23 | args = parser.parse_args()
24 |
25 | os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'
26 | os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu_num)
27 |
28 | pretraindir = './results/pretrained' if args.pretraindir is None else args.pretraindir
29 | savedir = './results/sbpdropout/sample_run' if args.savedir is None else args.savedir
30 | if not os.path.isdir(savedir):
31 | os.makedirs(savedir)
32 |
33 | batch_size = args.batch_size
34 | mnist, n_train_batches, n_test_batches = mnist_input(batch_size)
35 | x = tf.placeholder(tf.float32, [None, 784])
36 | y = tf.placeholder(tf.float32, [None, 10])
37 | N = mnist.train.num_examples
38 | scale_kl = 1e-2*N
39 | dropout = sbpdropout
40 | net = lenet_conv(x, y, True, dropout=dropout, scale_kl=scale_kl)
41 | tnet = lenet_conv(x, y, False, reuse=True, dropout=dropout,
42 | scale_kl=scale_kl)
43 |
44 | def train():
45 | loss = net['cent'] + tf.add_n(net['kl'])/float(N) + net['wd']
46 | global_step = tf.train.get_or_create_global_step()
47 | bdr = [int(n_train_batches*(args.n_epochs-1)*r) for r in [0.5, 0.75]]
48 | vals = [1e-2, 1e-3, 1e-4]
49 | lr = tf.train.piecewise_constant(tf.cast(global_step, tf.int32), bdr, vals)
50 | train_op1 = tf.train.AdamOptimizer(lr).minimize(loss,
51 | var_list=net['qpi_vars'], global_step=global_step)
52 | train_op2 = tf.train.AdamOptimizer(0.1*lr).minimize(loss,
53 | var_list=net['weights'])
54 | train_op = tf.group(train_op1, train_op2)
55 |
56 | pretrain_saver = tf.train.Saver(net['weights'])
57 | saver = tf.train.Saver(net['weights']+net['qpi_vars'])
58 | logfile = open(os.path.join(savedir, 'train.log'), 'w', 0)
59 |
60 | sess = tf.Session()
61 | sess.run(tf.global_variables_initializer())
62 | pretrain_saver.restore(sess, os.path.join(pretraindir, 'model'))
63 |
64 | train_logger = Accumulator('cent', 'acc')
65 | train_to_run = [train_op, net['cent'], net['acc']]
66 | test_logger = Accumulator('cent', 'acc')
67 | test_to_run = [tnet['cent'], tnet['acc']]
68 | for i in range(args.n_epochs):
69 | line = 'Epoch %d start, learning rate %f' % (i+1, sess.run(lr))
70 | print(line)
71 | logfile.write(line + '\n')
72 | train_logger.clear()
73 | start = time.time()
74 | for j in range(n_train_batches):
75 | bx, by = mnist.train.next_batch(batch_size)
76 | train_logger.accum(sess.run(train_to_run, {x:bx, y:by}))
77 | train_logger.print_(header='train', epoch=i+1,
78 | time=time.time()-start, logfile=logfile)
79 |
80 | test_logger.clear()
81 | for j in range(n_test_batches):
82 | bx, by = mnist.test.next_batch(batch_size)
83 | test_logger.accum(sess.run(test_to_run, {x:bx, y:by}))
84 | test_logger.print_(header='test', epoch=i+1,
85 | time=time.time()-start, logfile=logfile)
86 | line = 'kl: ' + str(sess.run(tnet['kl'])) + '\n'
87 | line += 'n_active: ' + str(sess.run(tnet['n_active'])) + '\n'
88 | print(line)
89 | logfile.write(line+'\n')
90 | if (i+1)%args.save_freq == 0:
91 | saver.save(sess, os.path.join(savedir, 'model'))
92 |
93 | logfile.close()
94 | saver.save(sess, os.path.join(savedir, 'model'))
95 |
96 | def test():
97 | sess = tf.Session()
98 | saver = tf.train.Saver(tnet['weights']+tnet['qpi_vars']+tnet['pzx_vars'])
99 | saver.restore(sess, os.path.join(savedir, 'model'))
100 | logger = Accumulator('cent', 'acc')
101 | to_run = [tnet['cent'], tnet['acc']]
102 | for j in range(n_test_batches):
103 | bx, by = mnist.test.next_batch(batch_size)
104 | logger.accum(sess.run(to_run, {x:bx, y:by}))
105 | logger.print_(header='test')
106 | line = 'kl: ' + str(sess.run(tnet['kl'])) + '\n'
107 | line += 'n_active: ' + str(sess.run(tnet['n_active'])) + '\n'
108 | print(line)
109 |
110 | def record():
111 | sess = tf.Session()
112 | saver = tf.train.Saver(tnet['weights']+tnet['qpi_vars'])
113 | saver.restore(sess, os.path.join(savedir, 'model'))
114 | logger = Accumulator('cent', 'acc')
115 | to_run = [tnet['cent'], tnet['acc']]
116 | for j in range(n_test_batches):
117 | bx, by = mnist.test.next_batch(batch_size)
118 | logger.accum(sess.run(to_run, {x:bx, y:by}))
119 | np_n_active = sess.run(tnet['n_active'])
120 |
121 | if not os.path.isdir('../../records'):
122 | os.makedirs('../../records')
123 | csvfn = os.path.join('../../records',
124 | 'sbpdropout_lenet_conv.csv' if args.csvfn is None else args.csvfn)
125 |
126 | if csvfn is not None:
127 | flag = 'a' if os.path.exists(csvfn) else 'w'
128 | with open(csvfn, flag) as f:
129 | writer = csv.writer(f)
130 | if flag=='w':
131 | writer.writerow(['savedir', 'cent', 'acc', 'n_active'])
132 | line = [savedir]
133 | line.append('%.4f' % logger.get('cent'))
134 | line.append('%.4f' % logger.get('acc'))
135 | line.append('-'.join(str(x) for x in np_n_active))
136 | writer.writerow(line)
137 |
138 | if __name__=='__main__':
139 | if args.mode == 'train':
140 | train()
141 | elif args.mode == 'test':
142 | test()
143 | elif args.mode == 'record':
144 | record()
145 | else:
146 | raise ValueError('Invalid mode %s' % args.mode)
147 |
--------------------------------------------------------------------------------
/scripts/lenet_conv/utils:
--------------------------------------------------------------------------------
1 | ../../utils/
--------------------------------------------------------------------------------
/scripts/lenet_dense/bbdropout.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | from tensorflow.examples.tutorials.mnist import input_data
3 | from model.lenet import lenet_dense
4 | from model.bbdropout import bbdropout
5 | from utils.accumulator import Accumulator
6 | from utils.train import *
7 | from utils.mnist import mnist_input
8 | import time
9 | import os
10 | import argparse
11 | import csv
12 | from pylab import *
13 |
14 | parser = argparse.ArgumentParser()
15 | parser.add_argument('--batch_size', type=int, default=100)
16 | parser.add_argument('--n_epochs', type=int, default=200)
17 | parser.add_argument('--save_freq', type=int, default=20)
18 | parser.add_argument('--savedir', type=str, default=None)
19 | parser.add_argument('--pretraindir', type=str, default=None)
20 | parser.add_argument('--mode', type=str, default='train')
21 | parser.add_argument('--gpu_num', type=int, default=0)
22 | parser.add_argument('--csvfn', type=str, default=None)
23 | args = parser.parse_args()
24 |
25 | os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'
26 | os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu_num)
27 |
28 | pretraindir = './results/pretrained' if args.pretraindir is None else args.pretraindir
29 | savedir = './results/bbdropout/sample_run' if args.savedir is None else args.savedir
30 | if not os.path.isdir(savedir):
31 | os.makedirs(savedir)
32 |
33 | batch_size = args.batch_size
34 | mnist, n_train_batches, n_test_batches = mnist_input(batch_size)
35 | x = tf.placeholder(tf.float32, [None, 784])
36 | y = tf.placeholder(tf.float32, [None, 10])
37 | N = mnist.train.num_examples
38 | dropout = bbdropout
39 | net = lenet_dense(x, y, True, dropout=dropout)
40 | tnet = lenet_dense(x, y, False, reuse=True, dropout=dropout)
41 |
42 | def train():
43 | loss = net['cent'] + tf.add_n(net['kl'])/float(N) + net['wd']
44 | global_step = tf.train.get_or_create_global_step()
45 | bdr = [int(n_train_batches*(args.n_epochs-1)*r) for r in [0.5, 0.75]]
46 | vals = [1e-2, 1e-3, 1e-4]
47 | lr = tf.train.piecewise_constant(tf.cast(global_step, tf.int32), bdr, vals)
48 | train_op1 = tf.train.AdamOptimizer(lr).minimize(loss,
49 | var_list=net['qpi_vars'], global_step=global_step)
50 | train_op2 = tf.train.AdamOptimizer(0.1*lr).minimize(loss,
51 | var_list=net['weights'])
52 | train_op = tf.group(train_op1, train_op2)
53 |
54 | pretrain_saver = tf.train.Saver(net['weights'])
55 | saver = tf.train.Saver(net['weights']+net['qpi_vars'])
56 | logfile = open(os.path.join(savedir, 'train.log'), 'w', 0)
57 |
58 | sess = tf.Session()
59 | sess.run(tf.global_variables_initializer())
60 | pretrain_saver.restore(sess, os.path.join(pretraindir, 'model'))
61 |
62 | train_logger = Accumulator('cent', 'acc')
63 | train_to_run = [train_op, net['cent'], net['acc']]
64 | test_logger = Accumulator('cent', 'acc')
65 | test_to_run = [tnet['cent'], tnet['acc']]
66 | for i in range(args.n_epochs):
67 | line = 'Epoch %d start, learning rate %f' % (i+1, sess.run(lr))
68 | print(line)
69 | logfile.write(line + '\n')
70 | train_logger.clear()
71 | start = time.time()
72 | for j in range(n_train_batches):
73 | bx, by = mnist.train.next_batch(batch_size)
74 | train_logger.accum(sess.run(train_to_run, {x:bx, y:by}))
75 | train_logger.print_(header='train', epoch=i+1,
76 | time=time.time()-start, logfile=logfile)
77 |
78 | test_logger.clear()
79 | for j in range(n_test_batches):
80 | bx, by = mnist.test.next_batch(batch_size)
81 | test_logger.accum(sess.run(test_to_run, {x:bx, y:by}))
82 | test_logger.print_(header='test', epoch=i+1,
83 | time=time.time()-start, logfile=logfile)
84 | line = 'kl: ' + str(sess.run(tnet['kl'])) + '\n'
85 | line += 'n_active: ' + str(sess.run(tnet['n_active'])) + '\n'
86 | print(line)
87 | logfile.write(line+'\n')
88 |
89 | if (i+1)%args.save_freq == 0:
90 | saver.save(sess, os.path.join(savedir, 'model'))
91 |
92 | logfile.close()
93 | saver.save(sess, os.path.join(savedir, 'model'))
94 |
95 | def test():
96 | sess = tf.Session()
97 | saver = tf.train.Saver(tnet['weights']+tnet['qpi_vars'])
98 | saver.restore(sess, os.path.join(savedir, 'model'))
99 | logger = Accumulator('cent', 'acc')
100 | to_run = [tnet['cent'], tnet['acc']]
101 | for j in range(n_test_batches):
102 | bx, by = mnist.test.next_batch(batch_size)
103 | logger.accum(sess.run(to_run, {x:bx, y:by}))
104 | logger.print_(header='test')
105 | line = 'kl: ' + str(sess.run(tnet['kl'])) + '\n'
106 | line += 'n_active: ' + str(sess.run(tnet['n_active'])) + '\n'
107 | print(line)
108 |
109 | def visualize():
110 | sess = tf.Session()
111 | saver = tf.train.Saver(tnet['weights']+tnet['qpi_vars'])
112 | saver.restore(sess, os.path.join(savedir, 'model'))
113 |
114 | n_drop = len(tnet['n_active'])
115 | fig = figure('pi')
116 | axarr = fig.subplots(n_drop)
117 | for i in range(n_drop):
118 | np_pi = sess.run(tnet['pi'][i]).reshape((1,-1))
119 | im = axarr[i].imshow(np_pi, cmap='gray', aspect='auto')
120 | axarr[i].yaxis.set_visible(False)
121 | axarr[i].xaxis.set_major_locator(MaxNLocator(integer=True))
122 | if i == n_drop-1:
123 | axarr[i].set_xlabel('neurons')
124 | fig.colorbar(im, ax=axarr[i])
125 | show()
126 |
127 | def record():
128 | sess = tf.Session()
129 | saver = tf.train.Saver(tnet['weights']+tnet['qpi_vars'])
130 | saver.restore(sess, os.path.join(savedir, 'model'))
131 | logger = Accumulator('cent', 'acc')
132 | to_run = [tnet['cent'], tnet['acc']]
133 | for j in range(n_test_batches):
134 | bx, by = mnist.test.next_batch(batch_size)
135 | logger.accum(sess.run(to_run, {x:bx, y:by}))
136 | np_n_active = sess.run(tnet['n_active'])
137 |
138 | if not os.path.isdir('../../records'):
139 | os.makedirs('../../records')
140 | csvfn = os.path.join('../../records',
141 | 'bbdropout_lenet_dense.csv' if args.csvfn is None else args.csvfn)
142 |
143 | if csvfn is not None:
144 | flag = 'a' if os.path.exists(csvfn) else 'w'
145 | with open(csvfn, flag) as f:
146 | writer = csv.writer(f)
147 | if flag=='w':
148 | writer.writerow(['savedir', 'cent', 'acc', 'n_active'])
149 | line = [savedir]
150 | line.append('%.4f' % logger.get('cent'))
151 | line.append('%.4f' % logger.get('acc'))
152 | line.append('-'.join(str(x) for x in np_n_active))
153 | writer.writerow(line)
154 |
155 | if __name__=='__main__':
156 | if args.mode == 'train':
157 | train()
158 | elif args.mode == 'test':
159 | test()
160 | elif args.mode == 'vis':
161 | visualize()
162 | elif args.mode == 'record':
163 | record()
164 | else:
165 | raise ValueError('Invalid mode %s' % args.mode)
166 |
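167 | # Example invocation (sketch, not part of the original script; run pretrain.py
168 | # first so that ./results/pretrained/model exists):
169 | #   python bbdropout.py --mode train
170 | #   python bbdropout.py --mode test
171 | #   python bbdropout.py --mode vis      # plot the learned keep probabilities pi
172 | #   python bbdropout.py --mode record   # append results to ../../records/*.csv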
--------------------------------------------------------------------------------
/scripts/lenet_dense/dbbdropout.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import tensorflow as tf
3 | from tensorflow.examples.tutorials.mnist import input_data
4 | from model.lenet import lenet_dense
5 | from model.bbdropout import bbdropout
6 | from utils.accumulator import Accumulator
7 | from utils.train import *
8 | from utils.mnist import mnist_input
9 | import time
10 | import os
11 | import argparse
12 | import csv
13 | import matplotlib
14 | matplotlib.use('Agg')
15 | import matplotlib.pyplot as plt
16 |
17 | parser = argparse.ArgumentParser()
18 | parser.add_argument('--batch_size', type=int, default=100)
19 | parser.add_argument('--n_epochs', type=int, default=200)
20 | parser.add_argument('--save_freq', type=int, default=20)
21 | parser.add_argument('--vis_freq', type=int, default=20)
22 | parser.add_argument('--center_init', type=float, default=1.0)
23 | parser.add_argument('--savedir', type=str, default=None)
24 | parser.add_argument('--pretraindir', type=str, default=None)
25 | parser.add_argument('--mode', type=str, default='train')
26 | parser.add_argument('--gpu_num', type=int, default=0)
27 | parser.add_argument('--csvfn', type=str, default=None)
28 | args = parser.parse_args()
29 |
30 | os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'
31 | os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu_num)
32 |
33 | pretraindir = './results/bbdropout/sample_run' if args.pretraindir is None else args.pretraindir
34 | savedir = './results/dbbdropout/sample_run' if args.savedir is None else args.savedir
35 | if not os.path.isdir(savedir):
36 | os.makedirs(savedir)
37 | figdir = os.path.join(savedir, 'figs')
38 | if not os.path.isdir(figdir):
39 | os.makedirs(figdir)
40 |
41 | batch_size = args.batch_size
42 | mnist, n_train_batches, n_test_batches = mnist_input(batch_size)
43 | x = tf.placeholder(tf.float32, [None, 784])
44 | y = tf.placeholder(tf.float32, [None, 10])
45 | N = mnist.train.num_examples
46 | center_init = args.center_init
47 | net = lenet_dense(x, y, True, dropout=bbdropout,
48 | dep=True, center_init=center_init)
49 | tnet = lenet_dense(x, y, False, reuse=True, dropout=bbdropout,
50 | dep=True, center_init=center_init)
51 | n_drop = len(tnet['n_active'])
52 |
53 | def train():
54 | loss = net['cent'] + tf.add_n(net['kl'])/float(N) + net['wd']
55 | global_step = tf.train.get_or_create_global_step()
56 | bdr = [int(n_train_batches*(args.n_epochs-1)*r) for r in [0.5, 0.75]]
57 | vals = [1e-2, 1e-3, 1e-4]
58 | lr = tf.train.piecewise_constant(tf.cast(global_step, tf.int32), bdr, vals)
59 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
60 | with tf.control_dependencies(update_ops):
61 | train_op1 = tf.train.AdamOptimizer(lr).minimize(loss,
62 | var_list=net['pzx_vars'], global_step=global_step)
63 | train_op2 = tf.train.AdamOptimizer(0.1*lr).minimize(loss,
64 | var_list=net['weights'])
65 | train_op = tf.group(train_op1, train_op2)
66 |
67 | pretrain_saver = tf.train.Saver(net['weights']+net['qpi_vars'])
68 | saver = tf.train.Saver(net['weights']+net['qpi_vars']+net['pzx_vars'])
69 | logfile = open(os.path.join(savedir, 'train.log'), 'w', 0)
70 |
71 | sess = tf.Session()
72 | sess.run(tf.global_variables_initializer())
73 | pretrain_saver.restore(sess, os.path.join(pretraindir, 'model'))
74 |
75 | train_logger = Accumulator('cent', 'acc')
76 | train_to_run = [train_op, net['cent'], net['acc']]
77 | test_logger = Accumulator('cent', 'acc')
78 | test_to_run = [tnet['cent'], tnet['acc']]
79 | test_to_run += tnet['n_active']
80 | for i in range(args.n_epochs):
81 | line = 'Epoch %d start, learning rate %f' % (i+1, sess.run(lr))
82 | print(line)
83 | logfile.write(line + '\n')
84 | train_logger.clear()
85 | start = time.time()
86 | for j in range(n_train_batches):
87 | bx, by = mnist.train.next_batch(batch_size)
88 | train_logger.accum(sess.run(train_to_run, {x:bx, y:by}))
89 | train_logger.print_(header='train', epoch=i+1,
90 | time=time.time()-start, logfile=logfile)
91 |
92 | test_logger.clear()
93 | np_n_active = [0]*n_drop
94 | for j in range(n_test_batches):
95 | bx, by = mnist.test.next_batch(batch_size)
96 | res = sess.run(test_to_run, {x:bx, y:by})
97 | test_logger.accum(res[:-n_drop])
98 | np_n_active = [a + b for a, b in zip(np_n_active, res[-n_drop:])]
99 | test_logger.print_(header='test', epoch=i+1,
100 | time=time.time()-start, logfile=logfile)
101 | np_n_active = [int(a/n_test_batches) for a in np_n_active]
102 | line = 'kl: ' + str(sess.run(tnet['kl'])) + '\n'
103 | line += 'n_active: ' + str(np_n_active) + '\n'
104 | print(line)
105 | logfile.write(line+'\n')
106 | if (i+1)%args.save_freq == 0:
107 | saver.save(sess, os.path.join(savedir, 'model'))
108 |
109 | if (i+1)%args.vis_freq == 0:
110 | fig = _visualize(sess)
111 | fig.savefig(os.path.join(figdir, 'epoch%d.png'%(i+1)), dpi=200)
112 |
113 | logfile.close()
114 | saver.save(sess, os.path.join(savedir, 'model'))
115 |
116 | def test():
117 | sess = tf.Session()
118 | saver = tf.train.Saver(tnet['weights']+tnet['qpi_vars']+tnet['pzx_vars'])
119 | saver.restore(sess, os.path.join(savedir, 'model'))
120 | logger = Accumulator('cent', 'acc')
121 | to_run = [tnet['cent'], tnet['acc']] + tnet['n_active']
122 | np_n_active = [0]*n_drop
123 | for j in range(n_test_batches):
124 | bx, by = mnist.test.next_batch(batch_size)
125 | res = sess.run(to_run, {x:bx, y:by})
126 | logger.accum(res[:-n_drop])
127 | np_n_active = [a + b for a, b in zip(np_n_active, res[-n_drop:])]
128 | np_n_active = [int(a/n_test_batches) for a in np_n_active]
129 | logger.print_(header='test')
130 | line = 'kl: ' + str(sess.run(tnet['kl'])) + '\n'
131 | line += 'n_active: ' + str(np_n_active) + '\n'
132 | print(line)
133 |
134 | def _visualize(sess):
135 | pi_csum = [tf.matmul(y, pi, transpose_a=True) for pi in tnet['pi']]
136 | csum = tf.expand_dims(tf.reduce_sum(y, 0), 1)
137 |
138 | np_pi_csum = [0]*n_drop
139 | np_csum = 0
140 | for j in range(n_test_batches):
141 | bx, by = mnist.test.next_batch(args.batch_size)
142 | A, B = sess.run([pi_csum, csum], {x:bx, y:by})
143 | for k in range(len(pi_csum)):
144 | np_pi_csum[k] += A[k]
145 | np_csum += B
146 |
147 | fig = plt.figure('vis')
148 | axarr = fig.subplots(n_drop)
149 | for i in range(n_drop):
150 | im = axarr[i].imshow(np_pi_csum[i]/np_csum, cmap='gray', aspect='auto')
151 | fig.colorbar(im, ax=axarr[i])
152 | return fig
153 |
154 | def visualize():
155 | sess = tf.Session()
156 | saver = tf.train.Saver(tnet['weights']+tnet['qpi_vars']+tnet['pzx_vars'])
157 | saver.restore(sess, os.path.join(savedir, 'model'))
158 | _visualize(sess)
159 | plt.show()
160 |
161 | def record():
162 | sess = tf.Session()
163 | saver = tf.train.Saver(tnet['weights']+tnet['qpi_vars']+tnet['pzx_vars'])
164 | saver.restore(sess, os.path.join(savedir, 'model'))
165 | logger = Accumulator('cent', 'acc')
166 | to_run = [tnet['cent'], tnet['acc']] + tnet['n_active']
167 | np_n_active = [0]*n_drop
168 | for j in range(n_test_batches):
169 | bx, by = mnist.test.next_batch(batch_size)
170 | res = sess.run(to_run, {x:bx, y:by})
171 | logger.accum(res[:-n_drop])
172 | np_n_active = [a + b for a, b in zip(np_n_active, res[-n_drop:])]
173 | np_n_active = [int(a/n_test_batches) for a in np_n_active]
174 |
175 | if not os.path.isdir('../../records'):
176 | os.makedirs('../../records')
177 | csvfn = os.path.join('../../records',
178 | 'dbbdropout_lenet_dense.csv' if args.csvfn is None else args.csvfn)
179 |
180 | if csvfn is not None:
181 | flag = 'a' if os.path.exists(csvfn) else 'w'
182 | with open(csvfn, flag) as f:
183 | writer = csv.writer(f)
184 | if flag=='w':
185 | writer.writerow(['savedir', 'cent', 'acc', 'n_active'])
186 | line = [savedir]
187 | line.append('%.4f' % logger.get('cent'))
188 | line.append('%.4f' % logger.get('acc'))
189 | line.append('-'.join(str(x) for x in np_n_active))
190 | writer.writerow(line)
191 |
192 | if __name__=='__main__':
193 | if args.mode == 'train':
194 | train()
195 | elif args.mode == 'test':
196 | test()
197 | elif args.mode == 'vis':
198 | visualize()
199 | elif args.mode == 'record':
200 | record()
201 | else:
202 | raise ValueError('Invalid mode %s' % args.mode)
203 |
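204 | # Example invocation (sketch, not part of the original script; expects a trained
205 | # bbdropout checkpoint under ./results/bbdropout/sample_run, i.e. run
206 | # bbdropout.py --mode train first):
207 | #   python dbbdropout.py --mode train
208 | #   python dbbdropout.py --mode vis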
--------------------------------------------------------------------------------
/scripts/lenet_dense/model:
--------------------------------------------------------------------------------
1 | ../../model
--------------------------------------------------------------------------------
/scripts/lenet_dense/pretrain.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import tensorflow as tf
3 | from tensorflow.examples.tutorials.mnist import input_data
4 | from model.lenet import lenet_dense
5 | from utils.accumulator import Accumulator
6 | from utils.train import *
7 | from utils.mnist import mnist_input
8 | import time
9 | import os
10 | import argparse
11 |
12 | parser = argparse.ArgumentParser()
13 | parser.add_argument('--batch_size', type=int, default=100)
14 | parser.add_argument('--n_epochs', type=int, default=200)
15 | parser.add_argument('--save_freq', type=int, default=20)
16 | parser.add_argument('--savedir', type=str, default=None)
17 | parser.add_argument('--mode', type=str, default='train')
18 | parser.add_argument('--gpu_num', type=int, default=0)
19 | args = parser.parse_args()
20 |
21 | os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'
22 | os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu_num)
23 |
24 | savedir = './results/pretrained' if args.savedir is None else args.savedir
25 | if not os.path.isdir(savedir):
26 | os.makedirs(savedir)
27 |
28 | batch_size = args.batch_size
29 | mnist, n_train_batches, n_test_batches = mnist_input(batch_size)
30 | x = tf.placeholder(tf.float32, [None, 784])
31 | y = tf.placeholder(tf.float32, [None, 10])
32 | net = lenet_dense(x, y, True)
33 | tnet = lenet_dense(x, y, False, reuse=True)
34 |
35 | def train():
36 | loss = net['cent'] + net['wd']
37 | global_step = tf.train.get_or_create_global_step()
38 | lr = tf.train.piecewise_constant(tf.cast(global_step, tf.int32),
39 | [n_train_batches*args.n_epochs/2], [1e-4, 1e-5])
40 | train_op = tf.train.AdamOptimizer(lr).minimize(loss, global_step=global_step)
41 |
42 | saver = tf.train.Saver(net['weights'])
43 | logfile = open(os.path.join(savedir, 'train.log'), 'w', 0)
44 |
45 | sess = tf.Session()
46 | sess.run(tf.global_variables_initializer())
47 |
48 | train_logger = Accumulator('cent', 'acc')
49 | train_to_run = [train_op, net['cent'], net['acc']]
50 | test_logger = Accumulator('cent', 'acc')
51 | test_to_run = [tnet['cent'], tnet['acc']]
52 | for i in range(args.n_epochs):
53 | line = 'Epoch %d start, learning rate %f' % (i+1, sess.run(lr))
54 | print(line)
55 | logfile.write(line + '\n')
56 | train_logger.clear()
57 | start = time.time()
58 | for j in range(n_train_batches):
59 | bx, by = mnist.train.next_batch(batch_size)
60 | train_logger.accum(sess.run(train_to_run, {x:bx, y:by}))
61 | train_logger.print_(header='train', epoch=i+1,
62 | time=time.time()-start, logfile=logfile)
63 |
64 | test_logger.clear()
65 | for j in range(n_test_batches):
66 | bx, by = mnist.test.next_batch(batch_size)
67 | test_logger.accum(sess.run(test_to_run, {x:bx, y:by}))
68 | test_logger.print_(header='test', epoch=i+1,
69 | time=time.time()-start, logfile=logfile)
70 |
71 | print()
72 | logfile.write('\n')
73 | if (i+1)%args.save_freq == 0:
74 | saver.save(sess, os.path.join(savedir, 'model'))
75 |
76 | logfile.close()
77 | saver.save(sess, os.path.join(savedir, 'model'))
78 |
79 | def test():
80 | sess = tf.Session()
81 | saver = tf.train.Saver(tnet['weights'])
82 | saver.restore(sess, os.path.join(savedir, 'model'))
83 | logger = Accumulator('cent', 'acc')
84 | to_run = [tnet['cent'], tnet['acc']]
85 | for j in range(n_test_batches):
86 | bx, by = mnist.test.next_batch(batch_size)
87 | logger.accum(sess.run(to_run, {x:bx, y:by}))
88 | logger.print_(header='test')
89 |
90 | if __name__=='__main__':
91 | if args.mode == 'train':
92 | train()
93 | elif args.mode == 'test':
94 | test()
95 | else:
96 | raise ValueError('Invalid mode %s' % args.mode)
97 |
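98 | # Example invocation (sketch, not part of the original script; checkpoints are
99 | # written to ./results/pretrained by default):
100 | #   python pretrain.py --mode train
101 | #   python pretrain.py --mode test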
--------------------------------------------------------------------------------
/scripts/lenet_dense/sbpdropout.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import tensorflow as tf
3 | from tensorflow.examples.tutorials.mnist import input_data
4 | from model.lenet import lenet_dense
5 | from model.sbpdropout import sbpdropout
6 | from utils.accumulator import Accumulator
7 | from utils.train import *
8 | from utils.mnist import mnist_input
9 | import time
10 | import os
11 | import argparse
12 | import csv
13 |
14 | parser = argparse.ArgumentParser()
15 | parser.add_argument('--batch_size', type=int, default=100)
16 | parser.add_argument('--n_epochs', type=int, default=200)
17 | parser.add_argument('--save_freq', type=int, default=20)
18 | parser.add_argument('--savedir', type=str, default=None)
19 | parser.add_argument('--pretraindir', type=str, default=None)
20 | parser.add_argument('--mode', type=str, default='train')
21 | parser.add_argument('--gpu_num', type=int, default=0)
22 | parser.add_argument('--csvfn', type=str, default=None)
23 | args = parser.parse_args()
24 |
25 | os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'
26 | os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu_num)
27 |
28 | pretraindir = './results/pretrained' if args.pretraindir is None else args.pretraindir
29 | savedir = './results/sbpdropout/sample_run' if args.savedir is None else args.savedir
30 | if not os.path.isdir(savedir):
31 | os.makedirs(savedir)
32 |
33 | batch_size = args.batch_size
34 | mnist, n_train_batches, n_test_batches = mnist_input(batch_size)
35 | x = tf.placeholder(tf.float32, [None, 784])
36 | y = tf.placeholder(tf.float32, [None, 10])
37 | N = mnist.train.num_examples
38 | dropout = sbpdropout
39 | net = lenet_dense(x, y, True, dropout=dropout)
40 | tnet = lenet_dense(x, y, False, reuse=True, dropout=dropout)
41 |
42 | def train():
43 | loss = net['cent'] + tf.add_n(net['kl'])/float(N) + net['wd']
44 | global_step = tf.train.get_or_create_global_step()
45 | bdr = [int(n_train_batches*(args.n_epochs-1)*r) for r in [0.5, 0.75]]
46 | vals = [1e-2, 1e-3, 1e-4]
47 | lr = tf.train.piecewise_constant(tf.cast(global_step, tf.int32), bdr, vals)
48 | train_op1 = tf.train.AdamOptimizer(lr).minimize(loss,
49 | var_list=net['qpi_vars'], global_step=global_step)
50 | train_op2 = tf.train.AdamOptimizer(0.1*lr).minimize(loss,
51 | var_list=net['weights'])
52 | train_op = tf.group(train_op1, train_op2)
53 |
54 | pretrain_saver = tf.train.Saver(net['weights'])
55 | saver = tf.train.Saver(net['weights']+net['qpi_vars'])
56 | logfile = open(os.path.join(savedir, 'train.log'), 'w', 0)
57 |
58 | sess = tf.Session()
59 | sess.run(tf.global_variables_initializer())
60 | pretrain_saver.restore(sess, os.path.join(pretraindir, 'model'))
61 |
62 | train_logger = Accumulator('cent', 'acc')
63 | train_to_run = [train_op, net['cent'], net['acc']]
64 | test_logger = Accumulator('cent', 'acc')
65 | test_to_run = [tnet['cent'], tnet['acc']]
66 | for i in range(args.n_epochs):
67 | line = 'Epoch %d start, learning rate %f' % (i+1, sess.run(lr))
68 | print(line)
69 | logfile.write(line + '\n')
70 | train_logger.clear()
71 | start = time.time()
72 | for j in range(n_train_batches):
73 | bx, by = mnist.train.next_batch(batch_size)
74 | train_logger.accum(sess.run(train_to_run, {x:bx, y:by}))
75 | train_logger.print_(header='train', epoch=i+1,
76 | time=time.time()-start, logfile=logfile)
77 |
78 | test_logger.clear()
79 | for j in range(n_test_batches):
80 | bx, by = mnist.test.next_batch(batch_size)
81 | test_logger.accum(sess.run(test_to_run, {x:bx, y:by}))
82 | test_logger.print_(header='test', epoch=i+1,
83 | time=time.time()-start, logfile=logfile)
84 | line = 'kl: ' + str(sess.run(tnet['kl'])) + '\n'
85 | line += 'n_active: ' + str(sess.run(tnet['n_active'])) + '\n'
86 | print(line)
87 | logfile.write(line+'\n')
88 | if (i+1)%args.save_freq == 0:
89 | saver.save(sess, os.path.join(savedir, 'model'))
90 |
91 | logfile.close()
92 | saver.save(sess, os.path.join(savedir, 'model'))
93 |
94 | def test():
95 | sess = tf.Session()
96 | saver = tf.train.Saver(tnet['weights']+tnet['qpi_vars'])
97 | saver.restore(sess, os.path.join(savedir, 'model'))
98 | logger = Accumulator('cent', 'acc')
99 | to_run = [tnet['cent'], tnet['acc']]
100 | for j in range(n_test_batches):
101 | bx, by = mnist.test.next_batch(batch_size)
102 | logger.accum(sess.run(to_run, {x:bx, y:by}))
103 | logger.print_(header='test')
104 | line = 'kl: ' + str(sess.run(tnet['kl'])) + '\n'
105 | line += 'n_active: ' + str(sess.run(tnet['n_active'])) + '\n'
106 | print(line)
107 |
108 | def record():
109 | sess = tf.Session()
110 | saver = tf.train.Saver(tnet['weights']+tnet['qpi_vars'])
111 | saver.restore(sess, os.path.join(savedir, 'model'))
112 | logger = Accumulator('cent', 'acc')
113 | to_run = [tnet['cent'], tnet['acc']]
114 | for j in range(n_test_batches):
115 | bx, by = mnist.test.next_batch(batch_size)
116 | logger.accum(sess.run(to_run, {x:bx, y:by}))
117 | np_n_active = sess.run(tnet['n_active'])
118 |
119 | if not os.path.isdir('../../records'):
120 | os.makedirs('../../records')
121 | csvfn = os.path.join('../../records',
122 | 'sbpdropout_lenet_dense.csv' if args.csvfn is None else args.csvfn)
123 |
124 | if csvfn is not None:
125 | flag = 'a' if os.path.exists(csvfn) else 'w'
126 | with open(csvfn, flag) as f:
127 | writer = csv.writer(f)
128 | if flag=='w':
129 | writer.writerow(['savedir', 'cent', 'acc', 'n_active'])
130 | line = [savedir]
131 | line.append('%.4f' % logger.get('cent'))
132 | line.append('%.4f' % logger.get('acc'))
133 | line.append('-'.join(str(x) for x in np_n_active))
134 | writer.writerow(line)
135 |
136 | if __name__=='__main__':
137 | if args.mode == 'train':
138 | train()
139 | elif args.mode == 'test':
140 | test()
141 | elif args.mode == 'record':
142 | record()
143 | else:
144 | raise ValueError('Invalid mode %s' % args.mode)
145 |
--------------------------------------------------------------------------------
/scripts/lenet_dense/utils:
--------------------------------------------------------------------------------
1 | ../../utils/
--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenXAIProject/Network-Structure-Dropout/723df2d2392ec16eca3452d4afb81d54c4a2f841/utils/__init__.py
--------------------------------------------------------------------------------
/utils/accumulator.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 |
3 | class Accumulator():
4 | def __init__(self, *args):
5 | self.args = args
6 | self.argdict = {}
7 | for i, arg in enumerate(args):
8 | self.argdict[arg] = i
9 | self.sums = [0]*len(args)
10 | self.cnt = 0
11 |
12 | def accum(self, val):
13 | val = [val] if type(val) is not list else val
14 | val = [v for v in val if v is not None]
15 | assert(len(val) == len(self.args))
16 | for i in range(len(val)):
17 | self.sums[i] += val[i]
18 | self.cnt += 1
19 |
20 | def clear(self):
21 | self.sums = [0]*len(self.args)
22 | self.cnt = 0
23 |
24 | def get(self, arg, avg=True):
25 | i = self.argdict.get(arg, -1)
26 | assert(i != -1)
27 | return (self.sums[i]/self.cnt if avg else self.sums[i])
28 |
29 | def print_(self, header=None, epoch=None, it=None, time=None,
30 | logfile=None, do_not_print=[], as_int=[],
31 | avg=True):
32 | line = '' if header is None else header + ': '
33 | if epoch is not None:
34 | line += ('epoch %d, ' % epoch)
35 | if it is not None:
36 | line += ('iter %d, ' % it)
37 | if time is not None:
38 | line += ('(%.3f secs), ' % time)
39 |
40 | args = [arg for arg in self.args if arg not in do_not_print]
41 |
42 | for arg in args[:-1]:
43 | val = self.sums[self.argdict[arg]]
44 | if avg:
45 | val /= self.cnt
46 | if arg in as_int:
47 | line += ('%s %d, ' % (arg, int(val)))
48 | else:
49 | line += ('%s %f, ' % (arg, val))
50 | val = self.sums[self.argdict[args[-1]]]
51 | if avg:
52 | val /= self.cnt
53 | if args[-1] in as_int:
54 | line += ('%s %d' % (args[-1], int(val)))
55 | else:
56 | line += ('%s %f' % (args[-1], val))
57 | print(line)
58 |
59 | if logfile is not None:
60 | logfile.write(line + '\n')
61 |
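62 | # Minimal usage sketch (not part of the original module):
63 | #   logger = Accumulator('cent', 'acc')
64 | #   logger.accum([0.35, 0.91])        # one value per tracked quantity
65 | #   logger.accum([0.30, 0.93])
66 | #   logger.print_(header='test')      # prints the running averages
67 | #   mean_acc = logger.get('acc')      # 0.92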
--------------------------------------------------------------------------------
/utils/cifar10.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | import os
4 | import sys
5 | from paths import CIFAR10_PATH
6 |
7 | HEIGHT = 32
8 | WIDTH = 32
9 | DEPTH = 3
10 | NUM_CLASSES = 10
11 | NUM_DATA_FILES = 5
12 | NUM_TRAIN = 10000 * NUM_DATA_FILES
13 | NUM_TEST = 10000
14 |
15 | def record_dataset(filenames):
16 | label_bytes = 1
17 | image_bytes = DEPTH * HEIGHT * WIDTH
18 | record_bytes = label_bytes + image_bytes
19 | return tf.data.FixedLengthRecordDataset(filenames, record_bytes)
20 |
21 | def get_filenames(training):
22 | data_dir = os.path.join(CIFAR10_PATH, 'cifar-10-batches-bin')
23 | if training:
24 | return [os.path.join(data_dir, 'data_batch_%d.bin' % i)
25 | for i in range(1, NUM_DATA_FILES+1)]
26 | else:
27 | return [os.path.join(data_dir, 'test_batch.bin')]
28 |
29 | def parse_record(raw_record):
30 | """Parse a CIFAR-10 record from value."""
31 | # Every record consists of a label followed by the image, with a fixed number
32 | # of bytes for each.
33 | label_offset = 0
34 | label_bytes = 1
35 | image_bytes = DEPTH * HEIGHT * WIDTH
36 | record_bytes = label_bytes + image_bytes
37 |
38 | # Convert from a string to a vector of uint8 that is record_bytes long.
39 | record_vector = tf.decode_raw(raw_record, tf.uint8)
40 |
41 | # The first byte represents the label, which we convert from uint8 to int32.
42 | label = tf.cast(record_vector[label_offset], tf.int32)
43 | label = tf.one_hot(label, NUM_CLASSES)
44 |
45 | # The remaining bytes after the label represent the image, which we reshape
46 | # from [depth * height * width] to [depth, height, width].
47 | depth_major = tf.reshape(
48 | record_vector[label_offset+label_bytes:record_bytes],
49 | [DEPTH, HEIGHT, WIDTH])
50 |
51 | # Convert from [depth, height, width] to [height, width, depth], and cast as
52 | # float32.
53 | image = tf.cast(tf.transpose(depth_major, [1, 2, 0]), tf.float32)
54 | return image, label
55 |
56 | def preprocess_image(image, training):
57 | """Preprocess a single image of layout [height, width, depth]."""
58 | if training:
59 | # Resize the image to add four extra pixels on each side.
60 | image = tf.image.resize_image_with_crop_or_pad(
61 | image, HEIGHT + 8, WIDTH + 8)
62 |
63 | # Randomly crop a [HEIGHT, WIDTH] section of the image.
64 | image = tf.random_crop(image, [HEIGHT, WIDTH, DEPTH])
65 |
66 | # Randomly flip the image horizontally.
67 | image = tf.image.random_flip_left_right(image)
68 |
69 | # Subtract off the mean and divide by the variance of the pixels.
70 | image = tf.image.per_image_standardization(image)
71 |
72 | # transpose image back to depth major
73 | image = tf.transpose(image, [2, 1, 0])
74 |
75 | return image
76 |
77 | def cifar10_input(batch_size, training):
78 | dataset = record_dataset(get_filenames(training))
79 |
80 | if training:
81 | dataset = dataset.shuffle(buffer_size=NUM_TRAIN)
82 |
83 | dataset = dataset.map(parse_record)
84 | dataset = dataset.map(
85 | lambda image, label: (preprocess_image(image, training), label))
86 |
87 | dataset = dataset.prefetch(8 * batch_size)
88 | dataset = dataset.repeat()
89 | dataset = dataset.batch(batch_size)
90 | iterator = dataset.make_one_shot_iterator()
91 | images, labels = iterator.get_next()
92 |
93 | return images, labels
94 |
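95 | # Usage sketch (not part of the original module): the returned tensors are graph
96 | # ops that produce a fresh batch on every sess.run call, so no feed_dict is used.
97 | #   images, labels = cifar10_input(128, training=True)
98 | #   # each sess.run([images, labels]) yields 128 preprocessed images and
99 | #   # their one-hot labels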
--------------------------------------------------------------------------------
/utils/cifar100.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | import os
4 | import sys
5 | from paths import CIFAR100_PATH
6 |
7 | HEIGHT = 32
8 | WIDTH = 32
9 | DEPTH = 3
10 | NUM_SUPER_CLASSES = 20
11 | NUM_CLASSES = 100
12 | NUM_TRAIN = 50000
13 | NUM_TEST = 10000
14 |
15 | def record_dataset(filenames):
16 | label_bytes = 2
17 | image_bytes = DEPTH * HEIGHT * WIDTH
18 | record_bytes = label_bytes + image_bytes
19 | return tf.data.FixedLengthRecordDataset(filenames, record_bytes)
20 |
21 | def get_filenames(training):
22 | data_dir = os.path.join(CIFAR100_PATH, 'cifar-100-binary')
23 | if training:
24 | return [os.path.join(data_dir, 'train.bin')]
25 | else:
26 | return [os.path.join(data_dir, 'test.bin')]
27 |
28 | def parse_record(raw_record):
29 | """Parse a CIFAR-100 record from value."""
30 | # Every record consists of a label followed by the image, with a fixed number
31 | # of bytes for each.
32 | label_bytes = 2
33 | image_bytes = DEPTH * HEIGHT * WIDTH
34 | record_bytes = label_bytes + image_bytes
35 |
36 | # Convert from a string to a vector of uint8 that is record_bytes long.
37 | record_vector = tf.decode_raw(raw_record, tf.uint8)
38 |
39 | # The first and second bytes represent the super label and the label,
40 | # which we convert from uint8 to int32.
41 | slabel = tf.cast(record_vector[0], tf.int32)
42 | slabel = tf.one_hot(slabel, NUM_SUPER_CLASSES)
43 | label = tf.cast(record_vector[1], tf.int32)
44 | label = tf.one_hot(label, NUM_CLASSES)
45 |
46 | # The remaining bytes after the label represent the image, which we reshape
47 | # from [depth * height * width] to [depth, height, width].
48 | depth_major = tf.reshape(
49 | record_vector[label_bytes:record_bytes],
50 | [DEPTH, HEIGHT, WIDTH])
51 |
52 | # Convert from [depth, height, width] to [height, width, depth], and cast as
53 | # float32.
54 | image = tf.cast(tf.transpose(depth_major, [1, 2, 0]), tf.float32)
55 | return image, slabel, label
56 |
57 | def preprocess_image(image, training):
58 | """Preprocess a single image of layout [height, width, depth]."""
59 | if training:
60 | # Resize the image to add four extra pixels on each side.
61 | image = tf.image.resize_image_with_crop_or_pad(
62 | image, HEIGHT + 8, WIDTH + 8)
63 |
64 | # Randomly crop a [HEIGHT, WIDTH] section of the image.
65 | image = tf.random_crop(image, [HEIGHT, WIDTH, DEPTH])
66 |
67 | # Randomly flip the image horizontally.
68 | image = tf.image.random_flip_left_right(image)
69 |
70 | # Subtract off the mean and divide by the variance of the pixels.
71 | image = tf.image.per_image_standardization(image)
72 |
73 | # transpose image back to depth major
74 | image = tf.transpose(image, [2, 1, 0])
75 |
76 | return image
77 |
78 | def cifar100_input(batch_size, training):
79 | dataset = record_dataset(get_filenames(training))
80 |
81 | if training:
82 | dataset = dataset.shuffle(buffer_size=NUM_TRAIN)
83 |
84 | dataset = dataset.map(parse_record)
85 | dataset = dataset.map(
86 | lambda image, slabel, label: \
87 | (preprocess_image(image, training), slabel, label))
88 |
89 | dataset = dataset.prefetch(8 * batch_size)
90 | dataset = dataset.repeat()
91 | dataset = dataset.batch(batch_size)
92 | iterator = dataset.make_one_shot_iterator()
93 | images, slabels, labels = iterator.get_next()
94 |
95 | return images, slabels, labels
96 |
--------------------------------------------------------------------------------
/utils/mnist.py:
--------------------------------------------------------------------------------
1 | from tensorflow.examples.tutorials.mnist import input_data
2 | from paths import MNIST_PATH
3 |
4 | def mnist_input(batch_size):
5 | mnist = input_data.read_data_sets(MNIST_PATH, one_hot=True, validation_size=0)
6 | n_train_batches = mnist.train.num_examples/batch_size
7 | n_test_batches = mnist.test.num_examples/batch_size
8 | return mnist, n_train_batches, n_test_batches
9 |
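10 | # Note: utils/paths.py is not tracked in the repository; it is expected to
11 | # define MNIST_PATH (and likewise CIFAR10_PATH / CIFAR100_PATH for the CIFAR
12 | # loaders), e.g. MNIST_PATH = '/data/mnist'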
--------------------------------------------------------------------------------
/utils/train.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tensorflow.python.client import device_lib
3 |
4 | def cross_entropy(logits, labels):
5 | return tf.losses.softmax_cross_entropy(logits=logits, onehot_labels=labels)
6 |
7 | def weight_decay(decay, var_list=None):
8 | var_list = tf.trainable_variables() if var_list is None else var_list
9 | return decay*tf.add_n([tf.nn.l2_loss(var) for var in var_list])
10 |
11 | def accuracy(logits, labels):
12 | correct = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
13 | return tf.reduce_mean(tf.cast(correct, tf.float32))
14 |
15 | def get_train_op(optim, loss, global_step=None, clip=None, var_list=None):
16 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
17 | with tf.control_dependencies(update_ops):
18 | grad_and_vars = optim.compute_gradients(loss, var_list=var_list)
19 | if clip is not None:
20 | grad_and_vars = [((None if grad is None \
21 | else tf.clip_by_norm(grad, clip)), var) \
22 | for grad, var in grad_and_vars]
23 | train_op = optim.apply_gradients(grad_and_vars, global_step=global_step)
24 | return train_op
25 |
26 | # copied from https://stackoverflow.com/a/38580201
27 | def get_available_gpus():
28 | local_device_protos = device_lib.list_local_devices()
29 | mem_thres = 0.3*max([x.memory_limit for x in local_device_protos \
30 | if x.device_type=='GPU'])
31 | return [x.name for x in local_device_protos if x.device_type=='GPU' \
32 | and x.memory_limit > mem_thres]
33 |
34 | def average_gradients(tower_grads):
35 | """Calculate the average gradient for each shared variable across all towers.
36 |
37 | Note that this function provides a synchronization point across all towers.
38 |
39 | Args:
40 | tower_grads: List of lists of (gradient, variable) tuples. The outer list
41 | is over individual gradients. The inner list is over the gradient
42 | calculation for each tower.
43 | Returns:
44 | List of pairs of (gradient, variable) where the gradient has been averaged
45 | across all towers.
46 | """
47 | average_grads = []
48 | for grad_and_vars in zip(*tower_grads):
49 | # Note that each grad_and_vars looks like the following:
50 | # ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
51 | grads = []
52 | for g, _ in grad_and_vars:
53 | # Add 0 dimension to the gradients to represent the tower.
54 | expanded_g = tf.expand_dims(g, 0)
55 |
56 | # Append on a 'tower' dimension which we will average over below.
57 | grads.append(expanded_g)
58 |
59 | # Average over the 'tower' dimension.
60 | grad = tf.concat(axis=0, values=grads)
61 | grad = tf.reduce_mean(grad, 0)
62 |
63 | # Keep in mind that the Variables are redundant because they are shared
64 | # across towers. So .. we will just return the first tower's pointer to
65 | # the Variable.
66 | v = grad_and_vars[0][1]
67 | grad_and_var = (grad, v)
68 | average_grads.append(grad_and_var)
69 | return average_grads
70 |
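71 | # Multi-GPU usage sketch (not part of the original module): build one loss per
72 | # tower, average the per-tower gradients, then apply a single update.
73 | #   optim = tf.train.AdamOptimizer(1e-3)
74 | #   tower_grads = []
75 | #   for dev in get_available_gpus():
76 | #       with tf.device(dev):
77 | #           tower_loss = build_tower_loss()  # hypothetical per-tower loss builder
78 | #           tower_grads.append(optim.compute_gradients(tower_loss))
79 | #   train_op = optim.apply_gradients(average_gradients(tower_grads))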
--------------------------------------------------------------------------------