├── .gitignore
├── INQPruning.ipynb
├── KmeansPruning.ipynb
├── LICENSE
├── README.md
├── config.py
├── densenet.py
├── densenetfinalDNS.py
├── densenetfinalinq.py
├── densenetfinalkmeans.py
├── densenetfinalprune.py
├── densenetfinaltest.py
├── dnet_INQ-analysis.ipynb
├── dnet_INQ.ipynb
├── dnet_dns_analysis.ipynb
├── dnet_dns_analysis2.ipynb
├── dnet_kmeans.ipynb
├── dnet_prune.ipynb
└── dnet_prune_analysis.ipynb


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | env/
 12 | build/
 13 | develop-eggs/
 14 | dist/
 15 | downloads/
 16 | eggs/
 17 | .eggs/
 18 | lib/
 19 | lib64/
 20 | parts/
 21 | sdist/
 22 | var/
 23 | wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | 
 28 | # PyInstaller
 29 | #  Usually these files are written by a python script from a template
 30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .coverage
 42 | .coverage.*
 43 | .cache
 44 | nosetests.xml
 45 | coverage.xml
 46 | *.cover
 47 | .hypothesis/
 48 | 
 49 | # Translations
 50 | *.mo
 51 | *.pot
 52 | 
 53 | # Django stuff:
 54 | *.log
 55 | local_settings.py
 56 | 
 57 | # Flask stuff:
 58 | instance/
 59 | .webassets-cache
 60 | 
 61 | # Scrapy stuff:
 62 | .scrapy
 63 | 
 64 | # Sphinx documentation
 65 | docs/_build/
 66 | 
 67 | # PyBuilder
 68 | target/
 69 | 
 70 | # Jupyter Notebook
 71 | .ipynb_checkpoints
 72 | 
 73 | # pyenv
 74 | .python-version
 75 | 
 76 | # celery beat schedule file
 77 | celerybeat-schedule
 78 | 
 79 | # SageMath parsed files
 80 | *.sage.py
 81 | 
 82 | # dotenv
 83 | .env
 84 | 
 85 | # virtualenv
 86 | .venv
 87 | venv/
 88 | ENV/
 89 | 
 90 | # Spyder project settings
 91 | .spyderproject
 92 | .spyproject
 93 | 
 94 | # Rope project settings
 95 | .ropeproject
 96 | 
 97 | # mkdocs documentation
 98 | /site
 99 | 
100 | # mypy
101 | .mypy_cache/
102 | 


--------------------------------------------------------------------------------
/INQPruning.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {
  7 |     "collapsed": true
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "##################Build Essential DenseNet to Load Pretrained Parameters############################################### \n",
 12 |     "# encoding=utf8  \n",
 13 |     "import numpy as np\n",
 14 |     "import tensorflow as tf\n",
 15 |     "\n",
 16 |     "def unpickle(file):\n",
 17 |     "  import _pickle as cPickle\n",
 18 |     "  fo = open(file, 'rb')\n",
 19 |     "  dict = cPickle.load(fo,encoding='latin1')\n",
 20 |     "  fo.close()\n",
 21 |     "  if 'data' in dict:\n",
 22 |     "    dict['data'] = dict['data'].reshape((-1, 3, 32, 32)).swapaxes(1, 3).swapaxes(1, 2).reshape(-1, 32*32*3) / 256.\n",
 23 |     "\n",
 24 |     "  return dict"
 25 |    ]
 26 |   },
 27 |   {
 28 |    "cell_type": "code",
 29 |    "execution_count": 2,
 30 |    "metadata": {
 31 |     "collapsed": true
 32 |    },
 33 |    "outputs": [],
 34 |    "source": [
 35 |     "def load_data_one(f):\n",
 36 |     "  batch = unpickle(f)\n",
 37 |     "  data = batch['data']\n",
 38 |     "  labels = batch['labels']\n",
 39 |     "  print (\"Loading %s: %d\" % (f, len(data)))\n",
 40 |     "  return data, labels\n"
 41 |    ]
 42 |   },
 43 |   {
 44 |    "cell_type": "code",
 45 |    "execution_count": 3,
 46 |    "metadata": {
 47 |     "collapsed": true
 48 |    },
 49 |    "outputs": [],
 50 |    "source": [
 51 |     "\n",
 52 |     "def load_data(files, data_dir, label_count):\n",
 53 |     "  data, labels = load_data_one(data_dir + '/' + files[0])\n",
 54 |     "  for f in files[1:]:\n",
 55 |     "    data_n, labels_n = load_data_one(data_dir + '/' + f)\n",
 56 |     "    data = np.append(data, data_n, axis=0)\n",
 57 |     "    labels = np.append(labels, labels_n, axis=0)\n",
 58 |     "  labels = np.array([ [ float(i == label) for i in range(label_count) ] for label in labels ])\n",
 59 |     "  return data, labels"
 60 |    ]
 61 |   },
 62 |   {
 63 |    "cell_type": "code",
 64 |    "execution_count": 4,
 65 |    "metadata": {
 66 |     "collapsed": true
 67 |    },
 68 |    "outputs": [],
 69 |    "source": [
 70 |     "def run_in_batch_avg(session, tensors, batch_placeholders, feed_dict={}, batch_size=200):                              \n",
 71 |     "  res = [ 0 ] * len(tensors)                                                                                           \n",
 72 |     "  batch_tensors = [ (placeholder, feed_dict[ placeholder ]) for placeholder in batch_placeholders ]                    \n",
 73 |     "  total_size = len(batch_tensors[0][1])                                                                                \n",
 74 |     "  batch_count = (total_size + batch_size - 1) / batch_size                                                             \n",
 75 |     "  for batch_idx in range(batch_count):                                                                                \n",
 76 |     "    current_batch_size = None                                                                                          \n",
 77 |     "    for (placeholder, tensor) in batch_tensors:                                                                        \n",
 78 |     "      batch_tensor = tensor[ batch_idx*batch_size : (batch_idx+1)*batch_size ]                                         \n",
 79 |     "      current_batch_size = len(batch_tensor)                                                                           \n",
 80 |     "      feed_dict[placeholder] = tensor[ batch_idx*batch_size : (batch_idx+1)*batch_size ]                               \n",
 81 |     "    tmp = session.run(tensors, feed_dict=feed_dict)                                                                    \n",
 82 |     "    res = [ r + t * current_batch_size for (r, t) in zip(res, tmp) ]                                                   \n",
 83 |     "  return [ r / float(total_size) for r in res ]\n"
 84 |    ]
 85 |   },
 86 |   {
 87 |    "cell_type": "code",
 88 |    "execution_count": 5,
 89 |    "metadata": {
 90 |     "collapsed": true
 91 |    },
 92 |    "outputs": [],
 93 |    "source": [
 94 |     "def weight_variable(shape):\n",
 95 |     "  initial = tf.truncated_normal(shape, stddev=0.01)\n",
 96 |     "  return tf.Variable(initial)\n",
 97 |     "\n",
 98 |     "def bias_variable(shape):\n",
 99 |     "  initial = tf.constant(0.01, shape=shape)\n",
100 |     "  return tf.Variable(initial)\n",
101 |     "\n",
102 |     "def conv2d(input, in_features, out_features, kernel_size, with_bias=False):\n",
103 |     "  W = weight_variable([ kernel_size, kernel_size, in_features, out_features ])\n",
104 |     "  conv = tf.nn.conv2d(input, W, [ 1, 1, 1, 1 ], padding='SAME')\n",
105 |     "  if with_bias:\n",
106 |     "    return conv + bias_variable([ out_features ])\n",
107 |     "  return conv\n",
108 |     "\n",
109 |     "def batch_activ_conv(current, in_features, out_features, kernel_size, is_training, keep_prob):\n",
110 |     "  current = tf.contrib.layers.batch_norm(current, scale=True, is_training=is_training, updates_collections=None)\n",
111 |     "  current = tf.nn.relu(current)\n",
112 |     "  current = conv2d(current, in_features, out_features, kernel_size)\n",
113 |     "  current = tf.nn.dropout(current, keep_prob)\n",
114 |     "  return current\n",
115 |     "\n",
116 |     "def block(input, layers, in_features, growth, is_training, keep_prob):\n",
117 |     "  current = input\n",
118 |     "  features = in_features\n",
119 |     "  for idx in range(layers):\n",
120 |     "    tmp = batch_activ_conv(current, features, growth, 3, is_training, keep_prob)\n",
121 |     "    current = tf.concat((current, tmp),3)\n",
122 |     "    features += growth\n",
123 |     "  return current, features\n",
124 |     "\n",
125 |     "def avg_pool(input, s):\n",
126 |     "  return tf.nn.avg_pool(input, [ 1, s, s, 1 ], [1, s, s, 1 ], 'VALID')\n",
127 |     "\n"
128 |    ]
129 |   },
130 |   {
131 |    "cell_type": "code",
132 |    "execution_count": 6,
133 |    "metadata": {
134 |     "collapsed": true
135 |    },
136 |    "outputs": [],
137 |    "source": [
138 |     "data_dir = './data'\n",
139 |     "image_size = 32\n",
140 |     "image_dim = image_size * image_size * 3\n",
141 |     "# meta = unpickle(data_dir + '/batches.meta')\n",
142 |     "# label_names = meta['label_names']\n",
143 |     "# label_count = len(label_names)\n",
144 |     "label_count = 10\n",
145 |     "# train_files = [ 'data_batch_%d' % d for d in range(1, 6) ]\n",
146 |     "# train_data, train_labels = load_data(train_files, data_dir, label_count)\n",
147 |     "# pi = np.random.permutation(len(train_data))\n",
148 |     "# train_data, train_labels = train_data[pi], train_labels[pi]\n",
149 |     "# test_data, test_labels = load_data([ 'test_batch' ], data_dir, label_count)\n",
150 |     "# print (\"Train:\", np.shape(train_data), np.shape(train_labels))\n",
151 |     "# print (\"Test:\", np.shape(test_data), np.shape(test_labels))\n",
152 |     "# data = { 'train_data': train_data,\n",
153 |     "#   'train_labels': train_labels,\n",
154 |     "#   'test_data': test_data,\n",
155 |     "#   'test_labels': test_labels }\n",
156 |     "depth = 40"
157 |    ]
158 |   },
159 |   {
160 |    "cell_type": "code",
161 |    "execution_count": 7,
162 |    "metadata": {
163 |     "collapsed": true
164 |    },
165 |    "outputs": [],
166 |    "source": [
167 |     "\n",
168 |     "weight_decay = 1e-4\n",
169 |     "layers = int((depth - 4) / 3)\n",
170 |     "graph = tf.Graph()\n",
171 |     "\n",
172 |     "xs = tf.placeholder(\"float\", shape=[None, image_dim])\n",
173 |     "ys = tf.placeholder(\"float\", shape=[None, label_count])\n",
174 |     "lr = tf.placeholder(\"float\", shape=[])\n",
175 |     "keep_prob = tf.placeholder(tf.float32)\n",
176 |     "is_training = tf.placeholder(\"bool\", shape=[])\n",
177 |     "\n",
178 |     "\n",
179 |     "current = tf.reshape(xs, [ -1, 32, 32, 3 ])\n",
180 |     "current = conv2d(current, 3, 16, 3)\n",
181 |     "\n",
182 |     "current, features = block(current, layers, 16, 12, is_training, keep_prob)\n",
183 |     "current = batch_activ_conv(current, features, features, 1, is_training, keep_prob)\n",
184 |     "current = avg_pool(current, 2)\n",
185 |     "current, features = block(current, layers, features, 12, is_training, keep_prob)\n",
186 |     "current = batch_activ_conv(current, features, features, 1, is_training, keep_prob)\n",
187 |     "current = avg_pool(current, 2)\n",
188 |     "current, features = block(current, layers, features, 12, is_training, keep_prob)\n",
189 |     "\n",
190 |     "current = tf.contrib.layers.batch_norm(current, scale=True, is_training=is_training, updates_collections=None)\n",
191 |     "current = tf.nn.relu(current)\n",
192 |     "current = avg_pool(current, 8)\n",
193 |     "final_dim = features\n",
194 |     "current = tf.reshape(current, [ -1, final_dim ])\n",
195 |     "Wfc = weight_variable([ final_dim, label_count ])\n",
196 |     "bfc = bias_variable([ label_count ])\n",
197 |     "ys_ = tf.nn.softmax( tf.matmul(current, Wfc) + bfc )\n",
198 |     "\n",
199 |     "cross_entropy = -tf.reduce_mean(ys * tf.log(ys_ + 1e-12))\n",
200 |     "l2 = tf.add_n([tf.nn.l2_loss(var) for var in tf.trainable_variables()])\n",
201 |     "train_step = tf.train.MomentumOptimizer(lr, 0.9, use_nesterov=True).minimize(cross_entropy + l2 * weight_decay)\n",
202 |     "correct_prediction = tf.equal(tf.argmax(ys_, 1), tf.argmax(ys, 1))\n",
203 |     "accuracy = tf.reduce_mean(tf.cast(correct_prediction, \"float\"))\n",
204 |     "    \n"
205 |    ]
206 |   },
207 |   {
208 |    "cell_type": "code",
209 |    "execution_count": 8,
210 |    "metadata": {
211 |     "collapsed": true
212 |    },
213 |    "outputs": [],
214 |    "source": [
215 |     "para_dict={}\n",
216 |     "for k in tf.global_variables():\n",
217 |     "    if k not in tf.contrib.framework.get_variables_by_suffix('Momentum'):#Load all parameters except ones of optimization functions\n",
218 |     "            para_dict[k.name[:-2]] = k"
219 |    ]
220 |   },
221 |   {
222 |    "cell_type": "code",
223 |    "execution_count": 9,
224 |    "metadata": {},
225 |    "outputs": [
226 |     {
227 |      "name": "stdout",
228 |      "output_type": "stream",
229 |      "text": [
230 |       "INFO:tensorflow:Restoring parameters from ./inqmodel/stage2/inqcom16_97/inqcom1697_92569_4.ckpt\n"
231 |      ]
232 |     }
233 |    ],
234 |    "source": [
235 |     "sess=tf.InteractiveSession()\n",
236 |     "saver = tf.train.Saver(para_dict)\n",
237 |     "#saver.restore(sess,'./inqmodel/stage2/64pinq80/64pinq80ok_93149_7.ckpt')\n",
238 |     "#saver.restore(sess,'./modellog/weightonlypara93.ckpt')\n",
239 |     "saver.restore(sess,'./inqmodel/stage2/inqcom16_97/inqcom1697_92569_4.ckpt')\n",
240 |     "#saver.restore(sess,'./prunemodel/stage2/inc100adj/prune100ar_92969_10ok.ckpt')\n",
241 |     "\n",
242 |     "##################End of Pretrained Parameters Loading############################################### "
243 |    ]
244 |   },
245 |   {
246 |    "cell_type": "code",
247 |    "execution_count": 10,
248 |    "metadata": {
249 |     "collapsed": true
250 |    },
251 |    "outputs": [],
252 |    "source": [
253 |     "import config\n",
254 |     "#Nearly all hyperparameters are set in config.py "
255 |    ]
256 |   },
257 |   {
258 |    "cell_type": "code",
259 |    "execution_count": 11,
260 |    "metadata": {
261 |     "collapsed": true
262 |    },
263 |    "outputs": [],
264 |    "source": [
265 |     "def apply_inq(weights, inq_dict):\n",
266 |     "   \n",
267 |     "    for target in config.all_para:\n",
268 |     "        wl = target\n",
269 |     "        bit = config.inq_para[wl]\n",
270 |     "        # Get target layer's weights\n",
271 |     "        weight_obj = weights[wl]\n",
272 |     "        weight_arr = weight_obj.eval()\n",
273 |     "        \n",
274 |     "        \n",
275 |     "        weight_rest = np.reshape(weight_arr,[-1])\n",
276 |     "        dic_tem = np.reshape(inq_dict[wl],[-1])\n",
277 |     "        idx_rest = np.flip(np.argsort(abs(np.reshape(weight_rest,[-1]))),0) #choose which weights to be INQed\n",
278 |     "        num_prune = int(len(weight_rest)*config.inqpercen_para[wl]*config.inqprune_para[wl])#how many weights to be INQed after pruning\n",
279 |     "     \n",
280 |     "    \n",
281 |     "        #calculate INQ bounds\n",
282 |     "        weight_toINQ = weight_rest[idx_rest[:num_prune]] \n",
283 |     "        n1 = (np.floor(np.log2(max(abs(np.reshape(weight_arr,[-1])))*4/3)))\n",
284 |     "        n2 = n1 +1 - bit/4\n",
285 |     "        print(n1,n2,n1-n2)\n",
286 |     "        upper_bound = 2**(np.floor(np.log2(max(abs(np.reshape(weight_arr,[-1])))*4/3)))\n",
287 |     "        lower_bound = 2**(n1 +1 - bit/4)\n",
288 |     "       \n",
289 |     "    \n",
290 |     "       #INQ\n",
291 |     "        weight_toINQ[abs(weight_toINQ) < lower_bound] = 0\n",
292 |     "        weight_toINQ[weight_toINQ != 0] = 2**(np.floor(np.log2(abs(weight_toINQ[weight_toINQ != 0]*4/3))))*np.sign(weight_toINQ[weight_toINQ != 0])\n",
293 |     "\n",
294 |     "        # Apply pruning\n",
295 |     "        weight_rest[idx_rest[:num_prune]] = weight_toINQ\n",
296 |     "        weight_arr =  np.reshape(weight_rest,np.shape(weight_arr))\n",
297 |     "        dic_tem [idx_rest[:num_prune]] =  np.zeros_like(dic_tem [idx_rest[:num_prune]])\n",
298 |     "        inq_dict[wl] = np.reshape(dic_tem,np.shape(inq_dict[wl]))\n",
299 |     "       # print('left',sum(np.reshape(inq_dict[wl],-1)))\n",
300 |     "\n",
301 |     "        # Store pruned weights as tensorflow objects\n",
302 |     "        sess.run(weight_obj.assign(weight_arr))\n",
303 |     "\n",
304 |     "    return inq_dict"
305 |    ]
306 |   },
307 |   {
308 |    "cell_type": "code",
309 |    "execution_count": 12,
310 |    "metadata": {
311 |     "collapsed": true
312 |    },
313 |    "outputs": [],
314 |    "source": [
315 |     "prune_dict = {}\n",
316 |     "for target in config.all_para:\n",
317 |     "    wl =target\n",
318 |     "    weight_obj = para_dict[wl]\n",
319 |     "    prune_dict[wl] = np.ones_like(weight_obj.eval())"
320 |    ]
321 |   },
322 |   {
323 |    "cell_type": "code",
324 |    "execution_count": 13,
325 |    "metadata": {},
326 |    "outputs": [
327 |     {
328 |      "name": "stdout",
329 |      "output_type": "stream",
330 |      "text": [
331 |       "-1.0 -4.0 3.0\n",
332 |       "Variable 0.0625 0.176776695297 0.353553390593\n",
333 |       "-2.0 -5.0 3.0\n",
334 |       "Variable_1 0.03125 0.148650889375 0.210224103813\n",
335 |       "-2.0 -5.0 3.0\n",
336 |       "Variable_2 0.03125 0.148650889375 0.210224103813\n",
337 |       "-2.0 -5.0 3.0\n",
338 |       "Variable_3 0.03125 0.148650889375 0.210224103813\n",
339 |       "-2.0 -5.0 3.0\n",
340 |       "Variable_4 0.03125 0.148650889375 0.210224103813\n",
341 |       "-3.0 -6.0 3.0\n",
342 |       "Variable_5 0.015625 0.136313466583 0.114625505401\n",
343 |       "-3.0 -6.0 3.0\n",
344 |       "Variable_6 0.015625 0.136313466583 0.114625505401\n",
345 |       "-3.0 -6.0 3.0\n",
346 |       "Variable_7 0.015625 0.136313466583 0.114625505401\n",
347 |       "-2.0 -5.0 3.0\n",
348 |       "Variable_8 0.03125 0.148650889375 0.210224103813\n",
349 |       "-3.0 -6.0 3.0\n",
350 |       "Variable_9 0.015625 0.136313466583 0.114625505401\n",
351 |       "-3.0 -6.0 3.0\n",
352 |       "Variable_10 0.015625 0.136313466583 0.114625505401\n",
353 |       "-3.0 -6.0 3.0\n",
354 |       "Variable_11 0.015625 0.136313466583 0.114625505401\n",
355 |       "-3.0 -6.0 3.0\n",
356 |       "Variable_12 0.015625 0.136313466583 0.114625505401\n",
357 |       "-2.0 -5.0 3.0\n",
358 |       "Variable_13 0.03125 0.148650889375 0.210224103813\n",
359 |       "-3.0 -6.0 3.0\n",
360 |       "Variable_14 0.015625 0.136313466583 0.114625505401\n",
361 |       "-2.0 -5.0 3.0\n",
362 |       "Variable_15 0.03125 0.148650889375 0.210224103813\n",
363 |       "-2.0 -5.0 3.0\n",
364 |       "Variable_16 0.03125 0.148650889375 0.210224103813\n",
365 |       "-3.0 -6.0 3.0\n",
366 |       "Variable_17 0.015625 0.136313466583 0.114625505401\n",
367 |       "-3.0 -6.0 3.0\n",
368 |       "Variable_18 0.015625 0.136313466583 0.114625505401\n",
369 |       "-3.0 -6.0 3.0\n",
370 |       "Variable_19 0.015625 0.136313466583 0.114625505401\n",
371 |       "-3.0 -6.0 3.0\n",
372 |       "Variable_20 0.015625 0.136313466583 0.114625505401\n",
373 |       "-3.0 -6.0 3.0\n",
374 |       "Variable_21 0.015625 0.136313466583 0.114625505401\n",
375 |       "-3.0 -6.0 3.0\n",
376 |       "Variable_22 0.015625 0.136313466583 0.114625505401\n",
377 |       "-3.0 -6.0 3.0\n",
378 |       "Variable_23 0.015625 0.136313466583 0.114625505401\n",
379 |       "-3.0 -6.0 3.0\n",
380 |       "Variable_24 0.015625 0.136313466583 0.114625505401\n",
381 |       "-3.0 -6.0 3.0\n",
382 |       "Variable_25 0.015625 0.136313466583 0.114625505401\n",
383 |       "-3.0 -6.0 3.0\n",
384 |       "Variable_26 0.015625 0.136313466583 0.114625505401\n",
385 |       "-3.0 -6.0 3.0\n",
386 |       "Variable_27 0.015625 0.136313466583 0.114625505401\n",
387 |       "-3.0 -6.0 3.0\n",
388 |       "Variable_28 0.015625 0.136313466583 0.114625505401\n",
389 |       "-3.0 -6.0 3.0\n",
390 |       "Variable_29 0.015625 0.136313466583 0.114625505401\n",
391 |       "-3.0 -6.0 3.0\n",
392 |       "Variable_30 0.015625 0.136313466583 0.114625505401\n",
393 |       "-3.0 -6.0 3.0\n",
394 |       "Variable_31 0.015625 0.136313466583 0.114625505401\n",
395 |       "-3.0 -6.0 3.0\n",
396 |       "Variable_32 0.015625 0.136313466583 0.114625505401\n",
397 |       "-3.0 -6.0 3.0\n",
398 |       "Variable_33 0.015625 0.136313466583 0.114625505401\n",
399 |       "-3.0 -6.0 3.0\n",
400 |       "Variable_34 0.015625 0.136313466583 0.114625505401\n",
401 |       "-3.0 -6.0 3.0\n",
402 |       "Variable_35 0.015625 0.136313466583 0.114625505401\n",
403 |       "-3.0 -6.0 3.0\n",
404 |       "Variable_36 0.015625 0.136313466583 0.114625505401\n",
405 |       "-3.0 -6.0 3.0\n",
406 |       "Variable_37 0.015625 0.136313466583 0.114625505401\n",
407 |       "-3.0 -6.0 3.0\n",
408 |       "Variable_38 0.015625 0.136313466583 0.114625505401\n",
409 |       "0.0 -3.0 3.0\n",
410 |       "Variable_39 0.125 0.25 0.5\n"
411 |      ]
412 |     }
413 |    ],
414 |    "source": [
415 |     "prune_dict = apply_inq(para_dict, prune_dict)"
416 |    ]
417 |   },
418 |   {
419 |    "cell_type": "code",
420 |    "execution_count": 14,
421 |    "metadata": {},
422 |    "outputs": [
423 |     {
424 |      "data": {
425 |       "text/plain": [
426 |        "'./inqmodel/stage1/inqcom1697100s.ckpt'"
427 |       ]
428 |      },
429 |      "execution_count": 14,
430 |      "metadata": {},
431 |      "output_type": "execute_result"
432 |     }
433 |    ],
434 |    "source": [
435 |     "saver.save(sess,'./inqmodel/stage1/inqcom1697100s.ckpt')#save INQed parameters\n"
436 |    ]
437 |   },
438 |   {
439 |    "cell_type": "code",
440 |    "execution_count": 15,
441 |    "metadata": {
442 |     "collapsed": true
443 |    },
444 |    "outputs": [],
445 |    "source": [
446 |     "#load pruning mask\n",
447 |     "import pickle\n",
448 |     "f2 = open(\"./prunemodel/stage2/inc100adj/prune100ar.txt\",\"rb\")\n",
449 |     "nz_list = pickle.load(f2)\n",
450 |     "f2.close()"
451 |    ]
452 |   },
453 |   {
454 |    "cell_type": "code",
455 |    "execution_count": 16,
456 |    "metadata": {
457 |     "collapsed": true
458 |    },
459 |    "outputs": [],
460 |    "source": [
461 |     "for k in prune_dict.items():\n",
462 |     "    layer = k[0]\n",
463 |     "    prune_dict[k[0]] = np.multiply(prune_dict[k[0]],nz_list[k[0]]) #combine pruning mask and INQ mask"
464 |    ]
465 |   },
466 |   {
467 |    "cell_type": "code",
468 |    "execution_count": null,
469 |    "metadata": {
470 |     "collapsed": true
471 |    },
472 |    "outputs": [],
473 |    "source": [
474 |     "#save INQ mask\n",
475 |     "import pickle\n",
476 |     "# create dict\n",
477 |     "# save dict\n",
478 |     "f1 = open(\"C:/Users/lhlne/Desktop/project/densenet/inqmodel/stage1/inqcom1697.txt\",\"wb\")\n",
479 |     "pickle.dump(prune_dict, f1)\n",
480 |     "f1.close()\n",
481 |     "# load dict\n",
482 |     "f2 = open(\"C:/Users/lhlne/Desktop/project/densenet/inqmodel/stage1/inqcom1697.txt\",\"rb\")\n",
483 |     "load_list = pickle.load(f2)\n",
484 |     "f2.close()\n",
485 |     "# print \n",
486 |     "print(load_list)"
487 |    ]
488 |   }
489 |  ],
490 |  "metadata": {
491 |   "kernelspec": {
492 |    "display_name": "Python 3",
493 |    "language": "python",
494 |    "name": "python3"
495 |   },
496 |   "language_info": {
497 |    "codemirror_mode": {
498 |     "name": "ipython",
499 |     "version": 3
500 |    },
501 |    "file_extension": ".py",
502 |    "mimetype": "text/x-python",
503 |    "name": "python",
504 |    "nbconvert_exporter": "python",
505 |    "pygments_lexer": "ipython3",
506 |    "version": "3.6.1"
507 |   }
508 |  },
509 |  "nbformat": 4,
510 |  "nbformat_minor": 2
511 | }
512 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "{}"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright {yyyy} {name of copyright owner}
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Project
 2 | 
 3 | Tensorflow implementations of Iterative Pruning, K-means Quantization, Dynamic Surgery Network, Incremental Network Quantization
 4 | 
 5 | Codes are implemented according to reference paper and codes.
 6 | 
 7 | Masks and modified weights are produced on Single Machine.
 8 | 
 9 | Re-training process are conducted on Google Cloud
10 | 
11 | All hyperparameters are set in config.py except DNS(to replicate its open-sourse code on Caffe)
12 | 
13 | All codes except Analysis&Experiments part are documented.
14 | 
15 | densenet.py: essential DenseNet modified from https://github.com/LaurentMazare/deep-models/tree/master/densenet
16 | 
17 | densenetfinaltest.py: inference of DenseNet
18 | 
19 | ## Iterative Pruning
20 | 
21 | densenetfinalprune.py: produce masks & pruned weights
22 | 
23 | dnet_prune.ipynb: re-training of Iterative Pruning
24 | 
25 | ## K-means Quantization
26 | 
27 | densenetfinalkmeans.py: produce masks & K-Qed weights
28 | 
29 | dnet_kmeans.ipynb: re-training of codebooks after K-means Quantization
30 | 
31 | ## Dynamic Surgery Network
32 | 
33 | densenetfinalDNS.py: DNS
34 | 
35 | ## Incremental Network Quantization
36 | 
37 | densenetfinalinq.py: produce masks & INQed weights
38 | 
39 | dnet_INQ.ipynb: re-training of INQ
40 | 
41 | ## Combinations:
42 | 
43 | Pruning+INQ: INQPruning.ipynb
44 | 
45 | Pruning+K-Q: KmeansPruning.ipynb
46 | 
47 | ## Analysis&Experiments:
48 | 
49 | dnet_dns_analysis.ipynb
50 | 
51 | dnet_dns_analysis2.ipynb
52 | 
53 | dnet_INQ-analysis.ipynb
54 | 
55 | dnet_prune_analysis.ipynb
56 | 
57 | 
58 | ## References:
59 | 
60 | https://github.com/gstaff/tfzip/tree/master/tfzip
61 | 
62 | https://github.com/garion9013/impl-pruning-TF
63 | 
64 | https://github.com/yiwenguo/Dynamic-Network-Surgery
65 | 
66 | https://arxiv.org/abs/1510.00149
67 | 
68 | https://arxiv.org/abs/1702.03044
69 | 
70 | https://arxiv.org/abs/1608.04493
71 | 


--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | 
  3 | all_para =['Variable',
  4 |  'Variable_1',
  5 |  'Variable_2',
  6 |  'Variable_3',
  7 |  'Variable_4',
  8 |  'Variable_5',
  9 |  'Variable_6',
 10 |  'Variable_7',
 11 |  'Variable_8',
 12 |  'Variable_9',
 13 |  'Variable_10',
 14 |  'Variable_11',
 15 |  'Variable_12',
 16 |  'Variable_13',
 17 |  'Variable_14',
 18 |  'Variable_15',
 19 |  'Variable_16',
 20 |  'Variable_17',
 21 |  'Variable_18',
 22 |  'Variable_19',
 23 |  'Variable_20',
 24 |  'Variable_21',
 25 |  'Variable_22',
 26 |  'Variable_23',
 27 |  'Variable_24',
 28 |  'Variable_25',
 29 |  'Variable_26',
 30 |  'Variable_27',
 31 |  'Variable_28',
 32 |  'Variable_29',
 33 |  'Variable_30',
 34 |  'Variable_31',
 35 |  'Variable_32',
 36 |  'Variable_33',
 37 |  'Variable_34',
 38 |  'Variable_35',
 39 |  'Variable_36',
 40 |  'Variable_37',
 41 |  'Variable_38',
 42 |  'Variable_39']
 43 | 
 44 | 
 45 | 
 46 | exc_para =[#'Variable',
 47 |  #'Variable_1',
 48 |  'Variable_2',
 49 |  'Variable_3',
 50 |  'Variable_4',
 51 |  'Variable_5',
 52 |  'Variable_6',
 53 |  'Variable_7',
 54 |  'Variable_8',
 55 |  'Variable_9',
 56 |  'Variable_10',
 57 |  'Variable_11',
 58 |  'Variable_12',
 59 | # 'Variable_13',
 60 |  'Variable_14',
 61 |  'Variable_15',
 62 |  'Variable_16',
 63 |  'Variable_17',
 64 |  'Variable_18',
 65 |  'Variable_19',
 66 |  'Variable_20',
 67 |  'Variable_21',
 68 |  'Variable_22',
 69 |  'Variable_23',
 70 |  'Variable_24',
 71 |  'Variable_25',
 72 | # 'Variable_26',
 73 |  'Variable_27',
 74 |  'Variable_28',
 75 |  'Variable_29',
 76 |  'Variable_30',
 77 |  'Variable_31',
 78 |  'Variable_32',
 79 |  'Variable_33',
 80 |  'Variable_34',
 81 |  'Variable_35',
 82 |  'Variable_36',
 83 |  'Variable_37',
 84 |  'Variable_38']
 85 | # 'Variable_39']
 86 | trans_layer = ['Variable_13','Variable_26']
 87 | 
 88 | block_1 = [#'Variable_1',
 89 |  'Variable_2',
 90 |  'Variable_3',
 91 |  'Variable_4',
 92 |  'Variable_5',
 93 |  'Variable_6',
 94 |  'Variable_7',
 95 |  'Variable_8',
 96 |  'Variable_9',
 97 |  'Variable_10',
 98 |  'Variable_11',
 99 |  'Variable_12',]
100 | 
101 | block_2 = ['Variable_14',
102 |  'Variable_15',
103 |  'Variable_16',
104 |  'Variable_17',
105 |  'Variable_18',
106 |  'Variable_19',
107 |  'Variable_20',
108 |  'Variable_21',
109 |  'Variable_22',
110 |  'Variable_23',
111 |  'Variable_24',
112 |  'Variable_25',]
113 | 
114 | block_3 =  ['Variable_27',
115 |  'Variable_28',
116 |  'Variable_29',
117 |  'Variable_30',
118 |  'Variable_31',
119 |  'Variable_32',
120 |  'Variable_33',
121 |  'Variable_34',
122 |  'Variable_35',
123 |  'Variable_36',
124 |  'Variable_37',
125 |  'Variable_38']
126 | 
127 | fc_layer = ['Variable_39']
128 | 
129 | prune_para = {}
130 | for k in all_para:
131 |     prune_para[k] = 0.75
132 | prune_para['Variable'] = 0.1
133 | prune_para['Variable_1'] = 0.45
134 | prune_para['Variable_3'] = 0.65
135 | 
136 | prune_para['Variable_5'] = 0.7
137 | prune_para['Variable_6'] = 0.65
138 | 
139 | prune_para['Variable_13'] = 0.35
140 | prune_para['Variable_26'] =0.5
141 | 
142 | prune_para['Variable_34'] = 0.85
143 | prune_para['Variable_35'] = 0.9
144 | 
145 | prune_para['Variable_36'] = 0.95
146 | prune_para['Variable_37'] = 0.95
147 | prune_para['Variable_38'] = 0.95
148 | prune_para['Variable_39'] = 0.9
149 | 
150 | 
151 | 
152 | dns_para =[#'Variable',
153 |  'Variable_1',
154 |  'Variable_2',
155 |  'Variable_3',
156 |  'Variable_4',
157 |  'Variable_5',
158 |  'Variable_6',
159 |  'Variable_7',
160 |  'Variable_8',
161 |  'Variable_9',
162 |  'Variable_10',
163 |  'Variable_11',
164 |  'Variable_12',
165 |  'Variable_13',
166 |  'Variable_14',
167 |  'Variable_15',
168 |  'Variable_16',
169 |  'Variable_17',
170 |  'Variable_18',
171 |  'Variable_19',
172 |  'Variable_20',
173 |  'Variable_21',
174 |  'Variable_22',
175 |  'Variable_23',
176 |  'Variable_24',
177 |  'Variable_25',
178 |  'Variable_26',
179 |  'Variable_27',
180 |  'Variable_28',
181 |  'Variable_29',
182 |  'Variable_30',
183 |  'Variable_31',
184 |  'Variable_32',
185 |  'Variable_33',
186 |  'Variable_34',
187 |  'Variable_35',
188 |  'Variable_36',
189 |  'Variable_37',
190 |  'Variable_38',
191 |  'Variable_39']
192 | 
193 | crate = {}
194 | for k in all_para:
195 |     crate[k] = 3
196 | crate['Variable'] = 0
197 | crate['Variable_1'] = 1
198 | crate['Variable_13'] = 1
199 | crate['Variable_26'] =1
200 | 
201 | inqpercen_para = {}
202 | for k in all_para:
203 |     inqpercen_para[k] = 1.0
204 |     
205 | inq_para = {}
206 | for k in all_para:
207 |     inq_para[k] = 16
208 |     
209 | #inq_para['Variable'] = 256
210 | #inq_para['Variable_1'] = 128
211 | #inq_para['Variable_13'] = 128
212 | #inq_para['Variable_26'] =128
213 | 
214 | inqprune_para = {}
215 | for k in all_para:
216 |     inqprune_para[k] = 1-0.75
217 | inqprune_para['Variable'] = 1-0.1
218 | inqprune_para['Variable_1'] = 1-0.45
219 | inqprune_para['Variable_3'] = 1-0.65
220 | 
221 | inqprune_para['Variable_5'] = 1-0.7
222 | inqprune_para['Variable_6'] = 1-0.65
223 | 
224 | inqprune_para['Variable_13'] = 1-0.35
225 | inqprune_para['Variable_26'] =1-0.5
226 | 
227 | inqprune_para['Variable_34'] = 1-0.85
228 | inqprune_para['Variable_35'] = 1-0.9
229 | 
230 | inqprune_para['Variable_36'] = 1-0.95
231 | inqprune_para['Variable_37'] = 1-0.95
232 | inqprune_para['Variable_38'] = 1-0.95
233 | inqprune_para['Variable_39'] = 1-0.9    
234 | 
235 | 
236 | kmeans_para = {}
237 | for k in all_para:
238 |     kmeans_para[k] = 64
239 | 
240 | len_dict = {'Variable': 432,
241 |  'Variable_1': 1728,
242 |  'Variable_10': 13392,
243 |  'Variable_11': 14688,
244 |  'Variable_12': 15984,
245 |  'Variable_13': 25600,
246 |  'Variable_14': 17280,
247 |  'Variable_15': 18576,
248 |  'Variable_16': 19872,
249 |  'Variable_17': 21168,
250 |  'Variable_18': 22464,
251 |  'Variable_19': 23760,
252 |  'Variable_2': 3024,
253 |  'Variable_20': 25056,
254 |  'Variable_21': 26352,
255 |  'Variable_22': 27648,
256 |  'Variable_23': 28944,
257 |  'Variable_24': 30240,
258 |  'Variable_25': 31536,
259 |  'Variable_26': 92416,
260 |  'Variable_27': 32832,
261 |  'Variable_28': 34128,
262 |  'Variable_29': 35424,
263 |  'Variable_3': 4320,
264 |  'Variable_30': 36720,
265 |  'Variable_31': 38016,
266 |  'Variable_32': 39312,
267 |  'Variable_33': 40608,
268 |  'Variable_34': 41904,
269 |  'Variable_35': 43200,
270 |  'Variable_36': 44496,
271 |  'Variable_37': 45792,
272 |  'Variable_38': 47088,
273 |  'Variable_39': 4480,
274 |  'Variable_4': 5616,
275 |  'Variable_5': 6912,
276 |  'Variable_6': 8208,
277 |  'Variable_7': 9504,
278 |  'Variable_8': 10800,
279 |  'Variable_9': 12096}
280 |     
281 | 


--------------------------------------------------------------------------------
/densenet.py:
--------------------------------------------------------------------------------
  1 | # encoding=utf8  
  2 | import numpy as np
  3 | import tensorflow as tf
  4 | 
  5 | def unpickle(file):
  6 |   import _pickle as cPickle
  7 |   fo = open(file, 'rb')
  8 |   dict = cPickle.load(fo,encoding='latin1')
  9 |   fo.close()
 10 |   if 'data' in dict:
 11 |     dict['data'] = dict['data'].reshape((-1, 3, 32, 32)).swapaxes(1, 3).swapaxes(1, 2).reshape(-1, 32*32*3) / 256.
 12 | 
 13 |   return dict
 14 | 
 15 | def load_data_one(f):
 16 |   batch = unpickle(f)
 17 |   data = batch['data']
 18 |   labels = batch['labels']
 19 |   print ("Loading %s: %d" % (f, len(data)))
 20 |   return data, labels
 21 | 
 22 | def load_data(files, data_dir, label_count):
 23 |   data, labels = load_data_one(data_dir + '/' + files[0])
 24 |   for f in files[1:]:
 25 |     data_n, labels_n = load_data_one(data_dir + '/' + f)
 26 |     data = np.append(data, data_n, axis=0)
 27 |     labels = np.append(labels, labels_n, axis=0)
 28 |   labels = np.array([ [ float(i == label) for i in range(label_count) ] for label in labels ])
 29 |   return data, labels
 30 | 
 31 | def run_in_batch_avg(session, tensors, batch_placeholders, feed_dict={}, batch_size=200):                              
 32 |   res = [ 0 ] * len(tensors)                                                                                           
 33 |   batch_tensors = [ (placeholder, feed_dict[ placeholder ]) for placeholder in batch_placeholders ]                    
 34 |   total_size = len(batch_tensors[0][1])                                                                                
 35 |   batch_count = (total_size + batch_size - 1) / batch_size                                                             
 36 |   for batch_idx in range(batch_count):                                                                                
 37 |     current_batch_size = None                                                                                          
 38 |     for (placeholder, tensor) in batch_tensors:                                                                        
 39 |       batch_tensor = tensor[ batch_idx*batch_size : (batch_idx+1)*batch_size ]                                         
 40 |       current_batch_size = len(batch_tensor)                                                                           
 41 |       feed_dict[placeholder] = tensor[ batch_idx*batch_size : (batch_idx+1)*batch_size ]                               
 42 |     tmp = session.run(tensors, feed_dict=feed_dict)                                                                    
 43 |     res = [ r + t * current_batch_size for (r, t) in zip(res, tmp) ]                                                   
 44 |   return [ r / float(total_size) for r in res ]
 45 | 
 46 | def weight_variable(shape):
 47 |   initial = tf.truncated_normal(shape, stddev=0.01)
 48 |   return tf.Variable(initial)
 49 | 
 50 | def bias_variable(shape):
 51 |   initial = tf.constant(0.01, shape=shape)
 52 |   return tf.Variable(initial)
 53 | 
 54 | def conv2d(input, in_features, out_features, kernel_size, with_bias=False):
 55 |   W = weight_variable([ kernel_size, kernel_size, in_features, out_features ])
 56 |   conv = tf.nn.conv2d(input, W, [ 1, 1, 1, 1 ], padding='SAME')
 57 |   if with_bias:
 58 |     return conv + bias_variable([ out_features ])
 59 |   return conv
 60 | 
 61 | def batch_activ_conv(current, in_features, out_features, kernel_size, is_training, keep_prob):
 62 |   current = tf.contrib.layers.batch_norm(current, scale=True, is_training=is_training, updates_collections=None)
 63 |   current = tf.nn.relu(current)
 64 |   current = conv2d(current, in_features, out_features, kernel_size)
 65 |   current = tf.nn.dropout(current, keep_prob)
 66 |   return current
 67 | 
 68 | def block(input, layers, in_features, growth, is_training, keep_prob):
 69 |   current = input
 70 |   features = in_features
 71 |   for idx in range(layers):
 72 |     tmp = batch_activ_conv(current, features, growth, 3, is_training, keep_prob)
 73 |     current = tf.concat(3, (current, tmp))
 74 |     features += growth
 75 |   return current, features
 76 | 
 77 | def avg_pool(input, s):
 78 |   return tf.nn.avg_pool(input, [ 1, s, s, 1 ], [1, s, s, 1 ], 'VALID')
 79 | 
 80 | def run_model(data, image_dim, label_count, depth):
 81 |   weight_decay = 1e-4
 82 |   layers = (depth - 4) / 3
 83 |   graph = tf.Graph()
 84 |   with graph.as_default():
 85 |     xs = tf.placeholder("float", shape=[None, image_dim])
 86 |     ys = tf.placeholder("float", shape=[None, label_count])
 87 |     lr = tf.placeholder("float", shape=[])
 88 |     keep_prob = tf.placeholder(tf.float32)
 89 |     is_training = tf.placeholder("bool", shape=[])
 90 | 
 91 | 
 92 |     current = tf.reshape(xs, [ -1, 32, 32, 3 ])
 93 |     current = conv2d(current, 3, 16, 3)
 94 | 
 95 |     current, features = block(current, layers, 16, 12, is_training, keep_prob)
 96 |     current = batch_activ_conv(current, features, features, 1, is_training, keep_prob)
 97 |     current = avg_pool(current, 2)
 98 |     current, features = block(current, layers, features, 12, is_training, keep_prob)
 99 |     current = batch_activ_conv(current, features, features, 1, is_training, keep_prob)
100 |     current = avg_pool(current, 2)
101 |     current, features = block(current, layers, features, 12, is_training, keep_prob)
102 | 
103 |     current = tf.contrib.layers.batch_norm(current, scale=True, is_training=is_training, updates_collections=None)
104 |     current = tf.nn.relu(current)
105 |     current = avg_pool(current, 8)
106 |     final_dim = features
107 |     current = tf.reshape(current, [ -1, final_dim ])
108 |     Wfc = weight_variable([ final_dim, label_count ])
109 |     bfc = bias_variable([ label_count ])
110 |     ys_ = tf.nn.softmax( tf.matmul(current, Wfc) + bfc )
111 | 
112 |     cross_entropy = -tf.reduce_mean(ys * tf.log(ys_ + 1e-12))
113 |     l2 = tf.add_n([tf.nn.l2_loss(var) for var in tf.trainable_variables()])
114 |     train_step = tf.train.MomentumOptimizer(lr, 0.9, use_nesterov=True).minimize(cross_entropy + l2 * weight_decay)
115 |     correct_prediction = tf.equal(tf.argmax(ys_, 1), tf.argmax(ys, 1))
116 |     accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
117 | 
118 |   with tf.Session(graph=graph) as session:
119 |     batch_size = 64
120 |     learning_rate = 0.1
121 |     session.run(tf.global_variables_initializer())
122 |     saver = tf.train.Saver()
123 |     train_data, train_labels = data['train_data'], data['train_labels']
124 |     batch_count = len(train_data) / batch_size
125 |     batches_data = np.split(train_data[:batch_count * batch_size], batch_count)
126 |     batches_labels = np.split(train_labels[:batch_count * batch_size], batch_count)
127 |     print ("Batch per epoch: ", batch_count)
128 |     for epoch in range(1, 1+300):
129 |       if epoch == 150: learning_rate = 0.01
130 |       if epoch == 225: learning_rate = 0.001
131 |       for batch_idx in range(batch_count):
132 |         xs_, ys_ = batches_data[batch_idx], batches_labels[batch_idx]
133 |         batch_res = session.run([ train_step, cross_entropy, accuracy ],
134 |           feed_dict = { xs: xs_, ys: ys_, lr: learning_rate, is_training: True, keep_prob: 0.8 })
135 |         if batch_idx % 100 == 0: print (epoch, batch_idx, batch_res[1:])
136 | 
137 |       save_path = saver.save(session, 'densenet_%d.ckpt' % epoch)
138 |       test_results = run_in_batch_avg(session, [ cross_entropy, accuracy ], [ xs, ys ],
139 |           feed_dict = { xs: data['test_data'], ys: data['test_labels'], is_training: False, keep_prob: 1. })
140 |       print (epoch, batch_res[1:], test_results)
141 | 
142 | def run():
143 |   data_dir = './data'
144 |   image_size = 32
145 |   image_dim = image_size * image_size * 3
146 |   meta = unpickle(data_dir + '/batches.meta')
147 |   label_names = meta['label_names']
148 |   label_count = len(label_names)
149 | 
150 |   train_files = [ 'data_batch_%d' % d for d in range(1, 6) ]
151 |   train_data, train_labels = load_data(train_files, data_dir, label_count)
152 |   pi = np.random.permutation(len(train_data))
153 |   train_data, train_labels = train_data[pi], train_labels[pi]
154 |   test_data, test_labels = load_data([ 'test_batch' ], data_dir, label_count)
155 |   print ("Train:", np.shape(train_data), np.shape(train_labels))
156 |   print ("Test:", np.shape(test_data), np.shape(test_labels))
157 |   data = { 'train_data': train_data,
158 |       'train_labels': train_labels,
159 |       'test_data': test_data,
160 |       'test_labels': test_labels }
161 |   run_model(data, image_dim, label_count, 40)
162 | 
163 | run()
164 | 


--------------------------------------------------------------------------------
/densenetfinalDNS.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Spyder Editor
  4 | 
  5 | This is a temporary script file.
  6 | """
  7 | 
  8 | import numpy as np
  9 | import tensorflow as tf
 10 | import config
 11 | import random
 12 | def unpickle(file):
 13 |   import _pickle as cPickle
 14 |   fo = open(file, 'rb')
 15 |   dict = cPickle.load(fo,encoding='latin1')
 16 |   fo.close()
 17 |   if 'data' in dict:
 18 |     dict['data'] = dict['data'].reshape((-1, 3, 32, 32)).swapaxes(1, 3).swapaxes(1, 2).reshape(-1, 32*32*3) / 256.
 19 | 
 20 |   return dict
 21 | def load_data_one(f):
 22 |   batch = unpickle(f)
 23 |   data = batch['data']
 24 |   labels = batch['labels']
 25 |   print ("Loading %s: %d" % (f, len(data)))
 26 |   return data, labels
 27 | 
 28 | def load_data(files, data_dir, label_count):
 29 |   data, labels = load_data_one(data_dir + '/' + files[0])
 30 |   for f in files[1:]:
 31 |     data_n, labels_n = load_data_one(data_dir + '/' + f)
 32 |     data = np.append(data, data_n, axis=0)
 33 |     labels = np.append(labels, labels_n, axis=0)
 34 |   labels = np.array([ [ float(i == label) for i in range(label_count) ] for label in labels ])
 35 |   return data, labels
 36 | def run_in_batch_avg(session, tensors, batch_placeholders, feed_dict={}, batch_size=200):                              
 37 |   res = [ 0 ] * len(tensors)                                                                                           
 38 |   batch_tensors = [ (placeholder, feed_dict[ placeholder ]) for placeholder in batch_placeholders ]                    
 39 |   total_size = len(batch_tensors[0][1])                                                                                
 40 |   batch_count = int((total_size + batch_size - 1) / batch_size)                                                             
 41 |   for batch_idx in range(batch_count):                                                                                
 42 |     current_batch_size = None                                                                                          
 43 |     for (placeholder, tensor) in batch_tensors:                                                                        
 44 |       batch_tensor = tensor[ batch_idx*batch_size : (batch_idx+1)*batch_size ]                                         
 45 |       current_batch_size = len(batch_tensor)                                                                           
 46 |       feed_dict[placeholder] = tensor[ batch_idx*batch_size : (batch_idx+1)*batch_size ]                               
 47 |     tmp = session.run(tensors, feed_dict=feed_dict)                                                                    
 48 |     res = [ r + t * current_batch_size for (r, t) in zip(res, tmp) ]                                                   
 49 |   return [ r / float(total_size) for r in res ]
 50 | 
 51 | #add dynamic mask
 52 | def add_dyprune(weights):
 53 |     crate = config.crate[weights.name[:-2]] #hyperpara C rate
 54 |     prune_mask = tf.Variable(tf.ones_like(weights),name=weights.name[:-2]+'mask', trainable=False)
 55 | 
 56 |     #calculate mask
 57 |     mean = tf.divide(tf.reduce_sum(tf.multiply(tf.abs(weights),prune_mask)),tf.reduce_sum(prune_mask))
 58 |     var = tf.multiply(weights,prune_mask)
 59 |     var = tf.square(var)
 60 |     mean_q = tf.square(mean)*tf.reduce_sum(prune_mask)
 61 |     var = tf.reduce_sum(var) - mean_q
 62 |     var = tf.divide(var,tf.reduce_sum(prune_mask))
 63 |     var = tf.sqrt(var)
 64 |     t1_lower = (mean+var*crate)*0.25 #hyperpara a
 65 |     t1_upper = (mean+var*crate)*0.45 #hyperpara b
 66 |     
 67 |     indicator_lower1 = tf.greater_equal(tf.abs(weights), tf.ones_like(weights) * t1_lower)    
 68 |     indicator_upper1 = tf.greater_equal(tf.abs(weights), tf.ones_like(weights) * t1_upper)
 69 |     indicator_matrix1 = tf.greater_equal(prune_mask, tf.zeros_like(weights))
 70 |     indicator_matrix1 = tf.logical_and(indicator_matrix1,indicator_lower1)
 71 |     indicator_matrix1 = tf.logical_or(indicator_matrix1,indicator_upper1)
 72 |     indicator_matrix1 = tf.to_float(indicator_matrix1)
 73 |     update = prune_mask.assign(indicator_matrix1)
 74 | 
 75 |     prune_fc = tf.multiply(weights, prune_mask)
 76 |     return prune_fc
 77 | 
 78 | 
 79 | def weight_variable(shape):
 80 |   initial = tf.truncated_normal(shape, stddev=0.01)   
 81 |   w = tf.Variable(initial)
 82 |   if w.name[:-2] in config.all_para: w = add_dyprune(w) #add dynamic mask
 83 |   return w
 84 | 
 85 | def bias_variable(shape):
 86 |   initial = tf.constant(0.01, shape=shape)
 87 |   return tf.Variable(initial)
 88 | 
 89 | def conv2d(input, in_features, out_features, kernel_size, with_bias=False):
 90 |   W = weight_variable([ kernel_size, kernel_size, in_features, out_features ])
 91 |   conv = tf.nn.conv2d(input, W, [ 1, 1, 1, 1 ], padding='SAME')
 92 |   if with_bias:
 93 |     return conv + bias_variable([ out_features ])
 94 |   return conv
 95 | 
 96 | def batch_activ_conv(current, in_features, out_features, kernel_size, is_training, keep_prob):
 97 |   current = tf.contrib.layers.batch_norm(current, scale=True, is_training=is_training, updates_collections=None)
 98 |   current = tf.nn.relu(current)
 99 |   current = conv2d(current, in_features, out_features, kernel_size)
100 |   current = tf.nn.dropout(current, keep_prob)
101 |   return current
102 | 
103 | def block(input, layers, in_features, growth, is_training, keep_prob):
104 |   current = input
105 |   features = in_features
106 |   for idx in range(layers):
107 |     tmp = batch_activ_conv(current, features, growth, 3, is_training, keep_prob)
108 |     current = tf.concat((current, tmp),3)
109 |     features += growth
110 |   return current, features
111 | 
112 | def avg_pool(input, s):
113 |   return tf.nn.avg_pool(input, [ 1, s, s, 1 ], [1, s, s, 1 ], 'VALID')
114 | 
115 | def get_dict(load_path):
116 |     import pickle
117 |     f2 = open(load_path,"rb")
118 |     load_list = pickle.load(f2)
119 |     f2.close()
120 |     return load_list
121 | 
122 | 
123 | def run_model(data, image_dim, label_count, depth):
124 |   weight_decay = 1e-4
125 |   layers = int((depth - 4) / 3)
126 | 
127 |   xs = tf.placeholder("float", shape=[None, image_dim])
128 |   ys = tf.placeholder("float", shape=[None, label_count])
129 |   lr = tf.placeholder("float", shape=[])
130 |   keep_prob = tf.placeholder(tf.float32)
131 |   is_training = tf.placeholder("bool", shape=[])
132 |   
133 |   current = tf.reshape(xs, [ -1, 32, 32, 3 ])
134 |   current = conv2d(current, 3, 16, 3)
135 |   
136 |   current, features = block(current, layers, 16, 12, is_training, keep_prob)
137 |   current = batch_activ_conv(current, features, features, 1, is_training, keep_prob)
138 |   current = avg_pool(current, 2)
139 |   current, features = block(current, layers, features, 12, is_training, keep_prob)
140 |   current = batch_activ_conv(current, features, features, 1, is_training, keep_prob)
141 |   current = avg_pool(current, 2)
142 |   current, features = block(current, layers, features, 12, is_training, keep_prob)
143 |   current = tf.contrib.layers.batch_norm(current, scale=True, is_training=is_training, updates_collections=None)
144 |   current = tf.nn.relu(current)
145 |   current = avg_pool(current, 8)
146 |   
147 |   final_dim = features
148 |   current = tf.reshape(current, [ -1, final_dim ])
149 |   Wfc = weight_variable([ final_dim, label_count ])
150 |   bfc = bias_variable([ label_count ])
151 |   ys_ = tf.nn.softmax( tf.matmul(current, Wfc) + bfc )
152 |   cross_entropy = -tf.reduce_mean(ys * tf.log(ys_ + 1e-12))
153 |   l2 = tf.add_n([tf.nn.l2_loss(var) for var in tf.trainable_variables()])
154 | 
155 |   
156 |   correct_prediction = tf.equal(tf.argmax(ys_, 1), tf.argmax(ys, 1))
157 |   accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
158 |   
159 |   para_dict={} #weights with mask
160 |   for k in tf.global_variables():
161 |       if k not in tf.contrib.framework.get_variables_by_suffix('Momentum'):
162 |           para_dict[k.name[:-2]] = k
163 |           
164 |   para_toload={} #weights without mask
165 |   for k in tf.global_variables():
166 |     if k not in tf.contrib.framework.get_variables_by_suffix('Momentum'):
167 |         if 'mask' not in k.name:            
168 |             para_toload[k.name[:-2]] = k   
169 | 
170 |   A_assign=[] #group all undate operation
171 |   for k in tf.get_default_graph().get_operations():
172 |     if k.name.startswith('Assign'):
173 |         A_assign.append(k.values()[0])
174 |   updatemask=tf.group(*A_assign)
175 |   
176 |   
177 |   train_step = tf.train.GradientDescentOptimizer(lr).minimize(cross_entropy)
178 |   #train_step = tf.train.MomentumOptimizer(lr, 0.9, use_nesterov=True).minimize(cross_entropy + l2 * weight_decay)
179 | 
180 |   session = tf.InteractiveSession()
181 |   batch_size = 64
182 |   learning_rate = 0.1
183 |   session.run(tf.global_variables_initializer())
184 |   train_data, train_labels = data['train_data'], data['train_labels']
185 |   batch_count = int(len(train_data) / batch_size)
186 |   batches_data = np.split(train_data[:batch_count * batch_size], batch_count)
187 |   batches_labels = np.split(train_labels[:batch_count * batch_size], batch_count)
188 |   print ("Batch per epoch: ", batch_count)
189 |   
190 |   
191 |   #set weights path
192 |   load_path = 'modellog/weightonlypara93.ckpt'
193 |   #normal save path  
194 |   normal_savepath = 'dns2/dns222_%d.ckpt'
195 |   #best result save path  
196 |   best_savepath = 'dns3/densenetdns222_%d_%d.ckpt'
197 |   
198 |   maxepoch = 35 #maxepoch for DNS
199 |   ##need config
200 |   
201 |   #load weights
202 |   saver = tf.train.Saver(para_dict)
203 |  # saver = tf.train.Saver(para_toload)
204 |   saver.restore(session,load_path)#########################################
205 |   
206 |   #auto-change learning rate to converge quicker
207 |   final_acc = 0.0
208 |   #test_results = run_in_batch_avg(session, [ cross_entropy, accuracy ], [ xs, ys ],
209 |  #   feed_dict = { xs: data['test_data'], ys: data['test_labels'], is_training: False, keep_prob: 1. })
210 | #  print (test_results)
211 |  # final_acc= test_results[1]
212 |   
213 |   sss=0
214 |   count = 0
215 |   saver = tf.train.Saver(para_dict)
216 |   best_acc = 0.0
217 |   for epoch in range(1, 1+300-sss):
218 |     if epoch >= 150-sss: learning_rate = 0.01
219 |     if epoch >= 225-sss: learning_rate = 0.001
220 |     if epoch >= 275-sss: learning_rate = 0.0001
221 |     if final_acc >= 0.77 :learning_rate = 0.01
222 |     if final_acc >= 0.9 :learning_rate = 0.001
223 | #    if final_acc >= 0.927 :learning_rate = 0.0001
224 |     
225 |     
226 |     for batch_idx in range(batch_count):
227 |         xs_, ys_ = batches_data[batch_idx], batches_labels[batch_idx]
228 |         batch_res = session.run([ train_step, cross_entropy, accuracy ],
229 |           feed_dict = { xs: xs_, ys: ys_, lr: learning_rate, is_training: True, keep_prob: 0.8 })
230 |         
231 |         #use prob function to control DNS
232 |         #if 1/(1+0.01*epoch)>random.uniform(0,1) and (epoch<maxepoch) and (epoch % 2 !=0) and (batch_idx % 2 ==0):
233 |         if 1/(1+0.01*epoch)>random.uniform(0,1) and (epoch<maxepoch) and (epoch % 2 !=0) and (batch_idx % 2 ==0):
234 |             session.run(updatemask)
235 |         if batch_idx % 100 == 0: 
236 |             print (epoch, batch_idx, batch_res[1:])
237 |            # print(np.sum(tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, "Variable_25mask:0")[0].eval()))
238 |           #  print(np.sum(tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, "Variable_32mask:0")[0].eval()))
239 |           
240 |           
241 |     saver.save(session, normal_savepath % epoch)
242 |     test_results = run_in_batch_avg(session, [ cross_entropy, accuracy ], [ xs, ys ],
243 |           feed_dict = { xs: data['test_data'], ys: data['test_labels'], is_training: False, keep_prob: 1. })
244 |     print (epoch, batch_res[1:], test_results)
245 |     
246 |     #save the best result    
247 |     final_acc= test_results[1]
248 |     if best_acc < final_acc:
249 |         count+=1
250 |         best_acc = final_acc
251 |         acc_num = int(final_acc*100000)
252 |         saver.save(session, best_savepath % (acc_num,count))#####################################
253 | 
254 | 
255 | 
256 | 
257 | data_dir = 'data'
258 | image_size = 32
259 | image_dim = image_size * image_size * 3
260 | meta = unpickle(data_dir + '/batches.meta')
261 | label_names = meta['label_names']
262 | label_count = len(label_names)
263 | 
264 | train_files = [ 'data_batch_%d' % d for d in range(1, 6) ]
265 | train_data, train_labels = load_data(train_files, data_dir, label_count)
266 | pi = np.random.permutation(len(train_data))
267 | train_data, train_labels = train_data[pi], train_labels[pi]
268 | test_data, test_labels = load_data([ 'test_batch' ], data_dir, label_count)
269 | print ("Train:", np.shape(train_data), np.shape(train_labels))
270 | print ("Test:", np.shape(test_data), np.shape(test_labels))
271 | data = { 'train_data': train_data,
272 |   'train_labels': train_labels,
273 |   'test_data': test_data,
274 |   'test_labels': test_labels }
275 | 
276 | run_model(data, image_dim, label_count, 40)
277 | 


--------------------------------------------------------------------------------
/densenetfinalinq.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Spyder Editor
  4 | 
  5 | This is a temporary script file.
  6 | """
  7 | 
  8 | import numpy as np
  9 | import tensorflow as tf
 10 | 
 11 | def unpickle(file):
 12 |   import _pickle as cPickle
 13 |   fo = open(file, 'rb')
 14 |   dict = cPickle.load(fo,encoding='latin1')
 15 |   fo.close()
 16 |   if 'data' in dict:
 17 |     dict['data'] = dict['data'].reshape((-1, 3, 32, 32)).swapaxes(1, 3).swapaxes(1, 2).reshape(-1, 32*32*3) / 256.
 18 | 
 19 |   return dict
 20 | def load_data_one(f):
 21 |   batch = unpickle(f)
 22 |   data = batch['data']
 23 |   labels = batch['labels']
 24 |   print ("Loading %s: %d" % (f, len(data)))
 25 |   return data, labels
 26 | 
 27 | def load_data(files, data_dir, label_count):
 28 |   data, labels = load_data_one(data_dir + '/' + files[0])
 29 |   for f in files[1:]:
 30 |     data_n, labels_n = load_data_one(data_dir + '/' + f)
 31 |     data = np.append(data, data_n, axis=0)
 32 |     labels = np.append(labels, labels_n, axis=0)
 33 |   labels = np.array([ [ float(i == label) for i in range(label_count) ] for label in labels ])
 34 |   return data, labels
 35 | def run_in_batch_avg(session, tensors, batch_placeholders, feed_dict={}, batch_size=200):                              
 36 |   res = [ 0 ] * len(tensors)                                                                                           
 37 |   batch_tensors = [ (placeholder, feed_dict[ placeholder ]) for placeholder in batch_placeholders ]                    
 38 |   total_size = len(batch_tensors[0][1])                                                                                
 39 |   batch_count = int((total_size + batch_size - 1) / batch_size)                                                             
 40 |   for batch_idx in range(batch_count):                                                                                
 41 |     current_batch_size = None                                                                                          
 42 |     for (placeholder, tensor) in batch_tensors:                                                                        
 43 |       batch_tensor = tensor[ batch_idx*batch_size : (batch_idx+1)*batch_size ]                                         
 44 |       current_batch_size = len(batch_tensor)                                                                           
 45 |       feed_dict[placeholder] = tensor[ batch_idx*batch_size : (batch_idx+1)*batch_size ]                               
 46 |     tmp = session.run(tensors, feed_dict=feed_dict)                                                                    
 47 |     res = [ r + t * current_batch_size for (r, t) in zip(res, tmp) ]                                                   
 48 |   return [ r / float(total_size) for r in res ]
 49 | def weight_variable(shape):
 50 |   initial = tf.truncated_normal(shape, stddev=0.01)
 51 |   return tf.Variable(initial)
 52 | 
 53 | def bias_variable(shape):
 54 |   initial = tf.constant(0.01, shape=shape)
 55 |   return tf.Variable(initial)
 56 | 
 57 | def conv2d(input, in_features, out_features, kernel_size, with_bias=False):
 58 |   W = weight_variable([ kernel_size, kernel_size, in_features, out_features ])
 59 |   conv = tf.nn.conv2d(input, W, [ 1, 1, 1, 1 ], padding='SAME')
 60 |   if with_bias:
 61 |     return conv + bias_variable([ out_features ])
 62 |   return conv
 63 | 
 64 | def batch_activ_conv(current, in_features, out_features, kernel_size, is_training, keep_prob):
 65 |   current = tf.contrib.layers.batch_norm(current, scale=True, is_training=is_training, updates_collections=None)
 66 |   current = tf.nn.relu(current)
 67 |   current = conv2d(current, in_features, out_features, kernel_size)
 68 |   current = tf.nn.dropout(current, keep_prob)
 69 |   return current
 70 | 
 71 | def block(input, layers, in_features, growth, is_training, keep_prob):
 72 |   current = input
 73 |   features = in_features
 74 |   for idx in range(layers):
 75 |     tmp = batch_activ_conv(current, features, growth, 3, is_training, keep_prob)
 76 |     current = tf.concat((current, tmp),3)
 77 |     features += growth
 78 |   return current, features
 79 | 
 80 | def avg_pool(input, s):
 81 |   return tf.nn.avg_pool(input, [ 1, s, s, 1 ], [1, s, s, 1 ], 'VALID')
 82 | 
 83 | def get_dict(load_path):
 84 |     import pickle
 85 |     f2 = open(load_path,"rb")
 86 |     load_list = pickle.load(f2)
 87 |     f2.close()
 88 |     return load_list
 89 | 
 90 | #apply INQ on gradients
 91 | def apply_prune_on_grads(grads_and_vars, dict_nzidx):
 92 |     # Mask gradients with pruned elements
 93 |     for key, nzidx in dict_nzidx.items():
 94 |         count = 0
 95 |         for grad, var in grads_and_vars:
 96 |             if var.name == key+":0":
 97 |                 nzidx_obj = tf.cast(tf.constant(nzidx), tf.float32)
 98 |                 grads_and_vars[count] = (tf.multiply(nzidx_obj, grad), var)
 99 |             count += 1
100 |     return grads_and_vars
101 | 
102 | def run_model(data, image_dim, label_count, depth):
103 |   weight_decay = 1e-3
104 |   layers = int((depth - 4) / 3)
105 | 
106 |   xs = tf.placeholder("float", shape=[None, image_dim])
107 |   ys = tf.placeholder("float", shape=[None, label_count])
108 |   lr = tf.placeholder("float", shape=[])
109 |   keep_prob = tf.placeholder(tf.float32)
110 |   is_training = tf.placeholder("bool", shape=[])
111 |   
112 |   current = tf.reshape(xs, [ -1, 32, 32, 3 ])
113 |   current = conv2d(current, 3, 16, 3)
114 |   
115 |   current, features = block(current, layers, 16, 12, is_training, keep_prob)
116 |   current = batch_activ_conv(current, features, features, 1, is_training, keep_prob)
117 |   current = avg_pool(current, 2)
118 |   current, features = block(current, layers, features, 12, is_training, keep_prob)
119 |   current = batch_activ_conv(current, features, features, 1, is_training, keep_prob)
120 |   current = avg_pool(current, 2)
121 |   current, features = block(current, layers, features, 12, is_training, keep_prob)
122 |   current = tf.contrib.layers.batch_norm(current, scale=True, is_training=is_training, updates_collections=None)
123 |   current = tf.nn.relu(current)
124 |   current = avg_pool(current, 8)
125 |   
126 |   final_dim = features
127 |   current = tf.reshape(current, [ -1, final_dim ])
128 |   Wfc = weight_variable([ final_dim, label_count ])
129 |   bfc = bias_variable([ label_count ])
130 |   ys_ = tf.nn.softmax( tf.matmul(current, Wfc) + bfc )
131 |   cross_entropy = -tf.reduce_mean(ys * tf.log(ys_ + 1e-12))
132 |   l2 = tf.add_n([tf.nn.l2_loss(var) for var in tf.trainable_variables()])
133 |   
134 |   
135 |   #Load pruned weights& mask
136 |   para_dict={}
137 |   for k in tf.global_variables():
138 |       if k not in tf.contrib.framework.get_variables_by_suffix('Momentum'):
139 |           para_dict[k.name[:-2]] = k
140 | 
141 |   name = 'inqcom1697100s'
142 |   
143 |   #Load mask
144 |   mask_loadpath=name+'.txt'
145 |   prune_dict = get_dict(mask_loadpath)
146 | 
147 |   #set weights path
148 |   #load_path = 'inqcom3260_92849_2.ckpt'
149 |   load_path = 'inqfinal/'+name+'.ckpt'
150 |   #normal save path  
151 |   normal_savepath = 'inqfinal2/'+name+'_%d.ckpt'
152 |   #best result save path
153 |   best_savepath = 'inqfinal3/'+name+'_%d_%d.ckpt'
154 |   
155 |   
156 |   #trainer = tf.train.GradientDescentOptimizer(lr)
157 |   trainer = tf.train.MomentumOptimizer(lr, 0.9, use_nesterov=True)
158 | 
159 |   grads_and_vars = trainer.compute_gradients(cross_entropy)
160 |   grads_and_vars = apply_prune_on_grads(grads_and_vars, prune_dict)
161 |   train_step = trainer.apply_gradients(grads_and_vars)
162 |   #train_step = tf.train.MomentumOptimizer(lr, 0.9, use_nesterov=True).minimize(cross_entropy + l2 * weight_decay)
163 |   
164 |   
165 |   correct_prediction = tf.equal(tf.argmax(ys_, 1), tf.argmax(ys, 1))
166 |   accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
167 |   
168 |   session = tf.InteractiveSession()
169 |   batch_size = 64
170 |   learning_rate = 0.1
171 |   session.run(tf.global_variables_initializer())
172 |   saver = tf.train.Saver(para_dict)
173 |   train_data, train_labels = data['train_data'], data['train_labels']
174 |   batch_count = int(len(train_data) / batch_size)
175 |   batches_data = np.split(train_data[:batch_count * batch_size], batch_count)
176 |   batches_labels = np.split(train_labels[:batch_count * batch_size], batch_count)
177 |   print ("Batch per epoch: ", batch_count)
178 |   
179 |   #Load weights
180 |   saver.restore(session,load_path)
181 |   
182 |   #auto-change learning rate to converge quicker
183 |   sss=0
184 |   count = 0
185 |   final_acc = 0.0
186 |   best_acc = 0.0
187 |   #See loaded results&adjust learning rate
188 |   test_results = run_in_batch_avg(session, [ cross_entropy, accuracy ], [ xs, ys ],
189 |     feed_dict = { xs: data['test_data'], ys: data['test_labels'], is_training: False, keep_prob: 1. })
190 |   print (test_results)
191 |   final_acc= test_results[1]
192 |   
193 |   for epoch in range(1, 1+300-sss):
194 |     if epoch >= 150-sss: learning_rate = 0.01
195 |     if epoch >= 225-sss: learning_rate = 0.001
196 |     if epoch >= 275-sss: learning_rate = 0.0001
197 |     if final_acc >= 0.77 :learning_rate = 0.01
198 |     if final_acc >= 0.9 :learning_rate = 0.001
199 | #    if final_acc >= 0.927 :learning_rate = 0.0001
200 | 
201 |     
202 |     for batch_idx in range(batch_count):
203 |         xs_, ys_ = batches_data[batch_idx], batches_labels[batch_idx]
204 |         batch_res = session.run([ train_step, cross_entropy, accuracy ],
205 |           feed_dict = { xs: xs_, ys: ys_, lr: learning_rate, is_training: True, keep_prob: 0.8 })
206 |         if batch_idx % 100 == 0: print (epoch, batch_idx, batch_res[1:])
207 | 
208 |     saver.save(session, normal_savepath % epoch)
209 |     test_results = run_in_batch_avg(session, [ cross_entropy, accuracy ], [ xs, ys ],
210 |           feed_dict = { xs: data['test_data'], ys: data['test_labels'], is_training: False, keep_prob: 1. })
211 |     print (epoch, batch_res[1:], test_results)
212 |         
213 |     #save the best result
214 |     final_acc= test_results[1]
215 |     if best_acc < final_acc:
216 |         count+=1
217 |         best_acc = final_acc
218 |         acc_num = int(final_acc*100000)
219 |         saver.save(session, best_savepath % (acc_num,count))
220 | 
221 | 
222 | 
223 | 
224 | data_dir = 'data'
225 | image_size = 32
226 | image_dim = image_size * image_size * 3
227 | meta = unpickle(data_dir + '/batches.meta')
228 | label_names = meta['label_names']
229 | label_count = len(label_names)
230 | 
231 | train_files = [ 'data_batch_%d' % d for d in range(1, 6) ]
232 | train_data, train_labels = load_data(train_files, data_dir, label_count)
233 | pi = np.random.permutation(len(train_data))
234 | train_data, train_labels = train_data[pi], train_labels[pi]
235 | test_data, test_labels = load_data([ 'test_batch' ], data_dir, label_count)
236 | print ("Train:", np.shape(train_data), np.shape(train_labels))
237 | print ("Test:", np.shape(test_data), np.shape(test_labels))
238 | data = { 'train_data': train_data,
239 |   'train_labels': train_labels,
240 |   'test_data': test_data,
241 |   'test_labels': test_labels }
242 | 
243 | run_model(data, image_dim, label_count, 40)
244 | 


--------------------------------------------------------------------------------
/densenetfinalkmeans.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Spyder Editor
  4 | 
  5 | This is a temporary script file.
  6 | """
  7 | 
  8 | import numpy as np
  9 | import tensorflow as tf
 10 | 
 11 | def unpickle(file):
 12 |   import _pickle as cPickle
 13 |   fo = open(file, 'rb')
 14 |   dict = cPickle.load(fo,encoding='latin1')
 15 |   fo.close()
 16 |   if 'data' in dict:
 17 |     dict['data'] = dict['data'].reshape((-1, 3, 32, 32)).swapaxes(1, 3).swapaxes(1, 2).reshape(-1, 32*32*3) / 256.
 18 | 
 19 |   return dict
 20 | def load_data_one(f):
 21 |   batch = unpickle(f)
 22 |   data = batch['data']
 23 |   labels = batch['labels']
 24 |   print ("Loading %s: %d" % (f, len(data)))
 25 |   return data, labels
 26 | 
 27 | def load_data(files, data_dir, label_count):
 28 |   data, labels = load_data_one(data_dir + '/' + files[0])
 29 |   for f in files[1:]:
 30 |     data_n, labels_n = load_data_one(data_dir + '/' + f)
 31 |     data = np.append(data, data_n, axis=0)
 32 |     labels = np.append(labels, labels_n, axis=0)
 33 |   labels = np.array([ [ float(i == label) for i in range(label_count) ] for label in labels ])
 34 |   return data, labels
 35 | def run_in_batch_avg(session, tensors, batch_placeholders, feed_dict={}, batch_size=200):                              
 36 |   res = [ 0 ] * len(tensors)                                                                                           
 37 |   batch_tensors = [ (placeholder, feed_dict[ placeholder ]) for placeholder in batch_placeholders ]                    
 38 |   total_size = len(batch_tensors[0][1])                                                                                
 39 |   batch_count = int((total_size + batch_size - 1) / batch_size)                                                             
 40 |   for batch_idx in range(batch_count):                                                                                
 41 |     current_batch_size = None                                                                                          
 42 |     for (placeholder, tensor) in batch_tensors:                                                                        
 43 |       batch_tensor = tensor[ batch_idx*batch_size : (batch_idx+1)*batch_size ]                                         
 44 |       current_batch_size = len(batch_tensor)                                                                           
 45 |       feed_dict[placeholder] = tensor[ batch_idx*batch_size : (batch_idx+1)*batch_size ]                               
 46 |     tmp = session.run(tensors, feed_dict=feed_dict)                                                                    
 47 |     res = [ r + t * current_batch_size for (r, t) in zip(res, tmp) ]                                                   
 48 |   return [ r / float(total_size) for r in res ]
 49 | def weight_variable(shape):
 50 |   initial = tf.truncated_normal(shape, stddev=0.01)
 51 |   return tf.Variable(initial)
 52 | 
 53 | def bias_variable(shape):
 54 |   initial = tf.constant(0.01, shape=shape)
 55 |   return tf.Variable(initial)
 56 | 
 57 | def conv2d(input, in_features, out_features, kernel_size, with_bias=False):
 58 |   W = weight_variable([ kernel_size, kernel_size, in_features, out_features ])
 59 |   conv = tf.nn.conv2d(input, W, [ 1, 1, 1, 1 ], padding='SAME')
 60 |   if with_bias:
 61 |     return conv + bias_variable([ out_features ])
 62 |   return conv
 63 | 
 64 | def batch_activ_conv(current, in_features, out_features, kernel_size, is_training, keep_prob):
 65 |   current = tf.contrib.layers.batch_norm(current, scale=True, is_training=is_training, updates_collections=None)
 66 |   current = tf.nn.relu(current)
 67 |   current = conv2d(current, in_features, out_features, kernel_size)
 68 |   current = tf.nn.dropout(current, keep_prob)
 69 |   return current
 70 | 
 71 | def block(input, layers, in_features, growth, is_training, keep_prob):
 72 |   current = input
 73 |   features = in_features
 74 |   for idx in range(layers):
 75 |     tmp = batch_activ_conv(current, features, growth, 3, is_training, keep_prob)
 76 |     current = tf.concat((current, tmp),3)
 77 |     features += growth
 78 |   return current, features
 79 | 
 80 | def avg_pool(input, s):
 81 |   return tf.nn.avg_pool(input, [ 1, s, s, 1 ], [1, s, s, 1 ], 'VALID')
 82 | 
 83 | def get_dict(load_path):
 84 |     import pickle
 85 |     f2 = open(load_path,"rb")
 86 |     load_list = pickle.load(f2)
 87 |     f2.close()
 88 |     return load_list
 89 | 
 90 | #apply K-Q on gradients
 91 | def apply_weightshared_on_grads(grads_and_vars, dict_cenidx):
 92 |     # Mask gradients with pruned elements
 93 |     import config
 94 |     for key, cenidx in dict_cenidx.items():
 95 |         count = 0
 96 |         for grad, var in grads_and_vars:
 97 |                 if var.name == key+":0":
 98 |                     num_cen = config.kmeans_para[key] 
 99 |                     tem_grad=[]    
100 |                     for i in range(num_cen):
101 |                         cenidx_obj = tf.cast(tf.constant(cenidx == i), tf.float32)
102 |                         tem_grad.append(tf.scalar_mul(tf.reduce_sum(tf.multiply(cenidx_obj,grad)),cenidx_obj))
103 |                     grad=(tf.add_n(tem_grad))
104 |                     grads_and_vars[count] = (grad, var)
105 |                 count += 1
106 |     return grads_and_vars
107 | 
108 | def run_model(data, image_dim, label_count, depth):
109 |   weight_decay = 1e-4
110 |   layers = int((depth - 4) / 3)
111 | 
112 |   xs = tf.placeholder("float", shape=[None, image_dim])
113 |   ys = tf.placeholder("float", shape=[None, label_count])
114 |   lr = tf.placeholder("float", shape=[])
115 |   keep_prob = tf.placeholder(tf.float32)
116 |   is_training = tf.placeholder("bool", shape=[])
117 |   
118 |   current = tf.reshape(xs, [ -1, 32, 32, 3 ])
119 |   current = conv2d(current, 3, 16, 3)
120 |   
121 |   current, features = block(current, layers, 16, 12, is_training, keep_prob)
122 |   current = batch_activ_conv(current, features, features, 1, is_training, keep_prob)
123 |   current = avg_pool(current, 2)
124 |   current, features = block(current, layers, features, 12, is_training, keep_prob)
125 |   current = batch_activ_conv(current, features, features, 1, is_training, keep_prob)
126 |   current = avg_pool(current, 2)
127 |   current, features = block(current, layers, features, 12, is_training, keep_prob)
128 |   current = tf.contrib.layers.batch_norm(current, scale=True, is_training=is_training, updates_collections=None)
129 |   current = tf.nn.relu(current)
130 |   current = avg_pool(current, 8)
131 |   
132 |   final_dim = features
133 |   current = tf.reshape(current, [ -1, final_dim ])
134 |   Wfc = weight_variable([ final_dim, label_count ])
135 |   bfc = bias_variable([ label_count ])
136 |   ys_ = tf.nn.softmax( tf.matmul(current, Wfc) + bfc )
137 |   cross_entropy = -tf.reduce_mean(ys * tf.log(ys_ + 1e-12))
138 |   l2 = tf.add_n([tf.nn.l2_loss(var) for var in tf.trainable_variables()])
139 |   
140 |   #Load pruned weights& mask
141 |   para_dict={}
142 |   for k in tf.global_variables():
143 |       if k not in tf.contrib.framework.get_variables_by_suffix('Momentum'):
144 |           para_dict[k.name[:-2]] = k
145 | 
146 |   #Load mask   
147 |   mask_loadpath='100pkmeans64.txt'
148 |   prune_dict = get_dict(mask_loadpath)
149 | 
150 |   #set weights path
151 |   load_path = 'kmeans/100pkmeans64.ckpt'
152 |   #normal save path
153 |   normal_savepath = 'kmeans2/100pkmeans64_%d.ckpt'
154 |   #best result save path  
155 |   best_savepath = 'kmeans3/100pkmeans64_%d_%dok.ckpt'
156 |   ########################config needed
157 |   
158 |   #trainer = tf.train.MomentumOptimizer(lr, 0.9, use_nesterov=True)
159 |   trainer = tf.train.MomentumOptimizer(lr, 0.9, use_nesterov=True)
160 |   #trainer = tf.train.GradientDescentOptimizer(lr)
161 |   
162 |   grads_and_vars = trainer.compute_gradients(cross_entropy)
163 |   grads_and_vars = apply_weightshared_on_grads(grads_and_vars, prune_dict)
164 |   train_step = trainer.apply_gradients(grads_and_vars)
165 |   #train_step = tf.train.MomentumOptimizer(lr, 0.9, use_nesterov=True).minimize(cross_entropy + l2 * weight_decay)
166 |   
167 |   correct_prediction = tf.equal(tf.argmax(ys_, 1), tf.argmax(ys, 1))
168 |   accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
169 |   
170 | 
171 |   session = tf.InteractiveSession()
172 |   batch_size = 64
173 |   learning_rate = 0.1
174 |   session.run(tf.global_variables_initializer())
175 |   saver = tf.train.Saver(para_dict)
176 |   train_data, train_labels = data['train_data'], data['train_labels']
177 |   batch_count = int(len(train_data) / batch_size)
178 |   batches_data = np.split(train_data[:batch_count * batch_size], batch_count)
179 |   batches_labels = np.split(train_labels[:batch_count * batch_size], batch_count)
180 |   print ("Batch per epoch: ", batch_count)
181 |   
182 |   #Load weights
183 |   saver.restore(session,load_path)
184 |   
185 |   #auto-change learning rate to converge quicker  
186 |   sss=0
187 |   count = 0
188 |   final_acc = 0.0
189 |   best_acc = 0.0
190 |   
191 |   test_results = run_in_batch_avg(session, [ cross_entropy, accuracy ], [ xs, ys ],
192 |     feed_dict = { xs: data['test_data'], ys: data['test_labels'], is_training: False, keep_prob: 1. })
193 |   print (test_results)
194 |   final_acc= test_results[1]
195 |   
196 |   for epoch in range(1, 1+300-sss):
197 |     if epoch >= 150-sss: learning_rate = 0.01
198 |     if epoch >= 225-sss: learning_rate = 0.001
199 |     if epoch >= 275-sss: learning_rate = 0.0001
200 |     if final_acc >= 0.77 :learning_rate = 0.01
201 |     if final_acc >= 0.9 :learning_rate = 0.001
202 | #    if final_acc >= 0.927 :learning_rate = 0.0001
203 |     
204 |     for batch_idx in range(batch_count):
205 |         xs_, ys_ = batches_data[batch_idx], batches_labels[batch_idx]
206 |         batch_res = session.run([ train_step, cross_entropy, accuracy ],
207 |           feed_dict = { xs: xs_, ys: ys_, lr: learning_rate, is_training: True, keep_prob: 0.8 })
208 |         if batch_idx % 100 == 0: print (epoch, batch_idx, batch_res[1:])
209 | 
210 |     saver.save(session, normal_savepath % epoch)
211 |     test_results = run_in_batch_avg(session, [ cross_entropy, accuracy ], [ xs, ys ],
212 |           feed_dict = { xs: data['test_data'], ys: data['test_labels'], is_training: False, keep_prob: 1. })
213 |     print (epoch, batch_res[1:], test_results)
214 |     
215 |     #save the best result    
216 |     final_acc= test_results[1]
217 |     if best_acc < final_acc:
218 |         count+=1
219 |         best_acc = final_acc
220 |         acc_num = int(final_acc*100000)
221 |         saver.save(session, best_savepath % (acc_num,count))
222 | 
223 | 
224 | 
225 | 
226 | data_dir = 'data'
227 | image_size = 32
228 | image_dim = image_size * image_size * 3
229 | meta = unpickle(data_dir + '/batches.meta')
230 | label_names = meta['label_names']
231 | label_count = len(label_names)
232 | 
233 | train_files = [ 'data_batch_%d' % d for d in range(1, 6) ]
234 | train_data, train_labels = load_data(train_files, data_dir, label_count)
235 | pi = np.random.permutation(len(train_data))
236 | train_data, train_labels = train_data[pi], train_labels[pi]
237 | test_data, test_labels = load_data([ 'test_batch' ], data_dir, label_count)
238 | print ("Train:", np.shape(train_data), np.shape(train_labels))
239 | print ("Test:", np.shape(test_data), np.shape(test_labels))
240 | data = { 'train_data': train_data,
241 |   'train_labels': train_labels,
242 |   'test_data': test_data,
243 |   'test_labels': test_labels }
244 | 
245 | run_model(data, image_dim, label_count, 40)
246 | 


--------------------------------------------------------------------------------
/densenetfinalprune.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Spyder Editor
  4 | 
  5 | This is a temporary script file.
  6 | """
  7 | 
  8 | import numpy as np
  9 | import tensorflow as tf
 10 | 
 11 | def unpickle(file):
 12 |   import _pickle as cPickle
 13 |   fo = open(file, 'rb')
 14 |   dict = cPickle.load(fo,encoding='latin1')
 15 |   fo.close()
 16 |   if 'data' in dict:
 17 |     dict['data'] = dict['data'].reshape((-1, 3, 32, 32)).swapaxes(1, 3).swapaxes(1, 2).reshape(-1, 32*32*3) / 256.
 18 | 
 19 |   return dict
 20 | def load_data_one(f):
 21 |   batch = unpickle(f)
 22 |   data = batch['data']
 23 |   labels = batch['labels']
 24 |   print ("Loading %s: %d" % (f, len(data)))
 25 |   return data, labels
 26 | 
 27 | def load_data(files, data_dir, label_count):
 28 |   data, labels = load_data_one(data_dir + '/' + files[0])
 29 |   for f in files[1:]:
 30 |     data_n, labels_n = load_data_one(data_dir + '/' + f)
 31 |     data = np.append(data, data_n, axis=0)
 32 |     labels = np.append(labels, labels_n, axis=0)
 33 |   labels = np.array([ [ float(i == label) for i in range(label_count) ] for label in labels ])
 34 |   return data, labels
 35 | def run_in_batch_avg(session, tensors, batch_placeholders, feed_dict={}, batch_size=200):                              
 36 |   res = [ 0 ] * len(tensors)                                                                                           
 37 |   batch_tensors = [ (placeholder, feed_dict[ placeholder ]) for placeholder in batch_placeholders ]                    
 38 |   total_size = len(batch_tensors[0][1])                                                                                
 39 |   batch_count = int((total_size + batch_size - 1) / batch_size)                                                             
 40 |   for batch_idx in range(batch_count):                                                                                
 41 |     current_batch_size = None                                                                                          
 42 |     for (placeholder, tensor) in batch_tensors:                                                                        
 43 |       batch_tensor = tensor[ batch_idx*batch_size : (batch_idx+1)*batch_size ]                                         
 44 |       current_batch_size = len(batch_tensor)                                                                           
 45 |       feed_dict[placeholder] = tensor[ batch_idx*batch_size : (batch_idx+1)*batch_size ]                               
 46 |     tmp = session.run(tensors, feed_dict=feed_dict)                                                                    
 47 |     res = [ r + t * current_batch_size for (r, t) in zip(res, tmp) ]                                                   
 48 |   return [ r / float(total_size) for r in res ]
 49 | def weight_variable(shape):
 50 |   initial = tf.truncated_normal(shape, stddev=0.01)
 51 |   return tf.Variable(initial)
 52 | 
 53 | def bias_variable(shape):
 54 |   initial = tf.constant(0.01, shape=shape)
 55 |   return tf.Variable(initial)
 56 | 
 57 | def conv2d(input, in_features, out_features, kernel_size, with_bias=False):
 58 |   W = weight_variable([ kernel_size, kernel_size, in_features, out_features ])
 59 |   conv = tf.nn.conv2d(input, W, [ 1, 1, 1, 1 ], padding='SAME')
 60 |   if with_bias:
 61 |     return conv + bias_variable([ out_features ])
 62 |   return conv
 63 | 
 64 | def batch_activ_conv(current, in_features, out_features, kernel_size, is_training, keep_prob):
 65 |   current = tf.contrib.layers.batch_norm(current, scale=True, is_training=is_training, updates_collections=None)
 66 |   current = tf.nn.relu(current)
 67 |   current = conv2d(current, in_features, out_features, kernel_size)
 68 |   current = tf.nn.dropout(current, keep_prob)
 69 |   return current
 70 | 
 71 | def block(input, layers, in_features, growth, is_training, keep_prob):
 72 |   current = input
 73 |   features = in_features
 74 |   for idx in range(layers):
 75 |     tmp = batch_activ_conv(current, features, growth, 3, is_training, keep_prob)
 76 |     current = tf.concat((current, tmp),3)
 77 |     features += growth
 78 |   return current, features
 79 | 
 80 | def avg_pool(input, s):
 81 |   return tf.nn.avg_pool(input, [ 1, s, s, 1 ], [1, s, s, 1 ], 'VALID')
 82 | 
 83 | def get_dict(load_path):
 84 |     import pickle
 85 |     f2 = open(load_path,"rb")
 86 |     load_list = pickle.load(f2)
 87 |     f2.close()
 88 |     return load_list
 89 | 
 90 | #apply pruning on gradients
 91 | def apply_prune_on_grads(grads_and_vars, dict_nzidx):
 92 |     # Mask gradients with pruned elements
 93 |     for key, nzidx in dict_nzidx.items():
 94 |         count = 0
 95 |         for grad, var in grads_and_vars:
 96 |             if var.name == key+":0":
 97 |                 nzidx_obj = tf.cast(tf.constant(nzidx), tf.float32)
 98 |                 grads_and_vars[count] = (tf.multiply(nzidx_obj, grad), var)
 99 |             count += 1
100 |     return grads_and_vars
101 | 
102 | def run_model(data, image_dim, label_count, depth):
103 |   weight_decay = 1e-4
104 |   layers = int((depth - 4) / 3)
105 | 
106 |   xs = tf.placeholder("float", shape=[None, image_dim])
107 |   ys = tf.placeholder("float", shape=[None, label_count])
108 |   lr = tf.placeholder("float", shape=[])
109 |   keep_prob = tf.placeholder(tf.float32)
110 |   is_training = tf.placeholder("bool", shape=[])
111 |   
112 |   current = tf.reshape(xs, [ -1, 32, 32, 3 ])
113 |   current = conv2d(current, 3, 16, 3)
114 |   
115 |   current, features = block(current, layers, 16, 12, is_training, keep_prob)
116 |   current = batch_activ_conv(current, features, features, 1, is_training, keep_prob)
117 |   current = avg_pool(current, 2)
118 |   current, features = block(current, layers, features, 12, is_training, keep_prob)
119 |   current = batch_activ_conv(current, features, features, 1, is_training, keep_prob)
120 |   current = avg_pool(current, 2)
121 |   current, features = block(current, layers, features, 12, is_training, keep_prob)
122 |   current = tf.contrib.layers.batch_norm(current, scale=True, is_training=is_training, updates_collections=None)
123 |   current = tf.nn.relu(current)
124 |   current = avg_pool(current, 8)
125 |   
126 |   final_dim = features
127 |   current = tf.reshape(current, [ -1, final_dim ])
128 |   Wfc = weight_variable([ final_dim, label_count ])
129 |   bfc = bias_variable([ label_count ])
130 |   ys_ = tf.nn.softmax( tf.matmul(current, Wfc) + bfc )
131 |   cross_entropy = -tf.reduce_mean(ys * tf.log(ys_ + 1e-12))
132 |   l2 = tf.add_n([tf.nn.l2_loss(var) for var in tf.trainable_variables()])
133 |   correct_prediction = tf.equal(tf.argmax(ys_, 1), tf.argmax(ys, 1))
134 |   accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
135 | 
136 |   #Load pruned weights& mask
137 |   para_dict={}
138 |   for k in tf.global_variables():
139 |       if k not in tf.contrib.framework.get_variables_by_suffix('Momentum'):
140 |           para_dict[k.name[:-2]] = k
141 |     
142 |   
143 |   name = 'prune50110'
144 |   #Load mask
145 |   mask_loadpath=name+'.txt'
146 |   prune_dict = get_dict(mask_loadpath)############################################################################
147 |   
148 |   #set weights path
149 |   #load_path = 'inqcom3260_92849_2.ckpt'
150 |   load_path = 'prune/'+name+'.ckpt'
151 |   #normal save path
152 |   normal_savepath = 'prune2/'+name+'_%d.ckpt'
153 |   #best result save path
154 |   best_savepath = 'prune3/'+name+'_%d_%d.ckpt'
155 |   
156 | 
157 |   import math
158 |   overallprune = 0.75
159 |   keep_probf = 1 - 0.2 * math.sqrt(overallprune) #adjust dropout ratio
160 | 
161 |   
162 | 
163 |   trainer = tf.train.GradientDescentOptimizer(lr)
164 |  # trainer = tf.train.MomentumOptimizer(lr, 0.9, use_nesterov=True).minimize(cross_entropy + l2 * weight_decay)
165 | 
166 |   grads_and_vars = trainer.compute_gradients(cross_entropy)
167 |   grads_and_vars = apply_prune_on_grads(grads_and_vars, prune_dict)
168 |   train_step = trainer.apply_gradients(grads_and_vars)
169 |   #train_step = tf.train.MomentumOptimizer(lr, 0.9, use_nesterov=True).minimize(cross_entropy + l2 * weight_decay)
170 | 
171 |   session = tf.InteractiveSession()
172 |   
173 |   
174 |   batch_size = 64
175 |   learning_rate = 0.01
176 |   #learning_rate = 0.001
177 |   
178 |   session.run(tf.global_variables_initializer())
179 |   saver = tf.train.Saver(para_dict)
180 |   train_data, train_labels = data['train_data'], data['train_labels']
181 |   batch_count = int(len(train_data) / batch_size)
182 |   batches_data = np.split(train_data[:batch_count * batch_size], batch_count)
183 |   batches_labels = np.split(train_labels[:batch_count * batch_size], batch_count)
184 |   print ("Batch per epoch: ", batch_count)
185 |   
186 |   #Load weights
187 |   saver.restore(session,load_path)################################################################
188 |   
189 |   
190 |   final_acc = 0.0
191 |  # test_results = run_in_batch_avg(session, [ cross_entropy, accuracy ], [ xs, ys ],
192 |   #  feed_dict = { xs: data['test_data'], ys: data['test_labels'], is_training: False, keep_prob: 1. })
193 |  # print (test_results)
194 | #  final_acc= test_results[1]
195 |   
196 |   #auto-change learning rate to converge quicker
197 |   sss=0
198 |   count = 0
199 |   best_acc = 0.0
200 |   for epoch in range(1, 1+300-sss):
201 |     if epoch >= 150-sss: learning_rate = 0.01
202 |     if epoch >= 225-sss: learning_rate = 0.001
203 |     if epoch >= 275-sss: learning_rate = 0.0001
204 |     if final_acc >= 0.77 :learning_rate = 0.01
205 |     if final_acc >= 0.9 :learning_rate = 0.001
206 |   #  if final_acc >= 0.927 :learning_rate = 0.0001
207 | 
208 |     
209 |     for batch_idx in range(batch_count):
210 |         xs_, ys_ = batches_data[batch_idx], batches_labels[batch_idx]
211 |         batch_res = session.run([ train_step, cross_entropy, accuracy ],
212 |           feed_dict = { xs: xs_, ys: ys_, lr: learning_rate, is_training: True, keep_prob: keep_probf })
213 |         if batch_idx % 100 == 0: print (epoch, batch_idx, batch_res[1:])
214 | 
215 |     saver.save(session, normal_savepath % epoch)##################################################
216 |     test_results = run_in_batch_avg(session, [ cross_entropy, accuracy ], [ xs, ys ],
217 |           feed_dict = { xs: data['test_data'], ys: data['test_labels'], is_training: False, keep_prob: 1. })
218 |     print (epoch, batch_res[1:], test_results)
219 |     
220 | 
221 |     #save the best result
222 |     final_acc= test_results[1]
223 |     if best_acc < final_acc:
224 |         count+=1
225 |         best_acc = final_acc
226 |         acc_num = int(final_acc*100000)
227 |         saver.save(session, best_savepath % (acc_num,count))##################################
228 |     if final_acc > 0.93: saver.save(session, 'prune75_93.ckpt')
229 | 
230 | 
231 | 
232 | 
233 | data_dir = 'data'
234 | image_size = 32
235 | image_dim = image_size * image_size * 3
236 | meta = unpickle(data_dir + '/batches.meta')
237 | label_names = meta['label_names']
238 | label_count = len(label_names)
239 | 
240 | train_files = [ 'data_batch_%d' % d for d in range(1, 6) ]
241 | train_data, train_labels = load_data(train_files, data_dir, label_count)
242 | pi = np.random.permutation(len(train_data))
243 | train_data, train_labels = train_data[pi], train_labels[pi]
244 | test_data, test_labels = load_data([ 'test_batch' ], data_dir, label_count)
245 | print ("Train:", np.shape(train_data), np.shape(train_labels))
246 | print ("Test:", np.shape(test_data), np.shape(test_labels))
247 | data = { 'train_data': train_data,
248 |   'train_labels': train_labels,
249 |   'test_data': test_data,
250 |   'test_labels': test_labels }
251 | 
252 | run_model(data, image_dim, label_count, 40)
253 | 


--------------------------------------------------------------------------------
/densenetfinaltest.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Spyder Editor
  4 | 
  5 | This is a temporary script file.
  6 | """
  7 | 
  8 | import numpy as np
  9 | import tensorflow as tf
 10 | 
 11 | def unpickle(file):
 12 |   import _pickle as cPickle
 13 |   fo = open(file, 'rb')
 14 |   dict = cPickle.load(fo,encoding='latin1')
 15 |   fo.close()
 16 |   if 'data' in dict:
 17 |     dict['data'] = dict['data'].reshape((-1, 3, 32, 32)).swapaxes(1, 3).swapaxes(1, 2).reshape(-1, 32*32*3) / 256.
 18 | 
 19 |   return dict
 20 | def load_data_one(f):
 21 |   batch = unpickle(f)
 22 |   data = batch['data']
 23 |   labels = batch['labels']
 24 |   print ("Loading %s: %d" % (f, len(data)))
 25 |   return data, labels
 26 | 
 27 | def load_data(files, data_dir, label_count):
 28 |   data, labels = load_data_one(data_dir + '/' + files[0])
 29 |   for f in files[1:]:
 30 |     data_n, labels_n = load_data_one(data_dir + '/' + f)
 31 |     data = np.append(data, data_n, axis=0)
 32 |     labels = np.append(labels, labels_n, axis=0)
 33 |   labels = np.array([ [ float(i == label) for i in range(label_count) ] for label in labels ])
 34 |   return data, labels
 35 | def run_in_batch_avg(session, tensors, batch_placeholders, feed_dict={}, batch_size=200):                              
 36 |   res = [ 0 ] * len(tensors)                                                                                           
 37 |   batch_tensors = [ (placeholder, feed_dict[ placeholder ]) for placeholder in batch_placeholders ]                    
 38 |   total_size = len(batch_tensors[0][1])                                                                                
 39 |   batch_count = int((total_size + batch_size - 1) / batch_size)                                                             
 40 |   for batch_idx in range(batch_count):                                                                                
 41 |     current_batch_size = None                                                                                          
 42 |     for (placeholder, tensor) in batch_tensors:                                                                        
 43 |       batch_tensor = tensor[ batch_idx*batch_size : (batch_idx+1)*batch_size ]                                         
 44 |       current_batch_size = len(batch_tensor)                                                                           
 45 |       feed_dict[placeholder] = tensor[ batch_idx*batch_size : (batch_idx+1)*batch_size ]                               
 46 |     tmp = session.run(tensors, feed_dict=feed_dict)                                                                    
 47 |     res = [ r + t * current_batch_size for (r, t) in zip(res, tmp) ]                                                   
 48 |   return [ r / float(total_size) for r in res ]
 49 | def weight_variable(shape):
 50 |   initial = tf.truncated_normal(shape, stddev=0.01)
 51 |   return tf.Variable(initial)
 52 | 
 53 | def bias_variable(shape):
 54 |   initial = tf.constant(0.01, shape=shape)
 55 |   return tf.Variable(initial)
 56 | 
 57 | def conv2d(input, in_features, out_features, kernel_size, with_bias=False):
 58 |   W = weight_variable([ kernel_size, kernel_size, in_features, out_features ])
 59 |   conv = tf.nn.conv2d(input, W, [ 1, 1, 1, 1 ], padding='SAME')
 60 |   if with_bias:
 61 |     return conv + bias_variable([ out_features ])
 62 |   return conv
 63 | 
 64 | def batch_activ_conv(current, in_features, out_features, kernel_size, is_training, keep_prob):
 65 |   current = tf.contrib.layers.batch_norm(current, scale=True, is_training=is_training, updates_collections=None)
 66 |   current = tf.nn.relu(current)
 67 |   current = conv2d(current, in_features, out_features, kernel_size)
 68 |   current = tf.nn.dropout(current, keep_prob)
 69 |   return current
 70 | 
 71 | def block(input, layers, in_features, growth, is_training, keep_prob):
 72 |   current = input
 73 |   features = in_features
 74 |   for idx in range(layers):
 75 |     tmp = batch_activ_conv(current, features, growth, 3, is_training, keep_prob)
 76 |     current = tf.concat((current, tmp),3)
 77 |     features += growth
 78 |   return current, features
 79 | 
 80 | def avg_pool(input, s):
 81 |   return tf.nn.avg_pool(input, [ 1, s, s, 1 ], [1, s, s, 1 ], 'VALID')
 82 | 
 83 | def run_model(data, image_dim, label_count, depth):
 84 |   weight_decay = 1e-4
 85 |   layers = int((depth - 4) / 3)
 86 | 
 87 |   xs = tf.placeholder("float", shape=[None, image_dim])
 88 |   ys = tf.placeholder("float", shape=[None, label_count])
 89 |   lr = tf.placeholder("float", shape=[])
 90 |   keep_prob = tf.placeholder(tf.float32)
 91 |   is_training = tf.placeholder("bool", shape=[])
 92 |   
 93 |   current = tf.reshape(xs, [ -1, 32, 32, 3 ])
 94 |   current = conv2d(current, 3, 16, 3)
 95 |   
 96 |   current, features = block(current, layers, 16, 12, is_training, keep_prob)
 97 |   current = batch_activ_conv(current, features, features, 1, is_training, keep_prob)
 98 |   current = avg_pool(current, 2)
 99 |   current, features = block(current, layers, features, 12, is_training, keep_prob)
100 |   current = batch_activ_conv(current, features, features, 1, is_training, keep_prob)
101 |   current = avg_pool(current, 2)
102 |   current, features = block(current, layers, features, 12, is_training, keep_prob)
103 |   current = tf.contrib.layers.batch_norm(current, scale=True, is_training=is_training, updates_collections=None)
104 |   current = tf.nn.relu(current)
105 |   current = avg_pool(current, 8)
106 |   
107 |   final_dim = features
108 |   current = tf.reshape(current, [ -1, final_dim ])
109 |   Wfc = weight_variable([ final_dim, label_count ])
110 |   bfc = bias_variable([ label_count ])
111 |   ys_ = tf.nn.softmax( tf.matmul(current, Wfc) + bfc )
112 |   cross_entropy = -tf.reduce_mean(ys * tf.log(ys_ + 1e-12))
113 |   l2 = tf.add_n([tf.nn.l2_loss(var) for var in tf.trainable_variables()])
114 |   train_step = tf.train.MomentumOptimizer(lr, 0.9, use_nesterov=True).minimize(cross_entropy + l2 * weight_decay)
115 |   correct_prediction = tf.equal(tf.argmax(ys_, 1), tf.argmax(ys, 1))
116 |   accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
117 |   
118 |   parm_dict={}
119 |   for k in tf.global_variables():
120 |       if k not in tf.contrib.framework.get_variables_by_suffix('Momentum'):
121 |           parm_dict[k.name[:-2]] = k
122 |   session = tf.InteractiveSession()
123 |   batch_size = 64
124 |   learning_rate = 0.1
125 |   session.run(tf.global_variables_initializer())
126 |   saver = tf.train.Saver(parm_dict)
127 |   train_data, train_labels = data['train_data'], data['train_labels']
128 |   batch_count = int(len(train_data) / batch_size)
129 |   batches_data = np.split(train_data[:batch_count * batch_size], batch_count)
130 |   batches_labels = np.split(train_labels[:batch_count * batch_size], batch_count)
131 |   print ("Batch per epoch: ", batch_count)
132 |   
133 |   
134 |   
135 |   saver.restore(session,'inq16100s.ckpt')
136 |   test_results = run_in_batch_avg(session, [ cross_entropy, accuracy ], [ xs, ys ],
137 |                                   feed_dict = { xs: data['test_data'], ys: data['test_labels'], is_training: False, keep_prob: 1. })
138 |   print(test_results)
139 | 
140 | 
141 | #      if test_results[1]>0.93: saver.save(session, 'densenetest_%d.ckpt' % test_results)
142 | 
143 | 
144 | data_dir = 'data'
145 | image_size = 32
146 | image_dim = image_size * image_size * 3
147 | meta = unpickle(data_dir + '/batches.meta')
148 | label_names = meta['label_names']
149 | label_count = len(label_names)
150 | 
151 | train_files = [ 'data_batch_%d' % d for d in range(1, 6) ]
152 | train_data, train_labels = load_data(train_files, data_dir, label_count)
153 | pi = np.random.permutation(len(train_data))
154 | train_data, train_labels = train_data[pi], train_labels[pi]
155 | test_data, test_labels = load_data([ 'test_batch' ], data_dir, label_count)
156 | print ("Train:", np.shape(train_data), np.shape(train_labels))
157 | print ("Test:", np.shape(test_data), np.shape(test_labels))
158 | data = { 'train_data': train_data,
159 |   'train_labels': train_labels,
160 |   'test_data': test_data,
161 |   'test_labels': test_labels }
162 | 
163 | run_model(data, image_dim, label_count, 40)
164 | 


--------------------------------------------------------------------------------
/dnet_INQ.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {
  7 |     "collapsed": true
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "##################Build Essential DenseNet to Load Pretrained Parameters############################################### \n",
 12 |     "# encoding=utf8  \n",
 13 |     "import numpy as np\n",
 14 |     "import tensorflow as tf\n",
 15 |     "\n",
 16 |     "def unpickle(file):\n",
 17 |     "  import _pickle as cPickle\n",
 18 |     "  fo = open(file, 'rb')\n",
 19 |     "  dict = cPickle.load(fo,encoding='latin1')\n",
 20 |     "  fo.close()\n",
 21 |     "  if 'data' in dict:\n",
 22 |     "    dict['data'] = dict['data'].reshape((-1, 3, 32, 32)).swapaxes(1, 3).swapaxes(1, 2).reshape(-1, 32*32*3) / 256.\n",
 23 |     "\n",
 24 |     "  return dict"
 25 |    ]
 26 |   },
 27 |   {
 28 |    "cell_type": "code",
 29 |    "execution_count": 2,
 30 |    "metadata": {
 31 |     "collapsed": true
 32 |    },
 33 |    "outputs": [],
 34 |    "source": [
 35 |     "def load_data_one(f):\n",
 36 |     "  batch = unpickle(f)\n",
 37 |     "  data = batch['data']\n",
 38 |     "  labels = batch['labels']\n",
 39 |     "  print (\"Loading %s: %d\" % (f, len(data)))\n",
 40 |     "  return data, labels\n"
 41 |    ]
 42 |   },
 43 |   {
 44 |    "cell_type": "code",
 45 |    "execution_count": 3,
 46 |    "metadata": {
 47 |     "collapsed": true
 48 |    },
 49 |    "outputs": [],
 50 |    "source": [
 51 |     "\n",
 52 |     "def load_data(files, data_dir, label_count):\n",
 53 |     "  data, labels = load_data_one(data_dir + '/' + files[0])\n",
 54 |     "  for f in files[1:]:\n",
 55 |     "    data_n, labels_n = load_data_one(data_dir + '/' + f)\n",
 56 |     "    data = np.append(data, data_n, axis=0)\n",
 57 |     "    labels = np.append(labels, labels_n, axis=0)\n",
 58 |     "  labels = np.array([ [ float(i == label) for i in range(label_count) ] for label in labels ])\n",
 59 |     "  return data, labels"
 60 |    ]
 61 |   },
 62 |   {
 63 |    "cell_type": "code",
 64 |    "execution_count": 4,
 65 |    "metadata": {
 66 |     "collapsed": true
 67 |    },
 68 |    "outputs": [],
 69 |    "source": [
 70 |     "def run_in_batch_avg(session, tensors, batch_placeholders, feed_dict={}, batch_size=200):                              \n",
 71 |     "  res = [ 0 ] * len(tensors)                                                                                           \n",
 72 |     "  batch_tensors = [ (placeholder, feed_dict[ placeholder ]) for placeholder in batch_placeholders ]                    \n",
 73 |     "  total_size = len(batch_tensors[0][1])                                                                                \n",
 74 |     "  batch_count = (total_size + batch_size - 1) / batch_size                                                             \n",
 75 |     "  for batch_idx in range(batch_count):                                                                                \n",
 76 |     "    current_batch_size = None                                                                                          \n",
 77 |     "    for (placeholder, tensor) in batch_tensors:                                                                        \n",
 78 |     "      batch_tensor = tensor[ batch_idx*batch_size : (batch_idx+1)*batch_size ]                                         \n",
 79 |     "      current_batch_size = len(batch_tensor)                                                                           \n",
 80 |     "      feed_dict[placeholder] = tensor[ batch_idx*batch_size : (batch_idx+1)*batch_size ]                               \n",
 81 |     "    tmp = session.run(tensors, feed_dict=feed_dict)                                                                    \n",
 82 |     "    res = [ r + t * current_batch_size for (r, t) in zip(res, tmp) ]                                                   \n",
 83 |     "  return [ r / float(total_size) for r in res ]\n"
 84 |    ]
 85 |   },
 86 |   {
 87 |    "cell_type": "code",
 88 |    "execution_count": 5,
 89 |    "metadata": {
 90 |     "collapsed": true
 91 |    },
 92 |    "outputs": [],
 93 |    "source": [
 94 |     "def weight_variable(shape):\n",
 95 |     "  initial = tf.truncated_normal(shape, stddev=0.01)\n",
 96 |     "  return tf.Variable(initial)\n",
 97 |     "\n",
 98 |     "def bias_variable(shape):\n",
 99 |     "  initial = tf.constant(0.01, shape=shape)\n",
100 |     "  return tf.Variable(initial)\n",
101 |     "\n",
102 |     "def conv2d(input, in_features, out_features, kernel_size, with_bias=False):\n",
103 |     "  W = weight_variable([ kernel_size, kernel_size, in_features, out_features ])\n",
104 |     "  conv = tf.nn.conv2d(input, W, [ 1, 1, 1, 1 ], padding='SAME')\n",
105 |     "  if with_bias:\n",
106 |     "    return conv + bias_variable([ out_features ])\n",
107 |     "  return conv\n",
108 |     "\n",
109 |     "def batch_activ_conv(current, in_features, out_features, kernel_size, is_training, keep_prob):\n",
110 |     "  current = tf.contrib.layers.batch_norm(current, scale=True, is_training=is_training, updates_collections=None)\n",
111 |     "  current = tf.nn.relu(current)\n",
112 |     "  current = conv2d(current, in_features, out_features, kernel_size)\n",
113 |     "  current = tf.nn.dropout(current, keep_prob)\n",
114 |     "  return current\n",
115 |     "\n",
116 |     "def block(input, layers, in_features, growth, is_training, keep_prob):\n",
117 |     "  current = input\n",
118 |     "  features = in_features\n",
119 |     "  for idx in range(layers):\n",
120 |     "    tmp = batch_activ_conv(current, features, growth, 3, is_training, keep_prob)\n",
121 |     "    current = tf.concat((current, tmp),3)\n",
122 |     "    features += growth\n",
123 |     "  return current, features\n",
124 |     "\n",
125 |     "def avg_pool(input, s):\n",
126 |     "  return tf.nn.avg_pool(input, [ 1, s, s, 1 ], [1, s, s, 1 ], 'VALID')\n",
127 |     "\n"
128 |    ]
129 |   },
130 |   {
131 |    "cell_type": "code",
132 |    "execution_count": 6,
133 |    "metadata": {
134 |     "collapsed": true
135 |    },
136 |    "outputs": [],
137 |    "source": [
138 |     "data_dir = './data'\n",
139 |     "image_size = 32\n",
140 |     "image_dim = image_size * image_size * 3\n",
141 |     "# meta = unpickle(data_dir + '/batches.meta')\n",
142 |     "# label_names = meta['label_names']\n",
143 |     "# label_count = len(label_names)\n",
144 |     "label_count = 10\n",
145 |     "# train_files = [ 'data_batch_%d' % d for d in range(1, 6) ]\n",
146 |     "# train_data, train_labels = load_data(train_files, data_dir, label_count)\n",
147 |     "# pi = np.random.permutation(len(train_data))\n",
148 |     "# train_data, train_labels = train_data[pi], train_labels[pi]\n",
149 |     "# test_data, test_labels = load_data([ 'test_batch' ], data_dir, label_count)\n",
150 |     "# print (\"Train:\", np.shape(train_data), np.shape(train_labels))\n",
151 |     "# print (\"Test:\", np.shape(test_data), np.shape(test_labels))\n",
152 |     "# data = { 'train_data': train_data,\n",
153 |     "#   'train_labels': train_labels,\n",
154 |     "#   'test_data': test_data,\n",
155 |     "#   'test_labels': test_labels }\n",
156 |     "depth = 40"
157 |    ]
158 |   },
159 |   {
160 |    "cell_type": "code",
161 |    "execution_count": null,
162 |    "metadata": {
163 |     "collapsed": true
164 |    },
165 |    "outputs": [],
166 |    "source": [
167 |     "\n",
168 |     "weight_decay = 1e-4\n",
169 |     "layers = int((depth - 4) / 3)\n",
170 |     "graph = tf.Graph()\n",
171 |     "\n",
172 |     "xs = tf.placeholder(\"float\", shape=[None, image_dim])\n",
173 |     "ys = tf.placeholder(\"float\", shape=[None, label_count])\n",
174 |     "lr = tf.placeholder(\"float\", shape=[])\n",
175 |     "keep_prob = tf.placeholder(tf.float32)\n",
176 |     "is_training = tf.placeholder(\"bool\", shape=[])\n",
177 |     "\n",
178 |     "\n",
179 |     "current = tf.reshape(xs, [ -1, 32, 32, 3 ])\n",
180 |     "current = conv2d(current, 3, 16, 3)\n",
181 |     "\n",
182 |     "current, features = block(current, layers, 16, 12, is_training, keep_prob)\n",
183 |     "current = batch_activ_conv(current, features, features, 1, is_training, keep_prob)\n",
184 |     "current = avg_pool(current, 2)\n",
185 |     "current, features = block(current, layers, features, 12, is_training, keep_prob)\n",
186 |     "current = batch_activ_conv(current, features, features, 1, is_training, keep_prob)\n",
187 |     "current = avg_pool(current, 2)\n",
188 |     "current, features = block(current, layers, features, 12, is_training, keep_prob)\n",
189 |     "\n",
190 |     "current = tf.contrib.layers.batch_norm(current, scale=True, is_training=is_training, updates_collections=None)\n",
191 |     "current = tf.nn.relu(current)\n",
192 |     "current = avg_pool(current, 8)\n",
193 |     "final_dim = features\n",
194 |     "current = tf.reshape(current, [ -1, final_dim ])\n",
195 |     "Wfc = weight_variable([ final_dim, label_count ])\n",
196 |     "bfc = bias_variable([ label_count ])\n",
197 |     "ys_ = tf.nn.softmax( tf.matmul(current, Wfc) + bfc )\n",
198 |     "\n",
199 |     "cross_entropy = -tf.reduce_mean(ys * tf.log(ys_ + 1e-12))\n",
200 |     "l2 = tf.add_n([tf.nn.l2_loss(var) for var in tf.trainable_variables()])\n",
201 |     "train_step = tf.train.MomentumOptimizer(lr, 0.9, use_nesterov=True).minimize(cross_entropy + l2 * weight_decay)\n",
202 |     "correct_prediction = tf.equal(tf.argmax(ys_, 1), tf.argmax(ys, 1))\n",
203 |     "accuracy = tf.reduce_mean(tf.cast(correct_prediction, \"float\"))\n",
204 |     "    \n"
205 |    ]
206 |   },
207 |   {
208 |    "cell_type": "code",
209 |    "execution_count": null,
210 |    "metadata": {
211 |     "collapsed": true
212 |    },
213 |    "outputs": [],
214 |    "source": [
215 |     "para_dict={}\n",
216 |     "for k in tf.global_variables():\n",
217 |     "    if k not in tf.contrib.framework.get_variables_by_suffix('Momentum'): #Load all parameters except ones of optimization functions\n",
218 |     "            para_dict[k.name[:-2]] = k"
219 |    ]
220 |   },
221 |   {
222 |    "cell_type": "code",
223 |    "execution_count": null,
224 |    "metadata": {
225 |     "collapsed": true
226 |    },
227 |    "outputs": [],
228 |    "source": [
229 |     "sess=tf.InteractiveSession()\n",
230 |     "saver = tf.train.Saver(para_dict)\n",
231 |     "#saver.restore(sess,'./inqmodel/stage2/64pinq80/64pinq80ok_93149_7.ckpt')\n",
232 |     "#saver.restore(sess,'./modellog/weightonlypara93.ckpt')\n",
233 |     "saver.restore(sess,'./inqmodel/stage2/inq16_97/inq1697_92729_5.ckpt')\n",
234 |     "#saver.restore(sess,'./prunemodel/stage2/inc100adj/prune100ar_92969_10ok.ckpt')\n",
235 |     "\n",
236 |     "##################End of Pretrained Parameters Loading############################################### "
237 |    ]
238 |   },
239 |   {
240 |    "cell_type": "code",
241 |    "execution_count": null,
242 |    "metadata": {
243 |     "collapsed": true
244 |    },
245 |    "outputs": [],
246 |    "source": [
247 |     "import config\n",
248 |     "#Nearly all hyperparameters are set in config.py "
249 |    ]
250 |   },
251 |   {
252 |    "cell_type": "code",
253 |    "execution_count": 11,
254 |    "metadata": {
255 |     "collapsed": true
256 |    },
257 |    "outputs": [],
258 |    "source": [
259 |     "def apply_inq(weights, inq_dict): # Apply INQ\n",
260 |     "   \n",
261 |     "    for target in config.all_para:\n",
262 |     "        wl = target\n",
263 |     "        bit = config.inq_para[wl]\n",
264 |     "        # Get target layer's weights\n",
265 |     "        weight_obj = weights[wl]\n",
266 |     "        weight_arr = weight_obj.eval()\n",
267 |     "        \n",
268 |     "        \n",
269 |     "        weight_rest = np.reshape(weight_arr,[-1])\n",
270 |     "        dic_tem = np.reshape(inq_dict[wl],[-1])\n",
271 |     "        idx_rest = np.flip(np.argsort(abs(np.reshape(weight_rest,[-1]))),0) #choose which weights to be INQed\n",
272 |     "        num_prune = int(len(weight_rest)*config.inqpercen_para[wl]) #how many weights to be INQed\n",
273 |     "        weight_toINQ = weight_rest[idx_rest[:num_prune]] \n",
274 |     "        \n",
275 |     "        #calculate INQ bounds\n",
276 |     "        n1 = (np.floor(np.log2(max(abs(np.reshape(weight_arr,[-1])))*4/3)))\n",
277 |     "        n2 = n1 +1 - bit/4\n",
278 |     "        upper_bound = 2**(np.floor(np.log2(max(abs(np.reshape(weight_arr,[-1])))*4/3)))\n",
279 |     "        lower_bound = 2**(n1 +1 - bit/4)\n",
280 |     "        \n",
281 |     "        #INQ\n",
282 |     "        weight_toINQ[abs(weight_toINQ) < lower_bound] = 0\n",
283 |     "        weight_toINQ[weight_toINQ != 0] = 2**(np.floor(np.log2(abs(weight_toINQ[weight_toINQ != 0]*4/3))))*np.sign(weight_toINQ[weight_toINQ != 0])\n",
284 |     "\n",
285 |     "        \n",
286 |     "        weight_rest[idx_rest[:num_prune]] = weight_toINQ   \n",
287 |     "        weight_arr =  np.reshape(weight_rest,np.shape(weight_arr))\n",
288 |     "        dic_tem [idx_rest[:num_prune]] =  np.zeros_like(dic_tem [idx_rest[:num_prune]])\n",
289 |     "        inq_dict[wl] = np.reshape(dic_tem,np.shape(inq_dict[wl]))\n",
290 |     "     \n",
291 |     "        # Store INQed weights as tensorflow objects\n",
292 |     "        sess.run(weight_obj.assign(weight_arr))\n",
293 |     "\n",
294 |     "    return inq_dict"
295 |    ]
296 |   },
297 |   {
298 |    "cell_type": "code",
299 |    "execution_count": 12,
300 |    "metadata": {
301 |     "collapsed": true
302 |    },
303 |    "outputs": [],
304 |    "source": [
305 |     "prune_dict = {}\n",
306 |     "for target in config.all_para: #choose which layers\n",
307 |     "    wl =target\n",
308 |     "    weight_obj = para_dict[wl]\n",
309 |     "    prune_dict[wl] = np.ones_like(weight_obj.eval())"
310 |    ]
311 |   },
312 |   {
313 |    "cell_type": "code",
314 |    "execution_count": 13,
315 |    "metadata": {},
316 |    "outputs": [
317 |     {
318 |      "name": "stdout",
319 |      "output_type": "stream",
320 |      "text": [
321 |       "-1.0 -4.0 3.0\n",
322 |       "Variable 0.0625 0.176776695297 0.353553390593\n",
323 |       "-2.0 -5.0 3.0\n",
324 |       "Variable_1 0.03125 0.148650889375 0.210224103813\n",
325 |       "-3.0 -6.0 3.0\n",
326 |       "Variable_2 0.015625 0.136313466583 0.114625505401\n",
327 |       "-3.0 -6.0 3.0\n",
328 |       "Variable_3 0.015625 0.136313466583 0.114625505401\n",
329 |       "-3.0 -6.0 3.0\n",
330 |       "Variable_4 0.015625 0.136313466583 0.114625505401\n",
331 |       "-3.0 -6.0 3.0\n",
332 |       "Variable_5 0.015625 0.136313466583 0.114625505401\n",
333 |       "-3.0 -6.0 3.0\n",
334 |       "Variable_6 0.015625 0.136313466583 0.114625505401\n",
335 |       "-3.0 -6.0 3.0\n",
336 |       "Variable_7 0.015625 0.136313466583 0.114625505401\n",
337 |       "-3.0 -6.0 3.0\n",
338 |       "Variable_8 0.015625 0.136313466583 0.114625505401\n",
339 |       "-3.0 -6.0 3.0\n",
340 |       "Variable_9 0.015625 0.136313466583 0.114625505401\n",
341 |       "-3.0 -6.0 3.0\n",
342 |       "Variable_10 0.015625 0.136313466583 0.114625505401\n",
343 |       "-4.0 -7.0 3.0\n",
344 |       "Variable_11 0.0078125 0.130534222803 0.0598502050437\n",
345 |       "-3.0 -6.0 3.0\n",
346 |       "Variable_12 0.015625 0.136313466583 0.114625505401\n",
347 |       "-3.0 -6.0 3.0\n",
348 |       "Variable_13 0.015625 0.136313466583 0.114625505401\n",
349 |       "-4.0 -7.0 3.0\n",
350 |       "Variable_14 0.0078125 0.130534222803 0.0598502050437\n",
351 |       "-3.0 -6.0 3.0\n",
352 |       "Variable_15 0.015625 0.136313466583 0.114625505401\n",
353 |       "-3.0 -6.0 3.0\n",
354 |       "Variable_16 0.015625 0.136313466583 0.114625505401\n",
355 |       "-4.0 -7.0 3.0\n",
356 |       "Variable_17 0.0078125 0.130534222803 0.0598502050437\n",
357 |       "-4.0 -7.0 3.0\n",
358 |       "Variable_18 0.0078125 0.130534222803 0.0598502050437\n",
359 |       "-3.0 -6.0 3.0\n",
360 |       "Variable_19 0.015625 0.136313466583 0.114625505401\n",
361 |       "-4.0 -7.0 3.0\n",
362 |       "Variable_20 0.0078125 0.130534222803 0.0598502050437\n",
363 |       "-4.0 -7.0 3.0\n",
364 |       "Variable_21 0.0078125 0.130534222803 0.0598502050437\n",
365 |       "-4.0 -7.0 3.0\n",
366 |       "Variable_22 0.0078125 0.130534222803 0.0598502050437\n",
367 |       "-4.0 -7.0 3.0\n",
368 |       "Variable_23 0.0078125 0.130534222803 0.0598502050437\n",
369 |       "-4.0 -7.0 3.0\n",
370 |       "Variable_24 0.0078125 0.130534222803 0.0598502050437\n",
371 |       "-4.0 -7.0 3.0\n",
372 |       "Variable_25 0.0078125 0.130534222803 0.0598502050437\n",
373 |       "-3.0 -6.0 3.0\n",
374 |       "Variable_26 0.015625 0.136313466583 0.114625505401\n",
375 |       "-4.0 -7.0 3.0\n",
376 |       "Variable_27 0.0078125 0.130534222803 0.0598502050437\n",
377 |       "-4.0 -7.0 3.0\n",
378 |       "Variable_28 0.0078125 0.130534222803 0.0598502050437\n",
379 |       "-4.0 -7.0 3.0\n",
380 |       "Variable_29 0.0078125 0.130534222803 0.0598502050437\n",
381 |       "-4.0 -7.0 3.0\n",
382 |       "Variable_30 0.0078125 0.130534222803 0.0598502050437\n",
383 |       "-4.0 -7.0 3.0\n",
384 |       "Variable_31 0.0078125 0.130534222803 0.0598502050437\n",
385 |       "-4.0 -7.0 3.0\n",
386 |       "Variable_32 0.0078125 0.130534222803 0.0598502050437\n",
387 |       "-4.0 -7.0 3.0\n",
388 |       "Variable_33 0.0078125 0.130534222803 0.0598502050437\n",
389 |       "-3.0 -6.0 3.0\n",
390 |       "Variable_34 0.015625 0.136313466583 0.114625505401\n",
391 |       "-3.0 -6.0 3.0\n",
392 |       "Variable_35 0.015625 0.136313466583 0.114625505401\n",
393 |       "-3.0 -6.0 3.0\n",
394 |       "Variable_36 0.015625 0.136313466583 0.114625505401\n",
395 |       "-3.0 -6.0 3.0\n",
396 |       "Variable_37 0.015625 0.136313466583 0.114625505401\n",
397 |       "-3.0 -6.0 3.0\n",
398 |       "Variable_38 0.015625 0.136313466583 0.114625505401\n",
399 |       "0.0 -3.0 3.0\n",
400 |       "Variable_39 0.125 0.25 0.5\n"
401 |      ]
402 |     }
403 |    ],
404 |    "source": [
405 |     "prune_dict = apply_inq(para_dict, prune_dict)"
406 |    ]
407 |   },
408 |   {
409 |    "cell_type": "code",
410 |    "execution_count": 14,
411 |    "metadata": {},
412 |    "outputs": [
413 |     {
414 |      "data": {
415 |       "text/plain": [
416 |        "'./inqmodel/stage1/inq1697.ckpt'"
417 |       ]
418 |      },
419 |      "execution_count": 14,
420 |      "metadata": {},
421 |      "output_type": "execute_result"
422 |     }
423 |    ],
424 |    "source": [
425 |     "saver.save(sess,'./inqmodel/stage1/inq1697.ckpt') #save INQed parameters"
426 |    ]
427 |   },
428 |   {
429 |    "cell_type": "code",
430 |    "execution_count": null,
431 |    "metadata": {
432 |     "collapsed": true
433 |    },
434 |    "outputs": [],
435 |    "source": [
436 |     "#save INQ mask\n",
437 |     "import pickle\n",
438 |     "# create dict\n",
439 |     "# save dict\n",
440 |     "f1 = open(\"C:/Users/lhlne/Desktop/project/densenet/inqmodel/stage1/inq1697.txt\",\"wb\")\n",
441 |     "pickle.dump(prune_dict, f1)\n",
442 |     "f1.close()\n",
443 |     "# load dict\n",
444 |     "f2 = open(\"C:/Users/lhlne/Desktop/project/densenet/inqmodel/stage1/inq1697.txt\",\"rb\")\n",
445 |     "load_list = pickle.load(f2)\n",
446 |     "f2.close()\n",
447 |     "# print \n",
448 |     "print(load_list)"
449 |    ]
450 |   }
451 |  ],
452 |  "metadata": {
453 |   "kernelspec": {
454 |    "display_name": "Python 3",
455 |    "language": "python",
456 |    "name": "python3"
457 |   },
458 |   "language_info": {
459 |    "codemirror_mode": {
460 |     "name": "ipython",
461 |     "version": 3
462 |    },
463 |    "file_extension": ".py",
464 |    "mimetype": "text/x-python",
465 |    "name": "python",
466 |    "nbconvert_exporter": "python",
467 |    "pygments_lexer": "ipython3",
468 |    "version": "3.6.1"
469 |   }
470 |  },
471 |  "nbformat": 4,
472 |  "nbformat_minor": 2
473 | }
474 | 


--------------------------------------------------------------------------------