├── .gitignore
├── Autoencoders
│   ├── .ipynb_checkpoints
│   │   ├── CNN autoencoder-checkpoint.ipynb
│   │   ├── CNN_demo-checkpoint.ipynb
│   │   ├── CNN_mtezcan _report-checkpoint.ipynb
│   │   ├── CNN_mtezcan old-checkpoint.ipynb
│   │   ├── CNN_mtezcan-checkpoint.ipynb
│   │   ├── CNN_mtezcan_with_error-checkpoint.ipynb
│   │   └── Untitled-checkpoint.ipynb
│   ├── CNN-AE.ipynb
│   ├── README.md
│   ├── __pycache__
│   │   └── read_cifar10.cpython-36.pyc
│   ├── deprecated
│   │   ├── CNN-RNN-AE.ipynb
│   │   ├── CNN_demo.ipynb
│   │   ├── CNN_mtezcan old.ipynb
│   │   ├── CNN_mtezcan.ipynb
│   │   ├── MLP-AE.ipynb
│   │   ├── back_mlp_image_compression.ipynb
│   │   ├── dummy.ipynb
│   │   ├── front_mlp_image_compression.ipynb
│   │   ├── google_rnn.ipynb
│   │   └── mlp_image_compression.py
│   └── functions
│       └── .idea
│           └── workspace.xml
├── GAN-AE
│   ├── README.md
│   ├── lena.tiff
│   ├── main.py
│   ├── models.py
│   ├── read_cifar10.py
│   ├── read_data.py
│   └── utils.py
├── GAN
│   ├── CIFAR-10
│   │   └── cifar-10-batches-py
│   │       ├── batches.meta
│   │       └── readme.html
│   ├── README.md
│   ├── main.py
│   ├── models.py
│   ├── models.pyc
│   ├── read_cifar10.py
│   ├── read_cifar10.pyc
│   ├── read_data.py
│   ├── read_data.pyc
│   ├── utils.py
│   └── utils.pyc
├── MLP_lossless
│   ├── .ipynb_checkpoints
│   │   ├── MLP-checkpoint.ipynb
│   │   ├── Untitled-checkpoint.ipynb
│   │   └── desktop.ini
│   ├── MLP.ipynb
│   ├── MayboleCastleLargeImage.bmp
│   ├── README.md
│   ├── baboon.bmp
│   ├── benchmark
│   │   ├── Baboon_cheek.jpg
│   │   ├── Baboon_eye.jpg
│   │   ├── JPEGLS_codec.m
│   │   ├── MayboleCastleLargeImage.bmp
│   │   ├── baboon.bmp
│   │   ├── baboon_cheek.bmp
│   │   ├── baboon_eye.bmp
│   │   ├── bppHuffman.m
│   │   ├── buff.mat
│   │   ├── desktop.ini
│   │   ├── lena512.bmp
│   │   └── lena_eye.bmp
│   └── lena512.bmp
├── README.md
├── SSIM.ipynb
├── final_figures.pptx
├── functions
│   ├── .idea
│   │   ├── functions.iml
│   │   ├── misc.xml
│   │   ├── modules.xml
│   │   └── workspace.xml
│   ├── __pycache__
│   │   ├── dataset_generator.cpython-36.pyc
│   │   ├── image_func.cpython-36.pyc
│   │   ├── network.cpython-36.pyc
│   │   └── read_cifar10.cpython-36.pyc
│   ├── dataset_generator.py
│   ├── image_func.py
│   ├── network.py
│   └── read_cifar10.py
├── report.pdf
├── test_img
│   ├── bu2010_recon.tif
│   ├── jpg_test.m
│   ├── lena512color.tiff
│   ├── lion.tiff
│   └── lion_recon2_convrealFTpx8.tiff
└── wiki image.png

/.gitignore:
--------------------------------------------------------------------------------
1 | Autoencoders/CIFAR-10
2 | Autoencoders/.ipynb_checkpoints
3 | cifar10_recon*
--------------------------------------------------------------------------------
/Autoencoders/.ipynb_checkpoints/Untitled-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "code",
5 |    "execution_count": 1,
6 |    "metadata": {
7 |     "collapsed": true
8 |    },
9 |    "outputs": [],
10 |    "source": [
11 |     "\n",
12 |     "import skimage.io\n",
13 |     "import skimage.color\n",
14 |     "import numpy as np"
15 |    ]
16 |   },
17 |   {
18 |    "cell_type": "code",
19 |    "execution_count": 2,
20 |    "metadata": {
21 |     "collapsed": false
22 |    },
23 |    "outputs": [],
24 |    "source": [
25 |     "import math  # needed for math.ceil below\n", "import read_cifar10 as cf10\n",
26 |     "\n",
27 |     "#@read_data.restartable\n",
28 |     "def cifar10_dataset_generator(dataset_name, batch_size, restrict_size=1000):\n",
29 |     "    assert dataset_name in ['train', 'test']\n",
30 |     "    assert batch_size > 0 or batch_size == -1  # -1 for entire dataset\n",
31 |     "    \n",
32 |     "    X_all_unrestricted, y_all = (cf10.load_training_data() if dataset_name == 'train'\n",
33 |     "                                 else cf10.load_test_data())\n",
34 |     "    \n",
35 |     "    actual_restrict_size = restrict_size if dataset_name == 'train' else int(1e10)\n",
36 |     "    X_all = X_all_unrestricted[:actual_restrict_size]\n",
37 |     "    data_len = 
X_all.shape[0]\n", 38 | " batch_size = batch_size if batch_size > 0 else data_len\n", 39 | " \n", 40 | " X_all_padded = np.concatenate([X_all, X_all[:batch_size]], axis=0)\n", 41 | " y_all_padded = np.concatenate([y_all, y_all[:batch_size]], axis=0)\n", 42 | " \n", 43 | " for slice_i in range(math.ceil(data_len / batch_size)):\n", 44 | " idx = slice_i * batch_size\n", 45 | " #X_batch = X_all_padded[idx:idx + batch_size]\n", 46 | " X_batch = X_all_padded[idx:idx + batch_size]*255 # bugfix: thanks Zezhou Sun!\n", 47 | " y_batch = np.ravel(y_all_padded[idx:idx + batch_size])\n", 48 | " yield X_batch.astype(np.uint8), y_batch.astype(np.uint8)\n", 49 | "\n", 50 | "cifar10_dataset_generators = {\n", 51 | " 'train': cifar10_dataset_generator('train', 1000),\n", 52 | " 'test': cifar10_dataset_generator('test', -1)\n", 53 | "}\n" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 3, 59 | "metadata": { 60 | "collapsed": false 61 | }, 62 | "outputs": [ 63 | { 64 | "name": "stdout", 65 | "output_type": "stream", 66 | "text": [ 67 | "(50000, 32, 32, 3)\n", 68 | "(10000, 32, 32, 3)\n" 69 | ] 70 | } 71 | ], 72 | "source": [ 73 | "#Load cifar-10 data\n", 74 | "\n", 75 | "cf10_tr=cf10.load_training_data()\n", 76 | "cf10_tr_img=cf10_tr[0]\n", 77 | "cf10_tr_label = cf10_tr[1]\n", 78 | "print(cf10_tr_img.shape)\n", 79 | "\n", 80 | "cf10_test=cf10.load_test_data()\n", 81 | "cf10_test_img=cf10_test[0]\n", 82 | "cf10_test_label = cf10_test[1]\n", 83 | "print(cf10_test_img.shape)" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 4, 89 | "metadata": { 90 | "collapsed": false 91 | }, 92 | "outputs": [], 93 | "source": [ 94 | "cf10_test_img_gray=(cf10_test_img[:,:,:,0]+cf10_test_img[:,:,:,1]+cf10_test_img[:,:,:,2])/3.\n", 95 | "cf10_tr_img_gray=(cf10_tr_img[:,:,:,0]+cf10_tr_img[:,:,:,1]+cf10_tr_img[:,:,:,2])/3.\n", 96 | "cf10_tr_vec=np.zeros((50000,1024))\n", 97 | "cf10_test_vec=np.zeros((10000,1024))" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 5, 103 | "metadata": { 104 | "collapsed": false 105 | }, 106 | "outputs": [ 107 | { 108 | "name": "stdout", 109 | "output_type": "stream", 110 | "text": [ 111 | "test is done\n", 112 | "train is done\n" 113 | ] 114 | } 115 | ], 116 | "source": [ 117 | "#Calculate the metrics for jpg\n", 118 | "\n", 119 | "\n", 120 | "mse_jpg=np.zeros((10000,1))\n", 121 | "psnr_jpg=np.zeros((10000,1))\n", 122 | "#Save cifar test images\n", 123 | "for k in range(10000):\n", 124 | " img_gray=(255*cf10_test_img_gray[k,:,:]).astype(np.uint8)\n", 125 | " skimage.io.imsave('../cifar10_jpg/'+str(k)+'.jpg',img_gray)\n", 126 | " \n", 127 | " img_vec = img_gray.reshape([1,-1])\n", 128 | " cf10_test_vec[k,:]=img_vec\n", 129 | " img_recons = skimage.io.imread('../cifar10_jpg/'+str(k)+'.jpg')\n", 130 | " mse=float(((img_recons-img_gray)**2).mean())/255.\n", 131 | " mse_jpg[k]=mse\n", 132 | " psnr_jpg[k]=10.*np.log10(1./mse)\n", 133 | "\n", 134 | "print('test is done')\n", 135 | "for k in range(50000):\n", 136 | " img_gray=(255*cf10_tr_img_gray[k,:,:]).astype(np.uint8)\n", 137 | " img_vec = img_gray.reshape([1,-1])\n", 138 | " cf10_tr_vec[k,:]=img_vec\n", 139 | " \n", 140 | "print('train is done')\n", 141 | "\n" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": 6, 147 | "metadata": { 148 | "collapsed": false 149 | }, 150 | "outputs": [ 151 | { 152 | "name": "stdout", 153 | "output_type": "stream", 154 | "text": [ 155 | "9995\n", 156 | "9996\n", 157 | "9997\n", 158 | "9998\n", 159 | "9999\n" 160 | ] 161 | } 
162 | ], 163 | "source": [ 164 | "A= np.array([[1,2,3],[2,4,5]])\n", 165 | "#print(np.array([[1,2,3]]).reshape([-1]).tolist() )\n", 166 | "\n", 167 | "for k in range(9995,10000):\n", 168 | " if(cf10_test_vec[k,:].reshape([-1]).tolist() in cf10_tr_vec.tolist()):\n", 169 | " print(str(k)+':(')\n", 170 | " else:\n", 171 | " print(k)" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": 7, 177 | "metadata": { 178 | "collapsed": false 179 | }, 180 | "outputs": [ 181 | { 182 | "name": "stdout", 183 | "output_type": "stream", 184 | "text": [ 185 | "1.57181456769e-06\n", 186 | "10.1233693327\n" 187 | ] 188 | } 189 | ], 190 | "source": [ 191 | "print(mse_jpg.mean()/(255.*255.))\n", 192 | "print(psnr_jpg.mean())" 193 | ] 194 | }, 195 | { 196 | "cell_type": "markdown", 197 | "metadata": {}, 198 | "source": [ 199 | "# Part-1 - MLP" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": 8, 205 | "metadata": { 206 | "collapsed": false 207 | }, 208 | "outputs": [ 209 | { 210 | "ename": "NameError", 211 | "evalue": "name 'pprint' is not defined", 212 | "output_type": "error", 213 | "traceback": [ 214 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 215 | "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", 216 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mx_test\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcf10_test_vec\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfloat32\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m/\u001b[0m\u001b[0;36m255.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mx_test\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mx_test\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;36m1000\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mpprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx_tr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5\u001b[0m \u001b[0mpprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx_test\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 217 | "\u001b[0;31mNameError\u001b[0m: name 'pprint' is not defined" 218 | ] 219 | } 220 | ], 221 | "source": [ 222 | "x_tr = cf10_tr_vec.astype(np.float32)/255.\n", 223 | "x_test = cf10_test_vec.astype(np.float32)/255.\n", 224 | "x_test=x_test[:1000,:]\n", 225 | "pprint(x_tr)\n", 226 | "pprint(x_test)" 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": 9, 232 | "metadata": { 233 | "collapsed": false 234 | }, 235 | "outputs": [], 236 | "source": [ 237 | "import tensorflow as tf\n", 238 | "\n", 239 | "def mlp1(x, hidden_sizes, activation_fn=tf.nn.relu,dropout_rate=1.0,std_dev=1.0):\n", 240 | " if not isinstance(hidden_sizes, (list, tuple)):\n", 241 | " raise ValueError(\"hidden_sizes must be a list or a tuple\")\n", 242 | " scope_args = {'initializer': tf.random_normal_initializer(stddev=std_dev)}\n", 243 | " for k in range(len(hidden_sizes)-1):\n", 244 | " layer_name=\"weights\"+str(k)\n", 245 | " #FC layers\n", 246 | " with tf.variable_scope(layer_name, **scope_args):\n", 247 | " W = tf.get_variable('W', shape=[x.shape[-1], hidden_sizes[k]])\n", 248 | " b = tf.get_variable('b', shape=[hidden_sizes[k]])\n", 249 | " x = activation_fn(tf.matmul(x, W) + b)\n", 250 | " #Dropout before the last layer\n", 
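"    # note: despite its name, dropout_rate is passed as keep_prob, so dropout_rate=1.0 means dropout is disabled\n",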
251 | " x = tf.nn.dropout(x, keep_prob=dropout_rate)\n", 252 | " #Softmax layer\n", 253 | " with tf.variable_scope('outlayer', **scope_args):\n", 254 | " W = tf.get_variable('W', shape=[x.shape[-1], hidden_sizes[-1]])\n", 255 | " b = tf.get_variable('b', shape=[hidden_sizes[-1]])\n", 256 | " return tf.matmul(x, W) + b" 257 | ] 258 | }, 259 | { 260 | "cell_type": "code", 261 | "execution_count": 10, 262 | "metadata": { 263 | "collapsed": false 264 | }, 265 | "outputs": [], 266 | "source": [ 267 | "import tensorflow as tf\n", 268 | "\n", 269 | "def mlp2(x, hidden_sizes_1,hidden_sizes_2, activation_fn=tf.nn.relu,dropout_rate=1.0,std_dev=1.0):\n", 270 | " scope_args = {'initializer': tf.random_normal_initializer(stddev=std_dev)}\n", 271 | " for k in range(len(hidden_sizes_1)-1):\n", 272 | " layer_name=\"weights_enc\"+str(k)\n", 273 | " #FC layers\n", 274 | " with tf.variable_scope(layer_name, **scope_args):\n", 275 | " W = tf.get_variable('W', shape=[x.shape[-1], hidden_sizes_1[k]])\n", 276 | " b = tf.get_variable('b', shape=[hidden_sizes_1[k]])\n", 277 | " x = activation_fn(tf.matmul(x, W) + b)\n", 278 | " #Dropout before the last layer\n", 279 | " x = tf.nn.dropout(x, keep_prob=dropout_rate)\n", 280 | " #Softmax layer\n", 281 | " with tf.variable_scope('outlayer_enc', **scope_args):\n", 282 | " W = tf.get_variable('W', shape=[x.shape[-1], hidden_sizes_1[-1]])\n", 283 | " b = tf.get_variable('b', shape=[hidden_sizes_1[-1]])\n", 284 | " x = activation_fn(tf.matmul(x, W) + b)\n", 285 | " \n", 286 | " x_quant = tf.round(x*255.)/255.\n", 287 | " \n", 288 | " for k in range(len(hidden_sizes_2)-1):\n", 289 | " layer_name=\"weights_dec\"+str(k)\n", 290 | " #FC layers\n", 291 | " with tf.variable_scope(layer_name, **scope_args):\n", 292 | " W = tf.get_variable('W', shape=[x.shape[-1], hidden_sizes_2[k]])\n", 293 | " b = tf.get_variable('b', shape=[hidden_sizes_2[k]])\n", 294 | " x = activation_fn(tf.matmul(x, W) + b)\n", 295 | " x_quant=(activation_fn(tf.matmul(x_quant, W) + b))\n", 296 | " #Dropout before the last layer\n", 297 | " x = tf.nn.dropout(x, keep_prob=dropout_rate)\n", 298 | " #Softmax layer\n", 299 | " with tf.variable_scope('outlayer_dec', **scope_args):\n", 300 | " W = tf.get_variable('W', shape=[x.shape[-1], hidden_sizes_2[-1]])\n", 301 | " b = tf.get_variable('b', shape=[hidden_sizes_2[-1]])\n", 302 | " \n", 303 | " return (tf.matmul(x, W) + b,tf.matmul(x_quant, W) + b)" 304 | ] 305 | }, 306 | { 307 | "cell_type": "code", 308 | "execution_count": 13, 309 | "metadata": { 310 | "collapsed": false, 311 | "scrolled": false 312 | }, 313 | "outputs": [ 314 | { 315 | "name": "stdout", 316 | "output_type": "stream", 317 | "text": [ 318 | "iteration 0\t train mse: 6.549\t\n", 319 | "iteration 0\t TEST MSE: 7.306\t 7.306\t\n", 320 | "iteration 1000\t train mse: 0.025\t\n", 321 | "iteration 2000\t train mse: 0.022\t\n", 322 | "iteration 3000\t train mse: 0.021\t\n", 323 | "iteration 4000\t train mse: 0.021\t\n", 324 | "iteration 5000\t train mse: 0.021\t\n", 325 | "iteration 5000\t TEST MSE: 0.021\t 0.021\t\n", 326 | "iteration 6000\t train mse: 0.021\t\n", 327 | "iteration 7000\t train mse: 0.021\t\n", 328 | "iteration 8000\t train mse: 0.021\t\n", 329 | "iteration 9000\t train mse: 0.021\t\n", 330 | "iteration 10000\t train mse: 0.021\t\n", 331 | "iteration 10000\t TEST MSE: 0.021\t 0.021\t\n" 332 | ] 333 | } 334 | ], 335 | "source": [ 336 | "import pprint\n", 337 | "\n", 338 | "def test_classification(model_function, learning_rate=0.1):\n", 339 | "\n", 340 | " with tf.Graph().as_default() as 
g:\n", 341 | " # where are you going to allocate memory and perform computations\n", 342 | " with tf.device(\"/gpu:0\"):\n", 343 | " \n", 344 | " # define model \"input placeholders\", i.e. variables that are\n", 345 | " # going to be substituted with input data on train/test time\n", 346 | " x_ = tf.placeholder(tf.float32, [None, 1024])\n", 347 | " #y_logits = model_function(x_)\n", 348 | " y_logits,x_recon = model_function(x_)\n", 349 | "\n", 350 | " \n", 351 | " # naive implementation of loss:\n", 352 | " # > losses = y_ * tf.log(tf.nn.softmax(y_logits))\n", 353 | " # > tf.reduce_mean(-tf.reduce_sum(losses, 1))\n", 354 | " # can be numerically unstable.\n", 355 | " #\n", 356 | " # so here we use tf.nn.softmax_cross_entropy_with_logits on the raw\n", 357 | " # outputs of 'y', and then average across the batch.\n", 358 | " \n", 359 | " loss = tf.reduce_mean(tf.subtract(x_,y_logits)**2)\n", 360 | " loss2=tf.reduce_mean(tf.subtract(x_,x_recon)**2)\n", 361 | " train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)\n", 362 | " \n", 363 | " #y_pred = tf.argmax(tf.nn.softmax(y_logits), dimension=1)\n", 364 | " #correct_prediction = tf.equal(y_pred, tf.argmax(y_, 1))\n", 365 | " #accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))\n", 366 | "\n", 367 | " with g.as_default(), tf.Session() as sess:\n", 368 | " # that is how we \"execute\" statements \n", 369 | " # (return None, e.g. init() or train_op())\n", 370 | " # or compute parts of graph defined above (loss, output, etc.)\n", 371 | " # given certain input (x_, y_)\n", 372 | " #sess.run(tf.initialize_all_variables())\n", 373 | " sess.run(tf.global_variables_initializer())\n", 374 | " \n", 375 | " # train\n", 376 | " ids=[i for i in range(100)]\n", 377 | " for iter_i in range(1000001):\n", 378 | " batch_xs = x_tr[ids,:] \n", 379 | " ids=[(ids[0]+100+i)%x_tr.shape[0] for i in range(100)]\n", 380 | " sess.run(train_step, feed_dict={x_: batch_xs})\n", 381 | " \n", 382 | " # test trained model\n", 383 | " if iter_i % 1000 == 0:\n", 384 | " tf_feed_dict = {x_: batch_xs}\n", 385 | " loss_val = sess.run(loss, feed_dict=tf_feed_dict)\n", 386 | " print('iteration %d\\t train mse: %.3f\\t'%(iter_i,loss_val))\n", 387 | " if iter_i%5000 == 0:\n", 388 | " \n", 389 | " loss_val_test = sess.run(loss, feed_dict={x_:x_test})\n", 390 | " loss_val2_test = sess.run(loss2, feed_dict={x_:x_test})\n", 391 | " print('iteration %d\\t TEST MSE: %.3f\\t %.3f\\t'%(iter_i,loss_val_test,loss_val2_test))\n", 392 | " '''\n", 393 | " x_from_tr=sess.run(y_logits, feed_dict={x_:batch_xs[:5,:].reshape([-1,1024])})\n", 394 | " x_from_test=sess.run(y_logits, feed_dict={x_:x_test[0:5,:].reshape([-1,1024])})\n", 395 | " print('Train')\n", 396 | " print((x_from_tr-batch_xs[:5,:])**2)\n", 397 | " print('Test')\n", 398 | " print((x_from_test-x_test[:5,:])**2)\n", 399 | " '''\n", 400 | " \n", 401 | " \n", 402 | "#test_classification(lambda x: mlp1(x, [850,700,500,700,850,1024],\n", 403 | "# activation_fn=tf.nn.relu,std_dev=1e-1), learning_rate=1e-3)\n", 404 | "\n", 405 | "test_classification(lambda x: mlp2(x, [1024],[1024],\n", 406 | " activation_fn=tf.nn.relu,std_dev=1e-1), learning_rate=1e-3)" 407 | ] 408 | } 409 | ], 410 | "metadata": { 411 | "kernelspec": { 412 | "display_name": "Python 3", 413 | "language": "python", 414 | "name": "python3" 415 | }, 416 | "language_info": { 417 | "codemirror_mode": { 418 | "name": "ipython", 419 | "version": 3 420 | }, 421 | "file_extension": ".py", 422 | "mimetype": "text/x-python", 423 | "name": "python", 424 | 
"nbconvert_exporter": "python", 425 | "pygments_lexer": "ipython3", 426 | "version": "3.6.0" 427 | } 428 | }, 429 | "nbformat": 4, 430 | "nbformat_minor": 2 431 | } 432 | -------------------------------------------------------------------------------- /Autoencoders/README.md: -------------------------------------------------------------------------------- 1 | ## Lossy image compression using autoencoders 2 | 3 | This part 4 different aprroaches with autoencoders. 4 | For running the codes, follow the steps in notebook files 5 | - [MLP-AE](https://github.com/scelesticsiva/Neural-Networks-for-Image-Compression/blob/master/Autoencoders/MLP-AE.ipynb) 6 | - [CNN-AE and CNN-AE-FT](https://github.com/scelesticsiva/Neural-Networks-for-Image-Compression/blob/master/Autoencoders/CNN-AE-FT.ipynb) 7 | - [CNN-RNN-AE](https://github.com/scelesticsiva/Neural-Networks-for-Image-Compression/blob/master/Autoencoders/CNN-RNN-AE.ipynb) 8 | 9 | -------------------------------------------------------------------------------- /Autoencoders/__pycache__/read_cifar10.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scelesticsiva/Neural-Networks-for-Image-Compression/f399e6fa9e4b64d89a5284ce4041211daaaa725a/Autoencoders/__pycache__/read_cifar10.cpython-36.pyc -------------------------------------------------------------------------------- /Autoencoders/deprecated/CNN-RNN-AE.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import tensorflow as tf\n", 12 | "import skimage.io\n", 13 | "import skimage.color\n", 14 | "import numpy as np\n", 15 | "from pprint import pprint\n", 16 | "\n", 17 | "import sys\n", 18 | "sys.path.insert(1,'../functions') \n", 19 | "\n", 20 | "import dataset_generator as data\n", 21 | "import read_cifar10 as cf10\n", 22 | "import image_func as imf\n", 23 | "import network" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "metadata": { 30 | "collapsed": false 31 | }, 32 | "outputs": [], 33 | "source": [ 34 | "cifar10_dataset_generators = {\n", 35 | " 'train': data.cifar10_dataset_generator('train', 1000),\n", 36 | " 'test': data.cifar10_dataset_generator('test', -1)\n", 37 | "}\n" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": { 44 | "collapsed": false 45 | }, 46 | "outputs": [], 47 | "source": [ 48 | "#Load cifar-10 data\n", 49 | "cf10_tr=cf10.load_training_data()\n", 50 | "cf10_tr_img=cf10_tr[0]\n", 51 | "cf10_tr_label = cf10_tr[1]\n", 52 | "\n", 53 | "cf10_test=cf10.load_test_data()\n", 54 | "cf10_test_img=cf10_test[0]\n", 55 | "cf10_test_label = cf10_test[1]" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": { 62 | "collapsed": false 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "lena_img = skimage.io.imread('../test_img/lena512color.tiff')\n", 67 | "lena_32=imf.img2block(lena_img)\n", 68 | "\n", 69 | " " 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": {}, 75 | "source": [ 76 | "# Part-2 - CNN" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": { 83 | "collapsed": false 84 | }, 85 | "outputs": [], 86 | "source": [ 87 | "#Create the inputs in the desired format\n", 88 | "x_tr = 
cf10_tr_img.astype(np.float32)#*255.\n", 89 | "x_test = cf10_test_img.astype(np.float32)#*255.\n", 90 | "x_test=x_test[:200,:,:,:]\n", 91 | "img = skimage.io.imread('../test_img/lena512color.tiff')\n", 92 | "im_32=imf.img2block(img)\n" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": null, 98 | "metadata": { 99 | "collapsed": false 100 | }, 101 | "outputs": [], 102 | "source": [ 103 | "tf.reset_default_graph()\n", 104 | "model_dict=network.apply_classification_loss_mse_with_rnn(kernels1=[5,7,9,9],kernels2=[9,7,7,5],\n", 105 | " filters1=[128,64,16,4],filters2=[8,8,3,3],\n", 106 | " pool_size=[1,2,2,1,1,2,2,1],learning_rate=7e-5)\n", 107 | "saver = network.train_model(model_dict,x_tr,x_test,im_32, train_every=100, test_every=200,max_iter=500,load=False,\n", 108 | " fname='cifar10_recon3',outname='/tmp/cnnx4_test2')" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": null, 114 | "metadata": { 115 | "collapsed": false 116 | }, 117 | "outputs": [], 118 | "source": [ 119 | "#YOU NEED TO CREATE A FOLDER NAMED 'cifar10_recon0' BEFORE RUNNING THAT CODE\n", 120 | "tf.reset_default_graph()\n", 121 | "model_dict=network.apply_classification_loss_mse_with_rnn(kernels1=[5,7,9,9],kernels2=[9,7,7,5],\n", 122 | " filters1=[128,64,16,4],filters2=[8,8,3,3],\n", 123 | " pool_size=[1,2,2,1,1,2,2,1],learning_rate=7e-5)\n", 124 | "saver = network.train_model(model_dict,x_tr,x_test,im_32, train_every=100, test_every=100,max_iter=500,load=False,\n", 125 | " fname='cifar10_recon0',outname='/tmp/cnnx4_test0')" 126 | ] 127 | } 128 | ], 129 | "metadata": { 130 | "kernelspec": { 131 | "display_name": "Python 3", 132 | "language": "python", 133 | "name": "python3" 134 | }, 135 | "language_info": { 136 | "codemirror_mode": { 137 | "name": "ipython", 138 | "version": 3 139 | }, 140 | "file_extension": ".py", 141 | "mimetype": "text/x-python", 142 | "name": "python", 143 | "nbconvert_exporter": "python", 144 | "pygments_lexer": "ipython3", 145 | "version": "3.6.0" 146 | } 147 | }, 148 | "nbformat": 4, 149 | "nbformat_minor": 2 150 | } 151 | -------------------------------------------------------------------------------- /Autoencoders/deprecated/CNN_mtezcan old.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import tensorflow as tf\n", 12 | "import skimage.io\n", 13 | "import skimage.color\n", 14 | "import numpy as np\n", 15 | "from pprint import pprint" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": { 22 | "collapsed": false 23 | }, 24 | "outputs": [], 25 | "source": [ 26 | "import read_cifar10 as cf10\n", 27 | "\n", 28 | "#@read_data.restartable\n", 29 | "def cifar10_dataset_generator(dataset_name, batch_size, restrict_size=1000):\n", 30 | " assert dataset_name in ['train', 'test']\n", 31 | " assert batch_size > 0 or batch_size == -1 # -1 for entire dataset\n", 32 | " \n", 33 | " X_all_unrestricted, y_all = (cf10.load_training_data() if dataset_name == 'train'\n", 34 | " else cf10.load_test_data())\n", 35 | " \n", 36 | " actual_restrict_size = restrict_size if dataset_name == 'train' else int(1e10)\n", 37 | " X_all = X_all_unrestricted[:actual_restrict_size]\n", 38 | " data_len = X_all.shape[0]\n", 39 | " batch_size = batch_size if batch_size > 0 else data_len\n", 40 | " \n", 41 | " X_all_padded = np.concatenate([X_all, 
X_all[:batch_size]], axis=0)\n", 42 | " y_all_padded = np.concatenate([y_all, y_all[:batch_size]], axis=0)\n", 43 | " \n", 44 | " for slice_i in range(math.ceil(data_len / batch_size)):\n", 45 | " idx = slice_i * batch_size\n", 46 | " #X_batch = X_all_padded[idx:idx + batch_size]\n", 47 | " X_batch = X_all_padded[idx:idx + batch_size]*255 # bugfix: thanks Zezhou Sun!\n", 48 | " y_batch = np.ravel(y_all_padded[idx:idx + batch_size])\n", 49 | " yield X_batch.astype(np.uint8), y_batch.astype(np.uint8)\n", 50 | "\n", 51 | "cifar10_dataset_generators = {\n", 52 | " 'train': cifar10_dataset_generator('train', 1000),\n", 53 | " 'test': cifar10_dataset_generator('test', -1)\n", 54 | "}\n" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": { 61 | "collapsed": false 62 | }, 63 | "outputs": [], 64 | "source": [ 65 | "#Load cifar-10 data\n", 66 | "cf10_tr=cf10.load_training_data()\n", 67 | "cf10_tr_img=cf10_tr[0]\n", 68 | "cf10_tr_label = cf10_tr[1]\n", 69 | "\n", 70 | "cf10_test=cf10.load_test_data()\n", 71 | "cf10_test_img=cf10_test[0]\n", 72 | "cf10_test_label = cf10_test[1]" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": { 79 | "collapsed": false 80 | }, 81 | "outputs": [], 82 | "source": [ 83 | "cf10_test_img_gray=(cf10_test_img[:,:,:,0]+cf10_test_img[:,:,:,1]+cf10_test_img[:,:,:,2])/3.\n", 84 | "cf10_tr_img_gray=(cf10_tr_img[:,:,:,0]+cf10_tr_img[:,:,:,1]+cf10_tr_img[:,:,:,2])/3.\n", 85 | "cf10_tr_vec=np.zeros((50000,1024))\n", 86 | "cf10_test_vec=np.zeros((10000,1024))" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": { 93 | "collapsed": false 94 | }, 95 | "outputs": [], 96 | "source": [ 97 | "import skimage.io\n", 98 | "def img2block(im):\n", 99 | " '''\n", 100 | " Image patching code. 
It patches a given RGB image into 32x32 blocks and returns a 4D array with size \n", 101 | " [number_of_patches,32,32,3]\n", 102 | " '''\n", 103 | " im = im.astype(np.float32)\n", 104 | " row,col,color = im.shape\n", 105 | " im_bl=np.zeros((int(row*col/1024),32,32,3)).astype(np.float32)\n", 106 | " count=0\n", 107 | " for i in range(0,row-row%32,32):\n", 108 | " for j in range(0,col-col%32,32):\n", 109 | " im_bl[count,:,:,:]=im[i:i+32,j:j+32,:]\n", 110 | " count = count +1\n", 111 | " im_bl=im_bl/255.\n", 112 | " return im_bl\n", 113 | "\n", 114 | "def block2img(img_blocks,img_size):\n", 115 | " '''\n", 116 | " Function for reconstructing the image back from patches\n", 117 | " '''\n", 118 | " row,col = img_size\n", 119 | " img=np.zeros((row,col,3)).astype(np.float32)\n", 120 | " n,k,l,c=img_blocks.shape\n", 121 | " \n", 122 | " for i in range(0,int(row/k)):\n", 123 | " for j in range(0,int(col/k)):\n", 124 | " img[i*k:(i+1)*k,j*l:(j+1)*l,:]=img_blocks[int(i*col/k+j),:,:,:]\n", 125 | " return img\n", 126 | "\n", 127 | "#Get the patches of lena image\n", 128 | "lena_img = skimage.io.imread('../test_img/lena512color.tiff')\n", 129 | "lena_32=img2block(lena_img)\n", 130 | " \n", 131 | " " 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "metadata": { 138 | "collapsed": true 139 | }, 140 | "outputs": [], 141 | "source": [ 142 | "def convert2uint8(img):\n", 143 | " img[img>255]=255\n", 144 | " img[img<0]=0\n", 145 | " return img.astype(np.uint8)" 146 | ] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": {}, 151 | "source": [ 152 | "# Part-1 - MLP" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": null, 158 | "metadata": { 159 | "collapsed": false 160 | }, 161 | "outputs": [], 162 | "source": [ 163 | "x_tr = cf10_tr_vec.astype(np.float32)/255.\n", 164 | "x_test = cf10_test_vec.astype(np.float32)/255.\n", 165 | "x_test=x_test[:1000,:]" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "metadata": { 172 | "collapsed": false 173 | }, 174 | "outputs": [], 175 | "source": [ 176 | "\n", 177 | "def mlp1(x, hidden_sizes, activation_fn=tf.nn.relu,dropout_rate=1.0,std_dev=1.0):\n", 178 | " if not isinstance(hidden_sizes, (list, tuple)):\n", 179 | " raise ValueError(\"hidden_sizes must be a list or a tuple\")\n", 180 | " scope_args = {'initializer': tf.random_normal_initializer(stddev=std_dev)}\n", 181 | " for k in range(len(hidden_sizes)-1):\n", 182 | " layer_name=\"weights\"+str(k)\n", 183 | " #FC layers\n", 184 | " with tf.variable_scope(layer_name, **scope_args):\n", 185 | " W = tf.get_variable('W', shape=[x.shape[-1], hidden_sizes[k]])\n", 186 | " b = tf.get_variable('b', shape=[hidden_sizes[k]])\n", 187 | " x = activation_fn(tf.matmul(x, W) + b)\n", 188 | " #Dropout before the last layer\n", 189 | " x = tf.nn.dropout(x, keep_prob=dropout_rate)\n", 190 | " #Softmax layer\n", 191 | " with tf.variable_scope('outlayer', **scope_args):\n", 192 | " W = tf.get_variable('W', shape=[x.shape[-1], hidden_sizes[-1]])\n", 193 | " b = tf.get_variable('b', shape=[hidden_sizes[-1]])\n", 194 | " return tf.matmul(x, W) + b" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": null, 200 | "metadata": { 201 | "collapsed": false 202 | }, 203 | "outputs": [], 204 | "source": [ 205 | "import tensorflow as tf\n", 206 | "\n", 207 | "def mlp2(x, hidden_sizes_1,hidden_sizes_2, activation_fn=tf.nn.relu,dropout_rate=1.0,std_dev=1.0,cons_mult=1):\n", 208 | " scope_args = {'initializer': 
tf.random_normal_initializer(stddev=std_dev)}\n", 209 | " for k in range(len(hidden_sizes_1)-1):\n", 210 | " layer_name=\"weights_enc\"+str(k)\n", 211 | " #FC layers\n", 212 | " with tf.variable_scope(layer_name, **scope_args):\n", 213 | " W = tf.get_variable('W', shape=[x.shape[-1], hidden_sizes_1[k]])\n", 214 | " b = tf.get_variable('b', shape=[hidden_sizes_1[k]])\n", 215 | " x = activation_fn(tf.matmul(x, W) + cons_mult*b)\n", 216 | " #Dropout before the last layer\n", 217 | " x = tf.nn.dropout(x, keep_prob=dropout_rate)\n", 218 | " #Softmax layer\n", 219 | " with tf.variable_scope('outlayer_enc', **scope_args):\n", 220 | " W = tf.get_variable('W', shape=[x.shape[-1], hidden_sizes_1[-1]])\n", 221 | " b = tf.get_variable('b', shape=[hidden_sizes_1[-1]])\n", 222 | " x = activation_fn(tf.matmul(x, W) + cons_mult*b)\n", 223 | " \n", 224 | " x_quant = tf.round(x*255.)/255.\n", 225 | " \n", 226 | " for k in range(len(hidden_sizes_2)-1):\n", 227 | " layer_name=\"weights_dec\"+str(k)\n", 228 | " #FC layers\n", 229 | " with tf.variable_scope(layer_name, **scope_args):\n", 230 | " W = tf.get_variable('W', shape=[x.shape[-1], hidden_sizes_2[k]])\n", 231 | " b = tf.get_variable('b', shape=[hidden_sizes_2[k]])\n", 232 | " x = activation_fn(tf.matmul(x, W) + cons_mult*b)\n", 233 | " x_quant=(activation_fn(tf.matmul(x_quant, W) + cons_mult*b))\n", 234 | " #Dropout before the last layer\n", 235 | " x = tf.nn.dropout(x, keep_prob=dropout_rate)\n", 236 | " #Softmax layer\n", 237 | " with tf.variable_scope('outlayer_dec', **scope_args):\n", 238 | " W = tf.get_variable('W', shape=[x.shape[-1], hidden_sizes_2[-1]])\n", 239 | " b = tf.get_variable('b', shape=[hidden_sizes_2[-1]])\n", 240 | " \n", 241 | " return (tf.matmul(x, W) + cons_mult*b,tf.matmul(x_quant, W) + cons_mult*b)" 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": null, 247 | "metadata": { 248 | "collapsed": false 249 | }, 250 | "outputs": [], 251 | "source": [ 252 | "import pprint\n", 253 | "\n", 254 | "def test_classification(model_function, learning_rate=0.1):\n", 255 | "\n", 256 | " with tf.Graph().as_default() as g:\n", 257 | " # where are you going to allocate memory and perform computations\n", 258 | " with tf.device(\"/gpu:0\"):\n", 259 | " x_ = tf.placeholder(tf.float32, [None, 1024])\n", 260 | " y_logits,x_recon = model_function(x_)\n", 261 | " loss = tf.reduce_mean(tf.subtract(x_,y_logits)**2)\n", 262 | " loss2=tf.reduce_mean(tf.subtract(x_,x_recon)**2)\n", 263 | " train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)\n", 264 | " \n", 265 | "\n", 266 | " with g.as_default(), tf.Session() as sess:\n", 267 | " # that is how we \"execute\" statements \n", 268 | " # (return None, e.g. 
init() or train_op())\n", 269 | " # or compute parts of graph defined above (loss, output, etc.)\n", 270 | " # given certain input (x_, y_)\n", 271 | " #sess.run(tf.initialize_all_variables())\n", 272 | " sess.run(tf.global_variables_initializer())\n", 273 | " \n", 274 | " # train\n", 275 | " ids=[i for i in range(100)]\n", 276 | " for iter_i in range(100001):\n", 277 | " batch_xs = x_tr[ids,:] \n", 278 | " ids=[(ids[0]+100+i)%x_tr.shape[0] for i in range(100)]\n", 279 | " sess.run(train_step, feed_dict={x_: batch_xs})\n", 280 | " \n", 281 | " # test trained model\n", 282 | " if iter_i % 1000 == 0:\n", 283 | " tf_feed_dict = {x_: batch_xs}\n", 284 | " loss_val = sess.run(loss, feed_dict=tf_feed_dict)\n", 285 | " print('iteration %d\\t train mse: %.3f\\t'%(iter_i,loss_val))\n", 286 | " if iter_i%5000 == 0:\n", 287 | " \n", 288 | " loss_val_test = sess.run(loss, feed_dict={x_:x_test})\n", 289 | " loss_val2_test = sess.run(loss2, feed_dict={x_:x_test})\n", 290 | " print('iteration %d\\t TEST MSE: %.3f\\t %.3f\\t'%(iter_i,loss_val_test,loss_val2_test))\n", 291 | " \n", 292 | " \n", 293 | " x_from_tr=sess.run(y_logits, feed_dict={x_:batch_xs[:5,:].reshape([-1,1024])})\n", 294 | " x_from_test=sess.run(y_logits, feed_dict={x_:x_test[0:5,:].reshape([-1,1024])})\n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | "#test_classification(lambda x: mlp1(x, [850,700,500,700,850,1024],\n", 299 | "# activation_fn=tf.nn.relu,std_dev=1e-1), learning_rate=1e-3)\n", 300 | "\n", 301 | "test_classification(lambda x: mlp2(x, [850,700,500],[700,850,1024],\n", 302 | " activation_fn=tf.nn.relu,std_dev=1,cons_mult=0.5), learning_rate=1e-3)" 303 | ] 304 | } 305 | ], 306 | "metadata": { 307 | "kernelspec": { 308 | "display_name": "Python 3", 309 | "language": "python", 310 | "name": "python3" 311 | }, 312 | "language_info": { 313 | "codemirror_mode": { 314 | "name": "ipython", 315 | "version": 3 316 | }, 317 | "file_extension": ".py", 318 | "mimetype": "text/x-python", 319 | "name": "python", 320 | "nbconvert_exporter": "python", 321 | "pygments_lexer": "ipython3", 322 | "version": "3.6.0" 323 | } 324 | }, 325 | "nbformat": 4, 326 | "nbformat_minor": 2 327 | } 328 | -------------------------------------------------------------------------------- /Autoencoders/deprecated/MLP-AE.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import tensorflow as tf\n", 12 | "import skimage.io\n", 13 | "import skimage.color\n", 14 | "import numpy as np\n", 15 | "from pprint import pprint" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 2, 21 | "metadata": { 22 | "collapsed": false 23 | }, 24 | "outputs": [], 25 | "source": [ 26 | "import read_cifar10 as cf10\n", 27 | "\n", 28 | "#@read_data.restartable\n", 29 | "def cifar10_dataset_generator(dataset_name, batch_size, restrict_size=1000):\n", 30 | " assert dataset_name in ['train', 'test']\n", 31 | " assert batch_size > 0 or batch_size == -1 # -1 for entire dataset\n", 32 | " \n", 33 | " X_all_unrestricted, y_all = (cf10.load_training_data() if dataset_name == 'train'\n", 34 | " else cf10.load_test_data())\n", 35 | " \n", 36 | " actual_restrict_size = restrict_size if dataset_name == 'train' else int(1e10)\n", 37 | " X_all = X_all_unrestricted[:actual_restrict_size]\n", 38 | " data_len = X_all.shape[0]\n", 39 | " batch_size = batch_size if batch_size > 0 else 
data_len\n", 40 | " \n", 41 | " X_all_padded = np.concatenate([X_all, X_all[:batch_size]], axis=0)\n", 42 | " y_all_padded = np.concatenate([y_all, y_all[:batch_size]], axis=0)\n", 43 | " \n", 44 | " for slice_i in range(math.ceil(data_len / batch_size)):\n", 45 | " idx = slice_i * batch_size\n", 46 | " #X_batch = X_all_padded[idx:idx + batch_size]\n", 47 | " X_batch = X_all_padded[idx:idx + batch_size]*255 # bugfix: thanks Zezhou Sun!\n", 48 | " y_batch = np.ravel(y_all_padded[idx:idx + batch_size])\n", 49 | " yield X_batch.astype(np.uint8), y_batch.astype(np.uint8)\n", 50 | "\n", 51 | "cifar10_dataset_generators = {\n", 52 | " 'train': cifar10_dataset_generator('train', 1000),\n", 53 | " 'test': cifar10_dataset_generator('test', -1)\n", 54 | "}\n" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 3, 60 | "metadata": { 61 | "collapsed": false 62 | }, 63 | "outputs": [], 64 | "source": [ 65 | "#Load cifar-10 data\n", 66 | "cf10_tr=cf10.load_training_data()\n", 67 | "cf10_tr_img=cf10_tr[0]\n", 68 | "cf10_tr_label = cf10_tr[1]\n", 69 | "\n", 70 | "cf10_test=cf10.load_test_data()\n", 71 | "cf10_test_img=cf10_test[0]\n", 72 | "cf10_test_label = cf10_test[1]" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 4, 78 | "metadata": { 79 | "collapsed": false 80 | }, 81 | "outputs": [], 82 | "source": [ 83 | "cf10_test_img_gray=(cf10_test_img[:,:,:,0]+cf10_test_img[:,:,:,1]+cf10_test_img[:,:,:,2])/3.\n", 84 | "cf10_tr_img_gray=(cf10_tr_img[:,:,:,0]+cf10_tr_img[:,:,:,1]+cf10_tr_img[:,:,:,2])/3.\n", 85 | "cf10_tr_vec=np.zeros((50000,1024))\n", 86 | "cf10_test_vec=np.zeros((10000,1024))" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 5, 92 | "metadata": { 93 | "collapsed": false 94 | }, 95 | "outputs": [], 96 | "source": [ 97 | "import skimage.io\n", 98 | "def img2block(im):\n", 99 | " '''\n", 100 | " Image patching code. 
It patches a given RGB image into 32x32 blocks and returns a 4D array with size \n", 101 | " [number_of_patches,32,32,3]\n", 102 | " '''\n", 103 | " im = im.astype(np.float32)\n", 104 | " row,col,color = im.shape\n", 105 | " im_bl=np.zeros((int(row*col/1024),32,32,3)).astype(np.float32)\n", 106 | " count=0\n", 107 | " for i in range(0,row-row%32,32):\n", 108 | " for j in range(0,col-col%32,32):\n", 109 | " im_bl[count,:,:,:]=im[i:i+32,j:j+32,:]\n", 110 | " count = count +1\n", 111 | " im_bl=im_bl/255.\n", 112 | " return im_bl\n", 113 | "\n", 114 | "def block2img(img_blocks,img_size):\n", 115 | " '''\n", 116 | " Function for reconstructing the image back from patches\n", 117 | " '''\n", 118 | " row,col = img_size\n", 119 | " img=np.zeros((row,col,3)).astype(np.float32)\n", 120 | " n,k,l,c=img_blocks.shape\n", 121 | " \n", 122 | " for i in range(0,int(row/k)):\n", 123 | " for j in range(0,int(col/k)):\n", 124 | " img[i*k:(i+1)*k,j*l:(j+1)*l,:]=img_blocks[int(i*col/k+j),:,:,:]\n", 125 | " return img\n", 126 | "\n", 127 | "#Get the patches of lena image\n", 128 | "lena_img = skimage.io.imread('../test_img/lena512color.tiff')\n", 129 | "lena_32=img2block(lena_img)\n", 130 | " \n", 131 | " " 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": 6, 137 | "metadata": { 138 | "collapsed": true 139 | }, 140 | "outputs": [], 141 | "source": [ 142 | "def convert2uint8(img):\n", 143 | " img[img>255]=255\n", 144 | " img[img<0]=0\n", 145 | " return img.astype(np.uint8)" 146 | ] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": {}, 151 | "source": [ 152 | "# Part-1 - MLP" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 8, 158 | "metadata": { 159 | "collapsed": false 160 | }, 161 | "outputs": [], 162 | "source": [ 163 | "x_tr = cf10_tr_vec.astype(np.float32)/255.\n", 164 | "x_test = cf10_test_vec.astype(np.float32)/255.\n", 165 | "x_test=x_test[:1000,:]" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": 9, 171 | "metadata": { 172 | "collapsed": false 173 | }, 174 | "outputs": [], 175 | "source": [ 176 | "\n", 177 | "def mlp1(x, hidden_sizes, activation_fn=tf.nn.relu,dropout_rate=1.0,std_dev=1.0):\n", 178 | " if not isinstance(hidden_sizes, (list, tuple)):\n", 179 | " raise ValueError(\"hidden_sizes must be a list or a tuple\")\n", 180 | " scope_args = {'initializer': tf.random_normal_initializer(stddev=std_dev)}\n", 181 | " for k in range(len(hidden_sizes)-1):\n", 182 | " layer_name=\"weights\"+str(k)\n", 183 | " #FC layers\n", 184 | " with tf.variable_scope(layer_name, **scope_args):\n", 185 | " W = tf.get_variable('W', shape=[x.shape[-1], hidden_sizes[k]])\n", 186 | " b = tf.get_variable('b', shape=[hidden_sizes[k]])\n", 187 | " x = activation_fn(tf.matmul(x, W) + b)\n", 188 | " #Dropout before the last layer\n", 189 | " x = tf.nn.dropout(x, keep_prob=dropout_rate)\n", 190 | " #Softmax layer\n", 191 | " with tf.variable_scope('outlayer', **scope_args):\n", 192 | " W = tf.get_variable('W', shape=[x.shape[-1], hidden_sizes[-1]])\n", 193 | " b = tf.get_variable('b', shape=[hidden_sizes[-1]])\n", 194 | " return tf.matmul(x, W) + b" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": 10, 200 | "metadata": { 201 | "collapsed": false 202 | }, 203 | "outputs": [], 204 | "source": [ 205 | "import tensorflow as tf\n", 206 | "\n", 207 | "def mlp2(x, hidden_sizes_1,hidden_sizes_2, activation_fn=tf.nn.relu,dropout_rate=1.0,std_dev=1.0,cons_mult=1):\n", 208 | " scope_args = {'initializer': 
tf.random_normal_initializer(stddev=std_dev)}\n", 209 | " for k in range(len(hidden_sizes_1)-1):\n", 210 | " layer_name=\"weights_enc\"+str(k)\n", 211 | " #FC layers\n", 212 | " with tf.variable_scope(layer_name, **scope_args):\n", 213 | " W = tf.get_variable('W', shape=[x.shape[-1], hidden_sizes_1[k]])\n", 214 | " b = tf.get_variable('b', shape=[hidden_sizes_1[k]])\n", 215 | " x = activation_fn(tf.matmul(x, W) + cons_mult*b)\n", 216 | " #Dropout before the last layer\n", 217 | " x = tf.nn.dropout(x, keep_prob=dropout_rate)\n", 218 | " #Softmax layer\n", 219 | " with tf.variable_scope('outlayer_enc', **scope_args):\n", 220 | " W = tf.get_variable('W', shape=[x.shape[-1], hidden_sizes_1[-1]])\n", 221 | " b = tf.get_variable('b', shape=[hidden_sizes_1[-1]])\n", 222 | " x = activation_fn(tf.matmul(x, W) + cons_mult*b)\n", 223 | " \n", 224 | " x_quant = tf.round(x*255.)/255.\n", 225 | " \n", 226 | " for k in range(len(hidden_sizes_2)-1):\n", 227 | " layer_name=\"weights_dec\"+str(k)\n", 228 | " #FC layers\n", 229 | " with tf.variable_scope(layer_name, **scope_args):\n", 230 | " W = tf.get_variable('W', shape=[x.shape[-1], hidden_sizes_2[k]])\n", 231 | " b = tf.get_variable('b', shape=[hidden_sizes_2[k]])\n", 232 | " x = activation_fn(tf.matmul(x, W) + cons_mult*b)\n", 233 | " x_quant=(activation_fn(tf.matmul(x_quant, W) + cons_mult*b))\n", 234 | " #Dropout before the last layer\n", 235 | " x = tf.nn.dropout(x, keep_prob=dropout_rate)\n", 236 | " #Softmax layer\n", 237 | " with tf.variable_scope('outlayer_dec', **scope_args):\n", 238 | " W = tf.get_variable('W', shape=[x.shape[-1], hidden_sizes_2[-1]])\n", 239 | " b = tf.get_variable('b', shape=[hidden_sizes_2[-1]])\n", 240 | " \n", 241 | " return (tf.matmul(x, W) + cons_mult*b,tf.matmul(x_quant, W) + cons_mult*b)" 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": 13, 247 | "metadata": { 248 | "collapsed": false 249 | }, 250 | "outputs": [ 251 | { 252 | "ename": "NameError", 253 | "evalue": "name 'train_step' is not defined", 254 | "output_type": "error", 255 | "traceback": [ 256 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 257 | "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", 258 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 48\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 49\u001b[0m test_classification(lambda x: mlp2(x, [850,700,500],[700,850,1024],\n\u001b[0;32m---> 50\u001b[0;31m activation_fn=tf.nn.relu,std_dev=1,cons_mult=0.5), learning_rate=1e-3)\n\u001b[0m", 259 | "\u001b[0;32m\u001b[0m in \u001b[0;36mtest_classification\u001b[0;34m(model_function, learning_rate)\u001b[0m\n\u001b[1;32m 25\u001b[0m \u001b[0mbatch_xs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mx_tr\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mids\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 26\u001b[0m \u001b[0mids\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mids\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0;36m100\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m%\u001b[0m\u001b[0mx_tr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m 
\u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m100\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 27\u001b[0;31m \u001b[0msess\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtrain_step\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfeed_dict\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0mx_\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mbatch_xs\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 28\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 29\u001b[0m \u001b[0;31m# test trained model\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 260 | "\u001b[0;31mNameError\u001b[0m: name 'train_step' is not defined" 261 | ] 262 | } 263 | ], 264 | "source": [ 265 | "import pprint\n", 266 | "\n", 267 | "def test_classification(model_function, learning_rate=0.1):\n", 268 | "\n", 269 | " with tf.Graph().as_default() as g:\n", 270 | " # where are you going to allocate memory and perform computations\n", 271 | " with tf.device(\"/gpu:0\"):\n", 272 | " \n", 273 | " # define model \"input placeholders\", i.e. variables that are\n", 274 | " # going to be substituted with input data on train/test time\n", 275 | " x_ = tf.placeholder(tf.float32, [None, 1024])\n", 276 | " #y_logits = model_function(x_)\n", 277 | " y_logits,x_recon = model_function(x_)\n", 278 | "\n", 279 | " \n", 280 | " # naive implementation of loss:\n", 281 | " # > losses = y_ * tf.log(tf.nn.softmax(y_logits))\n", 282 | " # > tf.reduce_mean(-tf.reduce_sum(losses, 1))\n", 283 | " # can be numerically unstable.\n", 284 | " #\n", 285 | " # so here we use tf.nn.softmax_cross_entropy_with_logits on the raw\n", 286 | " # outputs of 'y', and then average across the batch.\n", 287 | " \n", 288 | " loss = tf.reduce_mean(tf.subtract(x_,y_logits)**2)\n", 289 | " loss2=tf.reduce_mean(tf.subtract(x_,x_recon)**2)\n", 290 | " train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)\n", 291 | " \n", 292 | " #y_pred = tf.argmax(tf.nn.softmax(y_logits), dimension=1)\n", 293 | " #correct_prediction = tf.equal(y_pred, tf.argmax(y_, 1))\n", 294 | " #accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))\n", 295 | "\n", 296 | " with g.as_default(), tf.Session() as sess:\n", 297 | " # that is how we \"execute\" statements \n", 298 | " # (return None, e.g. 
init() or train_op())\n", 299 | " # or compute parts of graph defined above (loss, output, etc.)\n", 300 | " # given certain input (x_, y_)\n", 301 | " #sess.run(tf.initialize_all_variables())\n", 302 | " sess.run(tf.global_variables_initializer())\n", 303 | " \n", 304 | " # train\n", 305 | " ids=[i for i in range(100)]\n", 306 | " for iter_i in range(100001):\n", 307 | " batch_xs = x_tr[ids,:] \n", 308 | " ids=[(ids[0]+100+i)%x_tr.shape[0] for i in range(100)]\n", 309 | " sess.run(train_step, feed_dict={x_: batch_xs})\n", 310 | " \n", 311 | " # test trained model\n", 312 | " if iter_i % 1000 == 0:\n", 313 | " tf_feed_dict = {x_: batch_xs}\n", 314 | " loss_val = sess.run(loss, feed_dict=tf_feed_dict)\n", 315 | " print('iteration %d\\t train mse: %.3f\\t'%(iter_i,loss_val))\n", 316 | " if iter_i%5000 == 0:\n", 317 | " \n", 318 | " loss_val_test = sess.run(loss, feed_dict={x_:x_test})\n", 319 | " loss_val2_test = sess.run(loss2, feed_dict={x_:x_test})\n", 320 | " print('iteration %d\\t TEST MSE: %.3f\\t %.3f\\t'%(iter_i,loss_val_test,loss_val2_test))\n", 321 | " \n", 322 | " \n", 323 | " x_from_tr=sess.run(y_logits, feed_dict={x_:batch_xs[:5,:].reshape([-1,1024])})\n", 324 | " x_from_test=sess.run(y_logits, feed_dict={x_:x_test[0:5,:].reshape([-1,1024])})\n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | "#test_classification(lambda x: mlp1(x, [850,700,500,700,850,1024],\n", 329 | "# activation_fn=tf.nn.relu,std_dev=1e-1), learning_rate=1e-3)\n", 330 | "\n", 331 | "test_classification(lambda x: mlp2(x, [850,700,500],[700,850,1024],\n", 332 | " activation_fn=tf.nn.relu,std_dev=1,cons_mult=0.5), learning_rate=1e-3)" 333 | ] 334 | } 335 | ], 336 | "metadata": { 337 | "kernelspec": { 338 | "display_name": "Python 3", 339 | "language": "python", 340 | "name": "python3" 341 | }, 342 | "language_info": { 343 | "codemirror_mode": { 344 | "name": "ipython", 345 | "version": 3 346 | }, 347 | "file_extension": ".py", 348 | "mimetype": "text/x-python", 349 | "name": "python", 350 | "nbconvert_exporter": "python", 351 | "pygments_lexer": "ipython3", 352 | "version": "3.6.0" 353 | } 354 | }, 355 | "nbformat": 4, 356 | "nbformat_minor": 2 357 | } 358 | -------------------------------------------------------------------------------- /Autoencoders/deprecated/front_mlp_image_compression.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import tensorflow as tf\n", 12 | "import numpy as np" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 31, 18 | "metadata": { 19 | "collapsed": false 20 | }, 21 | "outputs": [ 22 | { 23 | "name": "stdout", 24 | "output_type": "stream", 25 | "text": [ 26 | "[[ 0.3955416 0.12160153 0.19642151 ..., 0.06968317 0.69140428\n", 27 | " 0.15246276]\n", 28 | " [ 0.09525515 0.13920502 0.30888604 ..., 0.6035481 0.45715089\n", 29 | " 0.03781689]\n", 30 | " [ 0.63284166 0.54282435 0.48278776 ..., 0.8935599 0.12680349\n", 31 | " 0.72606722]\n", 32 | " ..., \n", 33 | " [ 0.65537722 0.94637658 0.28784545 ..., 0.73615713 0.53699603\n", 34 | " 0.53415198]\n", 35 | " [ 0.98592569 0.85208744 0.19715099 ..., 0.20379542 0.36096019\n", 36 | " 0.15295724]\n", 37 | " [ 0.82432174 0.38235988 0.89298599 ..., 0.6089612 0.85964109\n", 38 | " 0.08057932]]\n" 39 | ] 40 | } 41 | ], 42 | "source": [ 43 | "train_array = np.random.random((1000,10))\n", 44 | "print(train_array)" 45 | ] 46 | 
}, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 32, 50 | "metadata": { 51 | "collapsed": false 52 | }, 53 | "outputs": [ 54 | { 55 | "name": "stdout", 56 | "output_type": "stream", 57 | "text": [ 58 | "[[ 0.86179411 0.85179411 0.84179411 ..., 0.79179411 0.78179411\n", 59 | " 0.77179411]\n", 60 | " [ 0.84028875 0.83028875 0.82028875 ..., 0.77028875 0.76028875\n", 61 | " 0.75028875]\n", 62 | " [ 0.8935599 0.8835599 0.8735599 ..., 0.8235599 0.8135599\n", 63 | " 0.8035599 ]\n", 64 | " ..., \n", 65 | " [ 0.94637658 0.93637658 0.92637658 ..., 0.87637658 0.86637658\n", 66 | " 0.85637658]\n", 67 | " [ 0.98592569 0.97592569 0.96592569 ..., 0.91592569 0.90592569\n", 68 | " 0.89592569]\n", 69 | " [ 0.9992102 0.9892102 0.9792102 ..., 0.9292102 0.9192102\n", 70 | " 0.9092102 ]]\n" 71 | ] 72 | } 73 | ], 74 | "source": [ 75 | "train_rows = np.shape(train_array)[0]\n", 76 | "train_cols = np.shape(train_array)[1]\n", 77 | "train_labels = np.zeros((train_rows,train_cols))\n", 78 | "for i in range(train_rows):\n", 79 | " for j in range(train_cols):\n", 80 | " if j == 0:\n", 81 | " train_labels[i,j] = max(train_array[i,:])\n", 82 | " else:\n", 83 | " train_labels[i,j] = train_labels[i,(j-1)]-0.01\n", 84 | "print(train_labels)" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": 33, 90 | "metadata": { 91 | "collapsed": true 92 | }, 93 | "outputs": [], 94 | "source": [ 95 | "def mlp(x, hidden_sizes, activation_fn=tf.nn.relu):\n", 96 | " if not isinstance(hidden_sizes, (list, tuple)):\n", 97 | " raise ValueError(\"hidden_sizes must be a list or a tuple\")\n", 98 | " W = {}\n", 99 | " b = {}\n", 100 | " h = {}\n", 101 | " for i in range(len(hidden_sizes)):\n", 102 | " if(W == {}):\n", 103 | " W[\"W\"+str(i)] = tf.get_variable(\"W\"+str(i),[10,hidden_sizes[i]],tf.float32,tf.random_normal_initializer(stddev = 0.01))\n", 104 | " b[\"b\"+str(i)] = tf.get_variable(\"b\"+str(i),[hidden_sizes[i]],tf.float32,tf.constant_initializer(0.0))\n", 105 | " h[\"h\"+str(i)] = activation_fn(tf.matmul(x,W[\"W\"+str(i)])+b[\"b\"+str(i)])\n", 106 | " elif(i == len(hidden_sizes)-1):\n", 107 | " W[\"W\"+str(i)] = tf.get_variable(\"W\"+str(i),[hidden_sizes[i-1],hidden_sizes[i]],tf.float32,tf.random_normal_initializer(stddev = 0.01))\n", 108 | " b[\"b\"+str(i)] = tf.get_variable(\"b\"+str(i),[hidden_sizes[i]],tf.float32,tf.constant_initializer(0.0))\n", 109 | " return (tf.matmul(h[\"h\"+str(i-1)],W[\"W\"+str(i)])+b[\"b\"+str(i)])\n", 110 | " else:\n", 111 | " W[\"W\"+str(i)] = tf.get_variable(\"W\"+str(i),[hidden_sizes[i-1],hidden_sizes[i]],tf.float32,tf.random_normal_initializer(stddev = 0.01))\n", 112 | " b[\"b\"+str(i)] = tf.get_variable(\"b\"+str(i),[hidden_sizes[i]],tf.float32,tf.constant_initializer(0.0))\n", 113 | " h[\"h\"+str(i)] = activation_fn(tf.matmul(h[\"h\"+str(i-1)],W[\"W\"+str(i)])+b[\"b\"+str(i)])" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 36, 119 | "metadata": { 120 | "collapsed": false 121 | }, 122 | "outputs": [ 123 | { 124 | "name": "stdout", 125 | "output_type": "stream", 126 | "text": [ 127 | "38.729814951\n", 128 | "6.0595560893\n", 129 | "6.05706680437\n", 130 | "6.02788693193\n", 131 | "5.98470501138\n", 132 | "5.93538273533\n", 133 | "5.88351898642\n", 134 | "5.83069233332\n", 135 | "5.77770548453\n", 136 | "5.72507235492\n", 137 | "5.67319154151\n", 138 | "5.62240566206\n", 139 | "5.57302393839\n", 140 | "5.52533032854\n", 141 | "5.47958573092\n", 142 | "5.43602586705\n", 143 | "5.39485779733\n", 144 | "5.35625338649\n", 145 | 
"5.32034523486\n", 146 | "5.28722085111\n", 147 | "5.25691952612\n", 148 | "5.22943313842\n", 149 | "5.20470543306\n", 150 | "5.18263690189\n", 151 | "5.16309170199\n", 152 | "5.1459032762\n", 153 | "5.13088357053\n", 154 | "5.11783134243\n", 155 | "5.10653970815\n", 156 | "5.09680316552\n", 157 | "5.08842393966\n", 158 | "5.0812146743\n", 159 | "5.07500457537\n", 160 | "5.06963840226\n", 161 | "5.06497901838\n", 162 | "5.06090653618\n", 163 | "5.05731778517\n", 164 | "5.05412607593\n", 165 | "5.05125787257\n", 166 | "5.04865379987\n", 167 | "5.04626417449\n", 168 | "5.04405014315\n", 169 | "5.0419799128\n", 170 | "5.04002777731\n", 171 | "5.03817571074\n", 172 | "5.03640774185\n", 173 | "5.03471274257\n", 174 | "5.03308184768\n", 175 | "5.03150819116\n", 176 | "5.02998716192\n", 177 | "5.0285144496\n", 178 | "5.02708681071\n", 179 | "5.02570242462\n", 180 | "5.02435852672\n", 181 | "5.02305414624\n", 182 | "5.02178762489\n", 183 | "5.02055721052\n", 184 | "5.01936235555\n", 185 | "5.01820169742\n", 186 | "5.0170735857\n", 187 | "5.01597754934\n", 188 | "5.01491204507\n", 189 | "5.01387665722\n", 190 | "5.01287017698\n", 191 | "5.01189135079\n", 192 | "5.01093911126\n", 193 | "5.01001302872\n", 194 | "5.00911176718\n", 195 | "5.00823436711\n", 196 | "5.00738058408\n", 197 | "5.00654913654\n", 198 | "5.00573922131\n", 199 | "5.00495008197\n", 200 | "5.00418131469\n", 201 | "5.00343182909\n", 202 | "5.00270091494\n", 203 | "5.0019883843\n", 204 | "5.0012932969\n", 205 | "5.00061476335\n", 206 | "4.99995301026\n", 207 | "4.99930672\n", 208 | "4.99867537898\n", 209 | "4.99805903135\n", 210 | "4.99745698192\n", 211 | "4.9968685002\n", 212 | "4.99629358655\n", 213 | "4.99573086129\n", 214 | "4.99518101412\n", 215 | "4.99464310828\n", 216 | "4.99411699656\n", 217 | "4.9936020669\n", 218 | "4.99309776659\n", 219 | "4.99260429276\n", 220 | "4.9921211217\n", 221 | "4.99164772976\n", 222 | "4.99118393005\n", 223 | "4.99072962657\n", 224 | "4.99028377012\n", 225 | "4.98984703978\n", 226 | "4.9894188412\n", 227 | "[[ 0.50159783 0.61804251 0.11908296 0.09711203 0.70725984 0.83677861\n", 228 | " 0.26683684 0.34680484 0.04716688 0.13446337]]\n", 229 | "[array([[ 0.83548361, 0.82560116, 0.81568962, 0.80577552, 0.79580408,\n", 230 | " 0.78591537, 0.77600253, 0.76610076, 0.75613427, 0.74623728]], dtype=float32)]\n" 231 | ] 232 | } 233 | ], 234 | "source": [ 235 | "with tf.Graph().as_default():\n", 236 | " x = tf.placeholder(tf.float32,[None,10])\n", 237 | " y = tf.placeholder(tf.float32,[None,10])\n", 238 | " prediction = mlp(x,[10,10,10],tf.sigmoid)\n", 239 | " loss = tf.reduce_sum(tf.square(prediction-y))\n", 240 | " optimizer = tf.train.AdamOptimizer().minimize(loss)\n", 241 | " with tf.Session() as sess:\n", 242 | " sess.run(tf.global_variables_initializer())\n", 243 | " for epoch in range(100):\n", 244 | " loss_avg = 0\n", 245 | " for data in range(1000):\n", 246 | " x_train = np.reshape(train_array[data,:],(1,10))\n", 247 | " y_train = np.reshape(train_labels[data,:],(1,10))\n", 248 | " #print(x_train,y_train)\n", 249 | " _,loss_here = sess.run([optimizer,loss],feed_dict = {x:x_train,y:y_train})\n", 250 | " #print(\"loss:\",loss)\n", 251 | " loss_avg += loss_here\n", 252 | " print(loss_avg/10)\n", 253 | " x_test = np.random.random((1,10))\n", 254 | " print(x_test)\n", 255 | " print(sess.run([prediction],feed_dict = {x:x_test,y:y_train}))\n", 256 | " #sess.run([optimizer],)" 257 | ] 258 | }, 259 | { 260 | "cell_type": "code", 261 | "execution_count": 11, 262 | "metadata": { 263 | "collapsed": false 264 
| }, 265 | "outputs": [ 266 | { 267 | "name": "stdout", 268 | "output_type": "stream", 269 | "text": [ 270 | "[ 0.57894346 0.65120785 0.73893972 0.99132392 0.61851509 0.45124925\n", 271 | " 0.09072389 0.85271584 0.85796748 0.97041281] [ 0.99132392 0.98132392 0.97132392 0.96132392 0.95132392 0.94132392\n", 272 | " 0.93132392 0.92132392 0.91132392 0.90132392]\n" 273 | ] 274 | } 275 | ], 276 | "source": [ 277 | "with tf.Graph().as_default():\n", 278 | " x = tf.placeholder(tf.float32,[None,10])\n", 279 | " y = tf.placeholder(tf.float32,[None,10])\n", 280 | " prediction = mlp(x,[10,10,10],tf.sigmoid)\n", 281 | " loss = tf.reduce_sum(tf.square(prediction-y))\n", 282 | " optimizer = tf.train.AdamOptimizer().minimize(loss)\n", 283 | " with tf.Session() as sess:\n", 284 | " sess.run(tf.global_variables_initializer())\n", 285 | " for epoch in range(100):\n", 286 | " loss_avg = 0\n", 287 | " for data in range(1000):\n", 288 | " x_train = np.reshape(train_array[data,:],(1,10))\n", 289 | " y_train = np.reshape(train_labels[data,:],(1,10))\n", 290 | " #print(x_train,y_train)\n", 291 | " _,loss_here = sess.run([optimizer,loss],feed_dict = {x:y_train,y:x_train})\n", 292 | " #print(\"loss:\",loss)\n", 293 | " loss_avg += loss_here\n", 294 | " print(loss_avg/10)\n", 295 | " x_test = np.random.random((1,10))\n", 296 | " print(x_test)\n", 297 | " print(sess.run([prediction],feed_dict = {x:x_test,y:y_train}))" 298 | ] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "execution_count": null, 303 | "metadata": { 304 | "collapsed": true 305 | }, 306 | "outputs": [], 307 | "source": [] 308 | } 309 | ], 310 | "metadata": { 311 | "anaconda-cloud": {}, 312 | "kernelspec": { 313 | "display_name": "Python [conda root]", 314 | "language": "python", 315 | "name": "conda-root-py" 316 | }, 317 | "language_info": { 318 | "codemirror_mode": { 319 | "name": "ipython", 320 | "version": 3 321 | }, 322 | "file_extension": ".py", 323 | "mimetype": "text/x-python", 324 | "name": "python", 325 | "nbconvert_exporter": "python", 326 | "pygments_lexer": "ipython3", 327 | "version": "3.5.2" 328 | } 329 | }, 330 | "nbformat": 4, 331 | "nbformat_minor": 1 332 | } 333 | -------------------------------------------------------------------------------- /Autoencoders/deprecated/mlp_image_compression.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import read_cifar10 as cf10 3 | import math 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | from scipy import fftpack 7 | 8 | batch = 1 9 | number_of_images = 512 10 | 11 | def cifar10_dataset_generator(dataset_name, batch_size, restrict_size=1000): 12 | assert dataset_name in ['train', 'test'] 13 | assert batch_size > 0 or batch_size == -1 # -1 for entire dataset 14 | 15 | X_all_unrestricted, y_all = (cf10.load_training_data() if dataset_name == 'train' 16 | else cf10.load_test_data()) 17 | 18 | actual_restrict_size = restrict_size if dataset_name == 'train' else int(1e10) 19 | X_all = X_all_unrestricted[:actual_restrict_size] 20 | data_len = X_all.shape[0] 21 | batch_size = batch_size if batch_size > 0 else data_len 22 | 23 | X_all_padded = np.concatenate([X_all, X_all[:batch_size]], axis=0) 24 | y_all_padded = np.concatenate([y_all, y_all[:batch_size]], axis=0) 25 | 26 | for slice_i in range(math.ceil(data_len / batch_size)): 27 | idx = slice_i * batch_size 28 | X_batch = X_all_padded[idx:idx + batch_size] 29 | y_batch = np.ravel(y_all_padded[idx:idx + batch_size]) 30 | yield X_batch.astype(np.float32), 
y_batch.astype(np.uint8)
31 | 
32 | 
33 | def mlp(x, hidden_sizes, activation_fn=tf.nn.relu):
34 |     if not isinstance(hidden_sizes, (list, tuple)):
35 |         raise ValueError("hidden_sizes must be a list or a tuple")
36 |     W = {}
37 |     b = {}
38 |     h = {}
39 |     for i in range(len(hidden_sizes)):
40 |         if(W == {}):
41 |             W["W"+str(i)] = tf.get_variable("W"+str(i),[1024,hidden_sizes[i]],tf.float32,tf.random_normal_initializer(stddev = 0.01))
42 |             b["b"+str(i)] = tf.get_variable("b"+str(i),[hidden_sizes[i]],tf.float32,tf.constant_initializer(0.0))
43 |             h["h"+str(i)] = activation_fn(tf.matmul(x,W["W"+str(i)])+b["b"+str(i)])
44 |         elif(i == len(hidden_sizes)-1):
45 |             W["W"+str(i)] = tf.get_variable("W"+str(i),[hidden_sizes[i-1],hidden_sizes[i]],tf.float32,tf.random_normal_initializer(stddev = 0.01))
46 |             b["b"+str(i)] = tf.get_variable("b"+str(i),[hidden_sizes[i]],tf.float32,tf.constant_initializer(0.0))
47 |             return (tf.matmul(h["h"+str(i-1)],W["W"+str(i)])+b["b"+str(i)])
48 |         else:
49 |             W["W"+str(i)] = tf.get_variable("W"+str(i),[hidden_sizes[i-1],hidden_sizes[i]],tf.float32,tf.random_normal_initializer(stddev = 0.01))
50 |             b["b"+str(i)] = tf.get_variable("b"+str(i),[hidden_sizes[i]],tf.float32,tf.constant_initializer(0.0))
51 |             h["h"+str(i)] = activation_fn(tf.matmul(h["h"+str(i-1)],W["W"+str(i)])+b["b"+str(i)])
52 | 
53 | 
54 | with tf.Graph().as_default():
55 |     x = tf.placeholder(tf.float32,[None,32,32,3])
56 |     y = tf.placeholder(tf.float32,[None,1024])
57 |     cropped_x = x[:,:,:,0:1]
58 |     reshaped_x = tf.reshape(cropped_x,[batch,1024])
59 |     mlp_dct = mlp(reshaped_x,[1024,1024,1024])
60 | 
61 |     #cropped_y = y[:,:,:,0:1]
62 |     #reshaped_y = tf.reshape(cropped_y,[batch,1024])
63 | 
64 | 
65 |     binary = tf.equal(mlp_dct,y)
66 | 
67 |     #accuracy and loss of the predicted results
68 |     accuracy = tf.reduce_mean(tf.cast(binary,tf.float32))
69 |     loss = tf.reduce_sum(tf.square(mlp_dct-y))
70 |     mse = loss/(32*32)
71 |     r = tf.reduce_max(tf.abs(y))  # peak of the target signal (largest |DCT coefficient|); not the max error
72 |     psnr = 10*tf.log((r**2)/mse)/tf.log(10.0)  # tf.log is the natural log, so divide by log(10) to get 10*log10
73 | 
74 |     optimizer = tf.train.AdamOptimizer(0.2).minimize(loss)
75 | 
76 | 
77 |     with tf.Session() as sess:
78 |         sess.run(tf.global_variables_initializer())
79 | 
80 |         for epoch_i in range(10):
81 | 
82 |             for iter_i, data_batch in enumerate(cifar10_dataset_generator('train', number_of_images)):
83 |                 loss_total = 0
84 |                 psnr_total = 0
85 |                 count = 0
86 |                 x_train_batch,y_train = data_batch
87 |                 for x_train in x_train_batch:
88 |                     cropped_y = x_train[:,:,0:1]
89 |                     dct_y = fftpack.dct(cropped_y)
90 |                     reshaped_y = np.reshape(dct_y,[batch,1024])
91 |                     x_train = np.reshape(x_train,[batch,32,32,3])
92 |                     #train_feed_dict = dict(zip([x,y], data_batch))
93 |                     _,loss_actual,psnr_actual,prediction = sess.run([optimizer,loss,psnr,mlp_dct], feed_dict={x:x_train,y:reshaped_y})
94 |                     loss_total += loss_actual
95 |                     psnr_total += psnr_actual
96 |                     count += 1
97 |                 print("--------train image--------")
98 |                 print(reshaped_y)
99 |                 print("------prediction----------")
100 |                 print(prediction)
101 |                 print("-----------loss-------------")
102 |                 print(count,(loss_total/number_of_images),(psnr_total/number_of_images))
103 |                 #to_compute = [loss,psnr]
104 |                 #loss,psnr = sess.run(to_compute,feed_dict = {x:x_train,y:y_train})
105 |                 #print(loss,psnr)
106 | 
--------------------------------------------------------------------------------
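A quick reference for the fidelity metric used throughout these scripts: PSNR is 10 * log10(peak^2 / MSE). A minimal NumPy sketch of it (illustrative only, not a file in this repo; it assumes 8-bit images unless another peak value is passed):

import numpy as np

def psnr_db(reference, reconstruction, peak=255.0):
    # mean squared error over all pixels
    mse = np.mean((reference.astype(np.float64) - reconstruction.astype(np.float64)) ** 2)
    # a perfect reconstruction has infinite PSNR
    return np.inf if mse == 0 else 10.0 * np.log10(peak ** 2 / mse)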
--------------------------------------------------------------------------------
/GAN-AE/README.md:
--------------------------------------------------------------------------------
1 | ## How the GAN-AE code works:
2 | 
3 | Arrange the parameters by selecting options from FLAGS and by changing the GAN_AE class variables. Using a good set of parameters is important, since the network is somewhat parameter-sensitive. Suggested parameters are given in the report. A test on high-resolution images (for example, Lena) is included separately, since it required extra work; the CIFAR test images are already set up in the GAN_AE class variables and can be run easily.
4 | 
5 | Different loss options:
6 | - Wasserstein loss
7 | - DCGAN loss
8 | - Improved Wasserstein loss
9 | - Wasserstein + L1 loss
10 | - Wasserstein + L2 loss
11 | - DCGAN + L1 loss
12 | - DCGAN + L2 loss
13 | - Improved Wasserstein + L2 loss
14 | 
15 | Adding the L1 distance did not work as well as adding the L2 distance to the loss function. The improved Wasserstein loss did not work better than the plain Wasserstein loss. These observations will be explored further.
16 | 
17 | Suggestions:
18 | 
19 | - Use RMSProp for Wasserstein, Adam for DCGAN.
20 | - Choose a lower learning rate for Wasserstein and a slightly higher one for DCGAN.
21 | - More than 500,000 iterations are suggested to get good reconstructed images. At each iteration, we sample from the data to construct a batch.
22 | - Choose the alpha parameter (the weight of the similarity loss) around or above 10.
23 | 
24 | 
25 | 
26 | 
--------------------------------------------------------------------------------
/GAN-AE/lena.tiff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scelesticsiva/Neural-Networks-for-Image-Compression/f399e6fa9e4b64d89a5284ce4041211daaaa725a/GAN-AE/lena.tiff
--------------------------------------------------------------------------------
/GAN-AE/main.py:
--------------------------------------------------------------------------------
1 | # Got help from the srez repository in choosing some parameters and writing a couple of utility functions.
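# Example invocation (values are illustrative; the FLAGS are defined just below):
#   python main.py --batch_size=64 --learning_rate=2e-5 --optimizer=RMSProp --loss_type=wasserstein_l2_loss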
2 | 3 | from __future__ import print_function 4 | import numpy as np 5 | import tensorflow as tf 6 | import models 7 | from models import * 8 | 9 | FLAGS = tf.flags.FLAGS 10 | tf.flags.DEFINE_integer("batch_size", "64", "batch size for training") 11 | tf.flags.DEFINE_float("learning_rate", "2e-5", "learning rate for optimizers") 12 | tf.flags.DEFINE_float("optimizer_param", "0.5", "beta1 for adam-decay for RMSProp") 13 | tf.flags.DEFINE_float("iterations", "500000", "training iterations") 14 | tf.flags.DEFINE_string("optimizer", "RMSProp", "RMSProp/Adam") 15 | tf.flags.DEFINE_string("loss_type", "wasserstein_l2_loss", "dcgan/wasserstein/imp_wasserstein/wasserstein_l1_loss/wasserstein_l2_loss/imp_wasserstein_l2_loss") 16 | 17 | def main(argv=None): 18 | discriminator_dims = [3, 16, 64, 1] 19 | kernel_encoder = [5,7,9] 20 | kernel_decoder = [9,7,5] 21 | encoder_dims = [64,16,3] 22 | decoder_dims = [16,32,3] 23 | 24 | print("stage 1") 25 | model = models.GAN_AE(FLAGS.batch_size, 26 | clip_values=(-0.01, 0.01), disc_iterations=5, num_train_data=38400, num_test_data=6400, folder='gan_ae') 27 | print("stage 2") 28 | model.create_model(discriminator_dims, kernel_encoder, kernel_decoder, encoder_dims, decoder_dims, "RMSProp", FLAGS.learning_rate, 29 | FLAGS.optimizer_param, FLAGS.loss_type) 30 | print("stage 3") 31 | model.train_model(FLAGS.batch_size, int(FLAGS.iterations)) 32 | 33 | if __name__ == "__main__": 34 | tf.app.run() 35 | -------------------------------------------------------------------------------- /GAN-AE/read_cifar10.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function, unicode_literals 2 | #### 3 | # COPIED FROM https://github.com/Hvass-Labs/TensorFlow-Tutorials/ 4 | # and modified 5 | 6 | 7 | ######################################################################## 8 | # 9 | # Functions for downloading the CIFAR-10 data-set from the internet 10 | # and loading it into memory. 11 | # 12 | # Implemented in Python 3.5 13 | # 14 | # Usage: 15 | # 1) Set the variable data_path with the desired storage path. 16 | # 2) Call maybe_download_and_extract() to download the data-set 17 | # if it is not already located in the given data_path. 18 | # 3) Call load_class_names() to get an array of the class-names. 19 | # 4) Call load_training_data() and load_test_data() to get 20 | # the images, class-numbers and one-hot encoded class-labels 21 | # for the training-set and test-set. 22 | # 5) Use the returned data in your own program. 23 | # 24 | # Format: 25 | # The images for the training- and test-sets are returned as 4-dim numpy 26 | # arrays each with the shape: [image_number, height, width, channel] 27 | # where the individual pixels are floats between 0.0 and 1.0. 28 | # 29 | ######################################################################## 30 | # 31 | # This file is part of the TensorFlow Tutorials available at: 32 | # 33 | # https://github.com/Hvass-Labs/TensorFlow-Tutorials 34 | # 35 | # Published under the MIT License. See the file LICENSE for details. 
36 | # 37 | # Copyright 2016 by Magnus Erik Hvass Pedersen 38 | # 39 | ######################################################################## 40 | 41 | import numpy as np 42 | import pickle 43 | 44 | import sys 45 | import os 46 | import six.moves.urllib as urllib 47 | import tarfile 48 | import zipfile 49 | 50 | 51 | ######################################################################## 52 | 53 | 54 | def _print_download_progress(count, block_size, total_size): 55 | """ 56 | Function used for printing the download progress. 57 | Used as a call-back function in maybe_download_and_extract(). 58 | """ 59 | 60 | # Percentage completion. 61 | pct_complete = float(count * block_size) / total_size 62 | 63 | # Status-message. Note the \r which means the line should overwrite itself. 64 | msg = "\r- Download progress: {0:.1%}".format(pct_complete) 65 | 66 | # Print it. 67 | sys.stdout.write(msg) 68 | sys.stdout.flush() 69 | 70 | 71 | ######################################################################## 72 | 73 | 74 | def dataset_maybe_download_and_extract(url, download_dir): 75 | """ 76 | Download and extract the data if it doesn't already exist. 77 | Assumes the url is a tar-ball file. 78 | :param url: 79 | Internet URL for the tar-file to download. 80 | Example: "https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz" 81 | :param download_dir: 82 | Directory where the downloaded file is saved. 83 | Example: "data/CIFAR-10/" 84 | :return: 85 | Nothing. 86 | """ 87 | 88 | # Filename for saving the file downloaded from the internet. 89 | # Use the filename from the URL and add it to the download_dir. 90 | filename = url.split('/')[-1] 91 | file_path = os.path.join(download_dir, filename) 92 | 93 | # Check if the file already exists. 94 | # If it exists then we assume it has also been extracted, 95 | # otherwise we need to download and extract it now. 96 | if not os.path.exists(file_path): 97 | # Check if the download directory exists, otherwise create it. 98 | if not os.path.exists(download_dir): 99 | os.makedirs(download_dir) 100 | 101 | # Download the file from the internet. 102 | file_path, _ = urllib.request.urlretrieve(url=url, 103 | filename=file_path, 104 | reporthook=_print_download_progress) 105 | 106 | print() 107 | print("Download finished. Extracting files.") 108 | 109 | if file_path.endswith(".zip"): 110 | # Unpack the zip-file. 111 | zipfile.ZipFile(file=file_path, mode="r").extractall(download_dir) 112 | elif file_path.endswith((".tar.gz", ".tgz")): 113 | # Unpack the tar-ball. 114 | tarfile.open(name=file_path, mode="r:gz").extractall(download_dir) 115 | 116 | print("Done.") 117 | 118 | 119 | ######################################################################## 120 | 121 | 122 | ######################################################################## 123 | 124 | # Directory where you want to download and save the data-set. 125 | # Set this before you start calling any of the functions below. 126 | data_path = "./CIFAR-10/" 127 | 128 | # URL for the data-set on the internet. 129 | data_url = "https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz" 130 | 131 | ######################################################################## 132 | # Various constants for the size of the images. 133 | # Use these constants in your own program. 134 | 135 | # Width and height of each image. 136 | img_size = 32 137 | 138 | # Number of channels in each image, 3 channels: Red, Green, Blue. 139 | num_channels = 3 140 | 141 | # Length of an image when flattened to a 1-dim array. 
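# e.g. 32 * 32 * 3 = 3072 values per flattened image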
142 | img_size_flat = img_size * img_size * num_channels 143 | 144 | # Number of classes. 145 | num_classes = 10 146 | 147 | ######################################################################## 148 | # Various constants used to allocate arrays of the correct size. 149 | 150 | # Number of files for the training-set. 151 | _num_files_train = 5 152 | 153 | # Number of images for each batch-file in the training-set. 154 | _images_per_file = 10000 155 | 156 | # Total number of images in the training-set. 157 | # This is used to pre-allocate arrays for efficiency. 158 | _num_images_train = _num_files_train * _images_per_file 159 | 160 | 161 | ######################################################################## 162 | # Private functions for downloading, unpacking and loading data-files. 163 | 164 | 165 | def _get_file_path(filename=""): 166 | """ 167 | Return the full path of a data-file for the data-set. 168 | 169 | If filename=="" then return the directory of the files. 170 | """ 171 | 172 | return os.path.join(data_path, "cifar-10-batches-py/", filename) 173 | 174 | 175 | def _unpickle(filename): 176 | """ 177 | Unpickle the given file and return the data. 178 | 179 | Note that the appropriate dir-name is prepended the filename. 180 | """ 181 | 182 | # Create full path for the file. 183 | file_path = _get_file_path(filename) 184 | 185 | with open(file_path, mode='rb') as file: 186 | # In Python 3.X it is important to set the encoding, 187 | # otherwise an exception is raised here. 188 | data = pickle.load(file, encoding='bytes') 189 | 190 | return data 191 | 192 | 193 | def _convert_images(raw): 194 | """ 195 | Convert images from the CIFAR-10 format and 196 | return a 4-dim array with shape: [image_number, height, width, channel] 197 | where the pixels are floats between 0.0 and 1.0. 198 | """ 199 | 200 | # Convert the raw images from the data-files to floating-points. 201 | raw_float = np.array(raw, dtype=float) / 255.0 202 | 203 | # Reshape the array to 4-dimensions. 204 | images = raw_float.reshape([-1, num_channels, img_size, img_size]) 205 | 206 | # Reorder the indices of the array. 207 | images = images.transpose([0, 2, 3, 1]) 208 | 209 | return images 210 | 211 | 212 | def _load_data(filename): 213 | """ 214 | Load a pickled data-file from the CIFAR-10 data-set 215 | and return the converted images (see above) and the class-number 216 | for each image. 217 | """ 218 | 219 | # Load the pickled data-file. 220 | data = _unpickle(filename) 221 | 222 | # Get the raw images. 223 | raw_images = data[b'data'] 224 | 225 | # Get the class-numbers for each image. Convert to numpy-array. 226 | cls = np.array(data[b'labels']) 227 | 228 | # Convert the images. 229 | images = _convert_images(raw_images) 230 | 231 | return images, cls 232 | 233 | 234 | ######################################################################## 235 | # Public functions that you may call to download the data-set from 236 | # the internet and load the data into memory. 237 | 238 | 239 | def maybe_download_and_extract(): 240 | """ 241 | Download and extract the CIFAR-10 data-set if it doesn't already exist 242 | in data_path (set this variable first to the desired path). 243 | """ 244 | 245 | dataset_maybe_download_and_extract(url=data_url, download_dir=data_path) 246 | 247 | 248 | def load_class_names(): 249 | """ 250 | Load the names for the classes in the CIFAR-10 data-set. 251 | 252 | Returns a list with the names. Example: names[3] is the name 253 | associated with class-number 3. 
254 | """ 255 | 256 | # Load the class-names from the pickled file. 257 | raw = _unpickle(filename="batches.meta")[b'label_names'] 258 | 259 | # Convert from binary strings. 260 | names = [x.decode('utf-8') for x in raw] 261 | 262 | return names 263 | 264 | 265 | def load_training_data(): 266 | """ 267 | Load all the training-data for the CIFAR-10 data-set. 268 | 269 | The data-set is split into 5 data-files which are merged here. 270 | 271 | Returns the images, class-numbers and one-hot encoded class-labels. 272 | """ 273 | 274 | maybe_download_and_extract() 275 | # Pre-allocate the arrays for the images and class-numbers for efficiency. 276 | images = np.zeros(shape=[_num_images_train, img_size, img_size, num_channels], 277 | dtype=float) 278 | cls = np.zeros(shape=[_num_images_train], dtype=int) 279 | 280 | # Begin-index for the current batch. 281 | begin = 0 282 | 283 | # For each data-file. 284 | for i in range(_num_files_train): 285 | # Load the images and class-numbers from the data-file. 286 | images_batch, cls_batch = _load_data(filename="data_batch_" + str(i + 1)) 287 | 288 | # Number of images in this batch. 289 | num_images = len(images_batch) 290 | 291 | # End-index for the current batch. 292 | end = begin + num_images 293 | 294 | # Store the images into the array. 295 | images[begin:end, :] = images_batch 296 | 297 | # Store the class-numbers into the array. 298 | cls[begin:end] = cls_batch 299 | 300 | # The begin-index for the next batch is the current end-index. 301 | begin = end 302 | 303 | return images, cls 304 | 305 | 306 | def load_test_data(): 307 | """ 308 | Load all the test-data for the CIFAR-10 data-set. 309 | 310 | Returns the images, class-numbers and one-hot encoded class-labels. 311 | """ 312 | 313 | images, cls = _load_data(filename="test_batch") 314 | 315 | return images, cls 316 | 317 | ######################################################################## 318 | -------------------------------------------------------------------------------- /GAN-AE/read_data.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import os 3 | import math 4 | 5 | import numpy as np 6 | import scipy 7 | import scipy.io 8 | 9 | import read_cifar10 as cf10 10 | 11 | 12 | class GeneratorRestartHandler(object): 13 | def __init__(self, gen_func, argv, kwargv): 14 | self.gen_func = gen_func 15 | self.argv = copy.copy(argv) 16 | self.kwargv = copy.copy(kwargv) 17 | self.local_copy = self.gen_func(*self.argv, **self.kwargv) 18 | 19 | def __iter__(self): 20 | return GeneratorRestartHandler(self.gen_func, self.argv, self.kwargv) 21 | 22 | def __next__(self): 23 | return next(self.local_copy) 24 | 25 | def next(self): 26 | return self.__next__() 27 | 28 | 29 | def restartable(g_func): 30 | def tmp(*argv, **kwargv): 31 | return GeneratorRestartHandler(g_func, argv, kwargv) 32 | 33 | return tmp 34 | 35 | 36 | @restartable 37 | def svhn_dataset_generator(dataset_name, batch_size): 38 | assert dataset_name in ['train', 'test'] 39 | assert batch_size > 0 or batch_size == -1 # -1 for entire dataset 40 | 41 | path = './svhn_mat/' 42 | file_name = '%s_32x32.mat' % dataset_name 43 | file_dict = scipy.io.loadmat(os.path.join(path, file_name)) 44 | X_all = file_dict['X'].transpose((3, 0, 1, 2)) 45 | y_all = file_dict['y'] 46 | data_len = X_all.shape[0] 47 | batch_size = batch_size if batch_size > 0 else data_len 48 | 49 | X_all_padded = np.concatenate([X_all, X_all[:batch_size]], axis=0) 50 | y_all_padded = np.concatenate([y_all, 
y_all[:batch_size]], axis=0) 51 | y_all_padded[y_all_padded == 10] = 0 52 | 53 | for slice_i in range(int(math.ceil(data_len / batch_size))): 54 | idx = slice_i * batch_size 55 | # X_batch = X_all_padded[idx:idx + batch_size] 56 | X_batch = X_all_padded[idx:idx + batch_size]*255 # bugfix, thanks Zezhou Sun! 57 | y_batch = np.ravel(y_all_padded[idx:idx + batch_size]) 58 | yield X_batch, y_batch 59 | 60 | 61 | @restartable 62 | def cifar10_dataset_generator(dataset_name, batch_size, restrict_size=1000): 63 | assert dataset_name in ['train', 'test'] 64 | assert batch_size > 0 or batch_size == -1 # -1 for entire dataset 65 | 66 | X_all_unrestricted, y_all = (cf10.load_training_data() if dataset_name == 'train' 67 | else cf10.load_test_data()) 68 | 69 | actual_restrict_size = restrict_size if dataset_name == 'train' else int(1e10) 70 | X_all = X_all_unrestricted[:actual_restrict_size] 71 | data_len = X_all.shape[0] 72 | batch_size = batch_size if batch_size > 0 else data_len 73 | 74 | X_all_padded = np.concatenate([X_all, X_all[:batch_size]], axis=0) 75 | y_all_padded = np.concatenate([y_all, y_all[:batch_size]], axis=0) 76 | 77 | for slice_i in range(int(math.ceil(data_len / batch_size))): 78 | idx = slice_i * batch_size 79 | X_batch = X_all_padded[idx:idx + batch_size] 80 | y_batch = np.ravel(y_all_padded[idx:idx + batch_size]) 81 | yield X_batch.astype(np.uint8), y_batch.astype(np.uint8) -------------------------------------------------------------------------------- /GAN-AE/utils.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | 4 | #bias initialization 5 | def bias_variable(shape, name=None): 6 | initial = tf.constant(0.0, shape=shape) 7 | if name is None: 8 | return tf.Variable(initial) 9 | else: 10 | return tf.get_variable(name, initializer=initial) 11 | 12 | #xavier init 13 | def _glorot_initializer(prev_units, num_units, stddev_factor=1.0): 14 | stddev = np.sqrt(stddev_factor / np.sqrt(prev_units*num_units)) 15 | return tf.truncated_normal([prev_units, num_units], mean=0.0, stddev=stddev) 16 | 17 | #weight initialization 18 | def weight_variable(shape, stddev=0.02, name=None): 19 | initial = tf.truncated_normal(shape, stddev=stddev) 20 | if name is None: 21 | return tf.Variable(initial) 22 | else: 23 | return tf.get_variable(name, initializer=initial) 24 | 25 | #leaky relu implementation 26 | def leaky_relu(x, alpha=0.2, name=""): 27 | return tf.maximum(alpha * x, x, name) 28 | 29 | #stores activations 30 | def add_activation_summary(var): 31 | tf.summary.histogram(var.op.name + "/activation", var) 32 | tf.summary.scalar(var.op.name + "/sparsity", tf.nn.zero_fraction(var)) 33 | 34 | #combines patches to get the whole image 35 | def block2img(img_blocks,img_size): 36 | row,col = img_size 37 | img=np.zeros((row,col,3)).astype(np.float32) 38 | n,k,l,c=img_blocks.shape 39 | for i in range(0,int(row/k)): 40 | for j in range(0,int(col/k)): 41 | img[i*k:(i+1)*k,j*l:(j+1)*l,:]=img_blocks[int(i*col/k+j),:,:,:] 42 | return img -------------------------------------------------------------------------------- /GAN/CIFAR-10/cifar-10-batches-py/batches.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scelesticsiva/Neural-Networks-for-Image-Compression/f399e6fa9e4b64d89a5284ce4041211daaaa725a/GAN/CIFAR-10/cifar-10-batches-py/batches.meta -------------------------------------------------------------------------------- 
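The block2img helper defined just above is what the GAN code uses to stitch decoded 32x32 patches back into a full image; the patches are laid out row-major. A minimal round-trip sketch (illustrative, assuming utils.py is importable and square blocks):

import numpy as np
import utils

# four 32x32 RGB blocks tile a 64x64 image in row-major order:
# blocks[0] top-left, blocks[1] top-right, blocks[2] bottom-left, blocks[3] bottom-right
blocks = np.random.rand(4, 32, 32, 3).astype(np.float32)
img = utils.block2img(blocks, (64, 64))
assert img.shape == (64, 64, 3)
assert np.allclose(img[:32, 32:], blocks[1])  # the top-right patch comes from blocks[1]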
/GAN/CIFAR-10/cifar-10-batches-py/readme.html:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/GAN/README.md:
--------------------------------------------------------------------------------
1 | ## Lossy image compression using generative adversarial networks
2 | 
3 | This folder contains two structures with two different generator models:
4 | - a 3-layer fully convolutional network
5 | - a much smaller version of ResNet (given in the report)
6 | 
7 | The two generators are written in two separate functions. To use one of them, change the generator function's name in the training stage (generator / generator_res).
8 | 
9 | Loss functions:
10 | - Wasserstein GAN
11 | - Wasserstein GAN + L2 loss
12 | - DCGAN
13 | - DCGAN + L2 loss
14 | 
15 | Parameters and the loss-function options can be selected through FLAGS or the class variables.
16 | 
17 | 
18 | 
19 | 
20 | 
--------------------------------------------------------------------------------
/GAN/main.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | 
3 | import numpy as np
4 | import tensorflow as tf
5 | import models
6 | from models import *
7 | 
8 | FLAGS = tf.flags.FLAGS
9 | tf.flags.DEFINE_integer("batch_size", "64", "batch size for training")
10 | tf.flags.DEFINE_float("learning_rate", "2e-5", "learning rate for optimizers")
11 | tf.flags.DEFINE_float("optimizer_param", "0.5", "beta1 for adam-decay for RMSProp")
12 | tf.flags.DEFINE_float("iterations", "500000", "training iterations")
13 | tf.flags.DEFINE_string("optimizer", "RMSProp", "RMSProp/Adam")
14 | tf.flags.DEFINE_string("loss_type", "wasserstein_l2_loss", "wasserstein/imp_wasserstein/wasserstein_l1_loss/wasserstein_l2_loss/imp_wasserstein_l2_loss")
15 | 
16 | def main(argv=None):
17 |     discriminator_dims = [3, 16, 64, 1]
18 |     kernel_encoder = [5,7,9]
19 |     kernel_decoder = [9,7,5]
20 |     encoder_dims = [64,16,3]
21 |     decoder_dims = [16,32,3]
22 | 
23 |     print("stage 1")
24 |     model = models.GAN_AE(FLAGS.batch_size,
25 |                           clip_values=(-0.01, 0.01), disc_iterations=5, num_train_data=38400, num_test_data=6400, folder='wgan_l2')
26 |     print("stage 2")
27 |     model.create_model(discriminator_dims, kernel_encoder, kernel_decoder, encoder_dims, decoder_dims, "RMSProp", FLAGS.learning_rate,
28 |                        FLAGS.optimizer_param, FLAGS.loss_type)
29 | 
30 |     print("stage 3")
31 |     model.train_model(FLAGS.batch_size, int(FLAGS.iterations))
32 | 
33 | 
34 | 
35 | if __name__ == "__main__":
36 |     tf.app.run()
37 | 
--------------------------------------------------------------------------------
/GAN/models.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | 
4 | import utils as utils
5 | import read_cifar10 as cf10
6 | import read_data
7 | import skimage.io
8 | import time
9 | 
10 | FLAGS = tf.app.flags.FLAGS
11 | 
12 | class GAN_AE(object):
13 |     def __init__(self, batch_size, clip_values, disc_iterations, num_train_data, num_test_data, folder):
14 |         self.batch_size = batch_size
15 |         self.input_images = self.cifar10_dataset('train',1, num_train_data)
16 |         self.test_images = self.cifar10_dataset('test',1, num_test_data)
17 |         self.disc_iterations = disc_iterations
18 |         self.clip_values = clip_values  # clip the Wasserstein discriminator weights to approximate the Lipschitz constraint
19 |         self.z_dim = 32  # input dim
20 | 
self.real_dim = 32  # real images dim
21 |         self.logs_dir = "logs/wgan_logs/"
22 |         self.folder = folder
23 | 
24 |     # returns original or downsampled cifar images
25 |     def cifar10_dataset(self,name, downsample, total_num):
26 |         assert name in ['train', 'test']
27 |         X, y = (cf10.load_training_data() if name == 'train'
28 |                 else cf10.load_test_data())
29 |         data = X[:total_num]
30 |         return data[:,::downsample,::downsample,:]
31 | 
32 |     # returns batch images
33 |     def cifar10_batches(self, name, downsample, batch_size, num, total_num=12800):
34 |         assert name in ['train', 'test']
35 |         assert total_num % batch_size == 0 or batch_size == -1
36 |         X0, y0 = (cf10.load_training_data() if name == 'train'
37 |                   else cf10.load_test_data())
38 |         X = X0[:total_num]
39 |         X_batch = X[num * batch_size:(num + 1) * batch_size]
40 |         X_batch = X_batch[:,::downsample,::downsample,:]
41 |         return X_batch
42 | 
43 |     # returns randomly sampled inputs and indices
44 |     def batch_sampler(self, x, batch_size):
45 |         shape = x.shape
46 |         assert len(shape) == 4
47 |         data_num = shape[0]
48 |         idx = np.random.randint(low=0, high=data_num, size=batch_size)  # high is exclusive
49 |         batch = x[idx]
50 |         return batch, idx
51 | 
52 |     # upsampling x2
53 |     def upscale(self,x):
54 |         old_size = x.get_shape()
55 |         size = [2*int(k) for k in old_size[1:3]]
56 |         out = tf.image.resize_images(x, size)
57 |         return out
58 | 
59 |     # xavier initialization for conv
60 |     def glorot_initializer_conv2d(self, prev_units, num_units, mapsize, stddev_factor=1.0):
61 |         stddev = np.sqrt(stddev_factor / (np.sqrt(prev_units*num_units)*mapsize*mapsize))
62 |         return tf.truncated_normal([mapsize, mapsize, prev_units, num_units],
63 |                                    mean=0.0, stddev=stddev)
64 | 
65 |     # conv2d with stride number = 2
66 |     def conv2d_strided(self, x, W, b):
67 |         conv = tf.nn.conv2d(x, W, strides=[1, 2, 2, 1], padding="SAME")
68 |         return tf.nn.bias_add(conv, b)
69 | 
70 |     # transposed conv2d
71 |     def conv2d_transpose(self, x, num_units, mapsize=1, stride=1, stddev_factor=1.0):
72 |         assert len(x.get_shape()) == 4
73 |         with tf.variable_scope("conv2d_transpose"):
74 |             prev_units = int(x.get_shape()[-1])
75 |             initw = self.glorot_initializer_conv2d(prev_units, num_units,
76 |                                                    mapsize, stddev_factor=stddev_factor)
77 |             weight = tf.get_variable('weight', initializer=initw)
78 |             weight = tf.transpose(weight, perm=[0, 1, 3, 2])
79 |             output_shape = [FLAGS.batch_size,
80 |                             int(x.get_shape()[1]) * stride,
81 |                             int(x.get_shape()[2]) * stride, num_units]
82 |             out = tf.nn.conv2d_transpose(x, weight,
83 |                                          output_shape=output_shape,
84 |                                          strides=[1, stride, stride, 1],
85 |                                          padding='SAME')
86 |             initb = tf.constant(0.0, shape=[num_units])
87 |             bias = tf.get_variable('bias', initializer=initb)
88 |             out = tf.nn.bias_add(out, bias)
89 |             return out
90 | 
91 |     # autoencoder (generator)
92 |     def generator(self, x,kernels1=[5,7,9],kernels2=[9,7,5],filters1=[64,16,3],filters2=[16,32,3],pool_size=[1,2,2], scope_name="generator"):
93 |         out=x
94 |         with tf.variable_scope(scope_name):
95 |             for k in range(len(kernels1)):
96 |                 conv = tf.layers.conv2d(inputs=out,
97 |                                         filters=filters1[k],
98 |                                         kernel_size=[kernels1[k],kernels1[k]],
99 |                                         padding="same",
100 |                                         activation=tf.nn.relu,
101 |                                         name='conv'+str(k))
102 |                 pool_now=pool_size[k]
103 |                 if(pool_now==1):
104 |                     out=conv
105 |                 else:
106 |                     out = tf.layers.max_pooling2d(inputs=conv,
107 |                                                   pool_size=[pool_now,pool_now],
108 |                                                   strides=pool_now,
109 |                                                   name = 'pool'+str(k))
110 |             out_quant=tf.round(out*255.)/255.
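# Note on the line above: out_quant rounds the bottleneck activations to 256 levels
# (8-bit) to mimic quantization, but the decoder loop below keeps consuming `out`, so
# the rounding does not affect this forward path; feed out_quant into the decoder
# instead to simulate an actual quantized code.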
111 | for k in range(len(kernels2)): 112 | with tf.variable_scope("deconv") as var_scope: 113 | pool_now=pool_size[-1-k] 114 | if(pool_now==1): 115 | x_up=out 116 | out = tf.layers.conv2d(inputs=x_up, 117 | filters=filters2[k], 118 | kernel_size=[kernels2[k],kernels2[k]], 119 | padding="same", 120 | activation=tf.nn.relu, 121 | name='deconv'+str(k)) 122 | else: 123 | shape = out.get_shape().as_list() 124 | x_up = tf.image.resize_images(out,[shape[1]*pool_now,shape[2]*pool_now]) 125 | out = tf.layers.conv2d(inputs=x_up, 126 | filters=filters2[k], 127 | kernel_size=[kernels2[k],kernels2[k]], 128 | padding="same", 129 | activation=tf.nn.relu, 130 | name='deconv'+str(k)) 131 | return out 132 | 133 | #discriminator 134 | def discriminator(self, input_images, dims, activation=tf.nn.relu, scope_name="discriminator", 135 | scope_reuse=False): 136 | with tf.variable_scope(scope_name) as scope: 137 | if scope_reuse: 138 | scope.reuse_variables() 139 | h = input_images 140 | for index in range(2): 141 | W = utils.weight_variable([4, 4, dims[index], dims[index + 1]], name="W_%d" % index) 142 | b = utils.bias_variable([dims[index + 1]], name="b_%d" % index) 143 | h = self.conv2d_strided(h, W, b) 144 | W_pred = utils.weight_variable([4, 4, dims[-2], dims[-1]], name="W_pred") 145 | b = tf.zeros([dims[-1]]) 146 | h_pred = self.conv2d_strided(h, W_pred, b) 147 | return h_pred 148 | 149 | #wasserstein loss 150 | def wgan_loss(self, logits_real, logits_fake): 151 | self.discriminator_loss = tf.reduce_mean(logits_real- logits_fake) 152 | self.gen_loss = tf.reduce_mean(logits_fake) 153 | tf.summary.scalar("Disc_loss", self.discriminator_loss) 154 | tf.summary.scalar("Gen_loss", self.gen_loss) 155 | 156 | #weighted L1 loss 157 | def add_l1_loss(self, real_image, fake_image, reg_var): 158 | self.gen_loss += reg_var * tf.reduce_mean(tf.abs(real_image - fake_image)) 159 | self.mse_loss = tf.reduce_mean(tf.square(real_image - fake_image)) 160 | 161 | #weighted L2 loss 162 | def add_l2_loss(self, real_image, fake_image, reg_var): 163 | self.mse_loss = reg_var * tf.reduce_mean(tf.square(real_image - fake_image)) 164 | self.gen_loss += self.mse_loss 165 | 166 | #improved wasserstein loss / discriminator dimension needs to be given by hand 167 | def imp_wgan_loss(self, logits_real, logits_fake, real_images, fake_images): 168 | lmda = 1000 169 | self.gen_loss = tf.reduce_mean(logits_fake) 170 | self.discriminator_loss = tf.reduce_mean(logits_real) - tf.reduce_mean(logits_fake) 171 | alpha = tf.random_uniform( shape=[self.batch_size,1], minval=0., maxval=1.) 
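# Gradient-penalty idea (improved WGAN, Gulrajani et al. 2017): take a random point
# x_hat = real + alpha * (fake - real) on the segment between each real/generated pair,
# then add lmda * E[(||grad_D(x_hat)||_2 - 1)^2] to the discriminator loss so that D
# stays approximately 1-Lipschitz; the code below builds exactly this interpolation.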
172 | shape = fake_images.get_shape().as_list() 173 | out_dim = shape[1] * shape[1] * shape[3] 174 | 175 | real_data = tf.reshape(real_images, [self.batch_size, out_dim]) 176 | fake_data = tf.reshape(fake_images, [self.batch_size, out_dim]) 177 | 178 | error = fake_data - real_data 179 | new_var = real_data + (alpha*error) 180 | new_var = tf.reshape(new_var, [self.batch_size, shape[1], shape[1],shape[3]]) 181 | gradients = tf.gradients(self.discriminator(new_var, [3, 16, 64,1], scope_reuse=True), [new_var])[0] 182 | slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients), reduction_indices=[1,2,3])) 183 | gradient_penalty = tf.reduce_mean((slopes-1.)**2) 184 | self.discriminator_loss += lmda*gradient_penalty 185 | 186 | #choose optimizer for the network 187 | def get_optimizer(self, optimizer_name, learning_rate, optimizer_param): 188 | self.learning_rate = learning_rate 189 | if optimizer_name == "Adam": 190 | return tf.train.AdamOptimizer(learning_rate, beta1=optimizer_param) 191 | elif optimizer_name == "RMSProp": 192 | return tf.train.RMSPropOptimizer(learning_rate, decay=optimizer_param) 193 | else: 194 | raise ValueError("Unknown optimizer %s" % optimizer_name) 195 | 196 | #calculate and apply gradients of the specified variables 197 | def optimizer_train(self, loss_val, var_list, optimizer): 198 | grads = optimizer.compute_gradients(loss_val, var_list=var_list) 199 | return optimizer.apply_gradients(grads) 200 | 201 | #creates the placeholders and the model 202 | def create_model(self, disc_dims, kernel_encoder, kernel_decoder, encoder_dims, decoder_dims, optimizer="Adam", learning_rate=2e-4, 203 | optimizer_param=0.9, loss_type="wasserstein"): 204 | self.input_batch = tf.placeholder(tf.float32, [self.batch_size, self.z_dim, self.z_dim, 3], name="z") 205 | self.real_batch = tf.placeholder(tf.float32, [self.batch_size, self.real_dim, self.real_dim, 3], name="z") 206 | self.gen_images = self.generator(self.input_batch, kernel_encoder, kernel_decoder, encoder_dims, decoder_dims, scope_name="generator") 207 | 208 | tf.summary.image("image_real", self.real_batch*255+127.5, max_outputs=1) 209 | tf.summary.image("image_generated", self.gen_images*255+127.5, max_outputs=1) 210 | logits_real= self.discriminator(self.real_batch, disc_dims, 211 | activation=utils.leaky_relu, 212 | scope_name="discriminator", 213 | scope_reuse=False) 214 | logits_fake = self.discriminator(self.gen_images, disc_dims, 215 | activation=utils.leaky_relu, 216 | scope_name="discriminator", 217 | scope_reuse=True) 218 | if loss_type == "wasserstein": 219 | self.wgan_loss(logits_real, logits_fake) 220 | elif loss_type == "imp_wasserstein": 221 | self.imp_wgan_loss(logits_real, logits_fake,self.real_batch, self.gen_images) 222 | elif loss_type == "imp_wasserstein_l2_loss": 223 | self.imp_wgan_loss(logits_real, logits_fake,self.real_batch, self.gen_images) 224 | self.add_l2_loss(self.real_batch, self.gen_images, 1) 225 | elif loss_type == "wasserstein_l2_loss": 226 | self.wgan_loss(logits_real, logits_fake) 227 | self.add_l2_loss(self.real_batch, self.gen_images, 10) 228 | elif loss_type == "wasserstein_l1_loss": 229 | self.wgan_loss(logits_real, logits_fake) 230 | self.add_l1_loss(self.real_batch, self.gen_images, 10) 231 | # elif loss_type =="dcgan": 232 | # self.dcgan_loss() 233 | else: 234 | raise ValueError("Unknown loss %s" % loss_type) 235 | 236 | train_variables = tf.trainable_variables() 237 | self.generator_variables = [v for v in train_variables if v.name.startswith("generator")] 238 | self.discriminator_variables 
= [v for v in train_variables if v.name.startswith("discriminator")]
239 |         optim = self.get_optimizer(optimizer, learning_rate, optimizer_param)
240 | 
241 |         self.generator_train_op = self.optimizer_train(self.gen_loss, self.generator_variables, optim)
242 |         self.discriminator_train_op = self.optimizer_train(self.discriminator_loss, self.discriminator_variables, optim)
243 | 
244 |     # trains the model, writes the MSE values to a txt file, saves images at intermediate steps, and tests on Lena
245 |     def train_model(self, num_data, max_iterations):
246 |         start_time = time.time()
247 |         sess = tf.InteractiveSession()
248 |         self.summary_op = tf.summary.merge_all()
249 |         self.saver = tf.train.Saver()
250 |         self.summary_writer = tf.summary.FileWriter(self.logs_dir, sess.graph)  # write the summaries to the logs dir
251 | 
252 |         sess.run(tf.global_variables_initializer())
253 |         mse = []
254 |         clip_discriminator_var_op = [var.assign(tf.clip_by_value(var, self.clip_values[0], self.clip_values[1])) for var in self.discriminator_variables]
255 | 
256 |         def get_feed_dict():
257 |             batch_z, idx = self.batch_sampler(self.input_images, self.batch_size)
258 |             batch_real = self.input_images[idx]
259 |             feed_dict = {self.input_batch: batch_z, self.real_batch: batch_real}
260 |             return feed_dict
261 | 
262 |         f = open("./"+self.folder+"/mse.txt", "w")
263 |         for itr in range(1, max_iterations):
264 |             if itr < 25 or itr % 100 == 0:
265 |                 disc_itrs = 25
266 |             else:
267 |                 disc_itrs = self.disc_iterations
268 | 
269 |             for disc_itr in range(disc_itrs):
270 |                 sess.run(self.discriminator_train_op, feed_dict=get_feed_dict())
271 |                 sess.run(clip_discriminator_var_op)
272 |             feed_dict = get_feed_dict()
273 |             sess.run(self.generator_train_op, feed_dict=feed_dict)
274 | 
275 |             if itr % 10000 == 0:
276 |                 gen_out = sess.run(self.gen_images, feed_dict=feed_dict)
277 |                 real_out = sess.run(self.real_batch, feed_dict=feed_dict)
278 |                 print(np.mean(np.square(gen_out-real_out)))
279 | 
280 |             if itr % 100 == 0:
281 |                 summary_str = sess.run(self.summary_op, feed_dict=feed_dict)
282 |                 self.summary_writer.add_summary(summary_str, itr)
283 | 
284 |                 gen_out = sess.run(self.gen_images, feed_dict=feed_dict)
285 |                 real_out = sess.run(self.real_batch, feed_dict=feed_dict)
286 | 
287 |                 mse.append(sess.run(self.mse_loss, feed_dict=feed_dict))
288 |                 cur_mse = sess.run(self.mse_loss, feed_dict=feed_dict)
289 |                 f.write(str(cur_mse))
290 |                 f.write("\n")
291 |                 print(np.mean(np.square(gen_out-real_out)))
292 | 
293 |                 def convert2uint8(img):
294 |                     img[img>255]=255
295 |                     img[img<0]=0
296 |                     return img.astype(np.uint8)
297 | 
298 |                 new_fake = utils.block2img(gen_out, (256,256))
299 |                 new_real = utils.block2img(real_out, (256,256))
300 |                 print_fake = convert2uint8(new_fake*255)
301 | 
302 |                 skimage.io.imsave('./'+self.folder+'/genful'+str(itr)+'.tiff', print_fake)
303 |                 skimage.io.imsave('./'+self.folder+'/realful'+str(itr)+'.tiff', new_real)
304 | 
305 |                 # lena test
306 |                 lena = skimage.io.imread('./lena.tiff')
307 |                 lena = np.asarray(lena)
308 |                 lena = lena.astype(np.float32)
309 |                 row,col,color = lena.shape
310 |                 img_8x8=np.zeros((int(row*col/1024),32,32,3)).astype(np.float32)
311 |                 count =0
312 |                 for i in range(0,row-row%32,32):
313 |                     for j in range(0,col-col%32,32):
314 |                         img_8x8[count,:,:,:]=lena[i:i+32,j:j+32,:]
315 |                         count = count +1
316 |                 test_img = img_8x8
317 |                 test = test_img[:64]
318 |                 feed_test = {self.input_batch: test, self.real_batch: test}
319 |                 block1 = sess.run(self.gen_images, feed_dict=feed_test)
320 | 
321 |                 for i in range(1,4):
322 |                     test = test_img[64*i:64*(i+1)]
323 | 
feed_test = {self.input_batch: test, self.real_batch: test} 324 | block2 = sess.run(self.gen_images, feed_dict=feed_test ) 325 | block1 = np.concatenate( [block1, block2], axis=0 ) 326 | 327 | test_out = np.asarray(block1) 328 | test_res = utils.block2img(test_out, (512,512)) 329 | test_res = convert2uint8(test_res) 330 | skimage.io.imsave('./'+self.folder+'/test'+str(itr)+'.tiff', test_res) 331 | # 332 | if itr % 10000 == 0: 333 | stop_time = time.time() 334 | duration = (stop_time - start_time) / 5.0 335 | start_time = stop_time 336 | g_loss_val, d_loss_val = sess.run([self.gen_loss, self.discriminator_loss], 337 | feed_dict=feed_dict) 338 | print("Time: %g/itr, Step: %d, generator loss: %g, discriminator_loss: %g" % ( 339 | duration, itr, g_loss_val, d_loss_val)) 340 | 341 | if itr % 10000 == 0: 342 | self.saver.save(sess, self.logs_dir + "model.ckpt", global_step=itr) 343 | f.close() 344 | 345 | 346 | 347 | 348 | 349 | 350 | -------------------------------------------------------------------------------- /GAN/models.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scelesticsiva/Neural-Networks-for-Image-Compression/f399e6fa9e4b64d89a5284ce4041211daaaa725a/GAN/models.pyc -------------------------------------------------------------------------------- /GAN/read_cifar10.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function, unicode_literals 2 | #### 3 | # COPIED FROM https://github.com/Hvass-Labs/TensorFlow-Tutorials/ 4 | # and modified 5 | 6 | 7 | ######################################################################## 8 | # 9 | # Functions for downloading the CIFAR-10 data-set from the internet 10 | # and loading it into memory. 11 | # 12 | # Implemented in Python 3.5 13 | # 14 | # Usage: 15 | # 1) Set the variable data_path with the desired storage path. 16 | # 2) Call maybe_download_and_extract() to download the data-set 17 | # if it is not already located in the given data_path. 18 | # 3) Call load_class_names() to get an array of the class-names. 19 | # 4) Call load_training_data() and load_test_data() to get 20 | # the images, class-numbers and one-hot encoded class-labels 21 | # for the training-set and test-set. 22 | # 5) Use the returned data in your own program. 23 | # 24 | # Format: 25 | # The images for the training- and test-sets are returned as 4-dim numpy 26 | # arrays each with the shape: [image_number, height, width, channel] 27 | # where the individual pixels are floats between 0.0 and 1.0. 28 | # 29 | ######################################################################## 30 | # 31 | # This file is part of the TensorFlow Tutorials available at: 32 | # 33 | # https://github.com/Hvass-Labs/TensorFlow-Tutorials 34 | # 35 | # Published under the MIT License. See the file LICENSE for details. 36 | # 37 | # Copyright 2016 by Magnus Erik Hvass Pedersen 38 | # 39 | ######################################################################## 40 | 41 | import numpy as np 42 | import pickle 43 | 44 | import sys 45 | import os 46 | import six.moves.urllib as urllib 47 | import tarfile 48 | import zipfile 49 | 50 | 51 | ######################################################################## 52 | 53 | 54 | def _print_download_progress(count, block_size, total_size): 55 | """ 56 | Function used for printing the download progress. 57 | Used as a call-back function in maybe_download_and_extract(). 
58 | """ 59 | 60 | # Percentage completion. 61 | pct_complete = float(count * block_size) / total_size 62 | 63 | # Status-message. Note the \r which means the line should overwrite itself. 64 | msg = "\r- Download progress: {0:.1%}".format(pct_complete) 65 | 66 | # Print it. 67 | sys.stdout.write(msg) 68 | sys.stdout.flush() 69 | 70 | 71 | ######################################################################## 72 | 73 | 74 | def dataset_maybe_download_and_extract(url, download_dir): 75 | """ 76 | Download and extract the data if it doesn't already exist. 77 | Assumes the url is a tar-ball file. 78 | :param url: 79 | Internet URL for the tar-file to download. 80 | Example: "https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz" 81 | :param download_dir: 82 | Directory where the downloaded file is saved. 83 | Example: "data/CIFAR-10/" 84 | :return: 85 | Nothing. 86 | """ 87 | 88 | # Filename for saving the file downloaded from the internet. 89 | # Use the filename from the URL and add it to the download_dir. 90 | filename = url.split('/')[-1] 91 | file_path = os.path.join(download_dir, filename) 92 | 93 | # Check if the file already exists. 94 | # If it exists then we assume it has also been extracted, 95 | # otherwise we need to download and extract it now. 96 | if not os.path.exists(file_path): 97 | # Check if the download directory exists, otherwise create it. 98 | if not os.path.exists(download_dir): 99 | os.makedirs(download_dir) 100 | 101 | # Download the file from the internet. 102 | file_path, _ = urllib.request.urlretrieve(url=url, 103 | filename=file_path, 104 | reporthook=_print_download_progress) 105 | 106 | print() 107 | print("Download finished. Extracting files.") 108 | 109 | if file_path.endswith(".zip"): 110 | # Unpack the zip-file. 111 | zipfile.ZipFile(file=file_path, mode="r").extractall(download_dir) 112 | elif file_path.endswith((".tar.gz", ".tgz")): 113 | # Unpack the tar-ball. 114 | tarfile.open(name=file_path, mode="r:gz").extractall(download_dir) 115 | 116 | print("Done.") 117 | 118 | 119 | ######################################################################## 120 | 121 | 122 | ######################################################################## 123 | 124 | # Directory where you want to download and save the data-set. 125 | # Set this before you start calling any of the functions below. 126 | data_path = "./CIFAR-10/" 127 | 128 | # URL for the data-set on the internet. 129 | data_url = "https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz" 130 | 131 | ######################################################################## 132 | # Various constants for the size of the images. 133 | # Use these constants in your own program. 134 | 135 | # Width and height of each image. 136 | img_size = 32 137 | 138 | # Number of channels in each image, 3 channels: Red, Green, Blue. 139 | num_channels = 3 140 | 141 | # Length of an image when flattened to a 1-dim array. 142 | img_size_flat = img_size * img_size * num_channels 143 | 144 | # Number of classes. 145 | num_classes = 10 146 | 147 | ######################################################################## 148 | # Various constants used to allocate arrays of the correct size. 149 | 150 | # Number of files for the training-set. 151 | _num_files_train = 5 152 | 153 | # Number of images for each batch-file in the training-set. 154 | _images_per_file = 10000 155 | 156 | # Total number of images in the training-set. 157 | # This is used to pre-allocate arrays for efficiency. 
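# i.e. 5 files * 10000 images = 50000 training images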
158 | _num_images_train = _num_files_train * _images_per_file
159 | 
160 | 
161 | ########################################################################
162 | # Private functions for downloading, unpacking and loading data-files.
163 | 
164 | 
165 | def _get_file_path(filename=""):
166 |     """
167 |     Return the full path of a data-file for the data-set.
168 |     If filename=="" then return the directory of the files.
169 |     """
170 | 
171 |     return os.path.join(data_path, "cifar-10-batches-py/", filename)
172 | 
173 | 
174 | def _unpickle(filename):
175 |     """
176 |     Unpickle the given file and return the data.
177 |     Note that the appropriate dir-name is prepended the filename.
178 |     """
179 | 
180 |     # Create full path for the file.
181 |     file_path = _get_file_path(filename)
182 | 
183 |     with open(file_path, mode='rb') as file:
184 |         # In Python 3.X it is important to set the encoding,
185 |         # otherwise an exception is raised here.
186 |         data = pickle.load(file, encoding='bytes')
187 | 
188 |     return data
189 | 
190 | 
191 | def _convert_images(raw):
192 |     """
193 |     Convert images from the CIFAR-10 format and
194 |     return a 4-dim array with shape: [image_number, height, width, channel]
195 |     where the pixels are floats between 0.0 and 1.0.
196 |     """
197 | 
198 |     # Convert the raw images from the data-files to floating-points.
199 |     raw_float = np.array(raw, dtype=float) / 255.0
200 | 
201 |     # Reshape the array to 4-dimensions.
202 |     images = raw_float.reshape([-1, num_channels, img_size, img_size])
203 | 
204 |     # Reorder the indices of the array.
205 |     images = images.transpose([0, 2, 3, 1])
206 | 
207 |     return images
208 | 
209 | 
210 | def _load_data(filename):
211 |     """
212 |     Load a pickled data-file from the CIFAR-10 data-set
213 |     and return the converted images (see above) and the class-number
214 |     for each image.
215 |     """
216 | 
217 |     # Load the pickled data-file.
218 |     data = _unpickle(filename)
219 | 
220 |     # Get the raw images.
221 |     raw_images = data[b'data']
222 | 
223 |     # Get the class-numbers for each image. Convert to numpy-array.
224 |     cls = np.array(data[b'labels'])
225 | 
226 |     # Convert the images.
227 |     images = _convert_images(raw_images)
228 | 
229 |     return images, cls
230 | 
231 | 
232 | ########################################################################
233 | # Public functions that you may call to download the data-set from
234 | # the internet and load the data into memory.
235 | 
236 | 
237 | def maybe_download_and_extract():
238 |     """
239 |     Download and extract the CIFAR-10 data-set if it doesn't already exist
240 |     in data_path (set this variable first to the desired path).
241 |     """
242 | 
243 |     dataset_maybe_download_and_extract(url=data_url, download_dir=data_path)
244 | 
245 | 
246 | def load_class_names():
247 |     """
248 |     Load the names for the classes in the CIFAR-10 data-set.
249 |     Returns a list with the names. Example: names[3] is the name
250 |     associated with class-number 3.
251 |     """
252 | 
253 |     # Load the class-names from the pickled file.
254 |     raw = _unpickle(filename="batches.meta")[b'label_names']
255 | 
256 |     # Convert from binary strings.
257 |     names = [x.decode('utf-8') for x in raw]
258 | 
259 |     return names
260 | 
261 | 
262 | def load_training_data():
263 |     """
264 |     Load all the training-data for the CIFAR-10 data-set.
265 |     The data-set is split into 5 data-files which are merged here.
266 |     Returns the images, class-numbers and one-hot encoded class-labels.
267 |     """
268 | 
269 |     maybe_download_and_extract()
270 |     # Pre-allocate the arrays for the images and class-numbers for efficiency.
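# resulting shapes: images (50000, 32, 32, 3) floats in [0.0, 1.0], cls (50000,) class ids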
271 | images = np.zeros(shape=[_num_images_train, img_size, img_size, num_channels], 272 | dtype=float) 273 | cls = np.zeros(shape=[_num_images_train], dtype=int) 274 | 275 | # Begin-index for the current batch. 276 | begin = 0 277 | 278 | # For each data-file. 279 | for i in range(_num_files_train): 280 | # Load the images and class-numbers from the data-file. 281 | images_batch, cls_batch = _load_data(filename="data_batch_" + str(i + 1)) 282 | 283 | # Number of images in this batch. 284 | num_images = len(images_batch) 285 | 286 | # End-index for the current batch. 287 | end = begin + num_images 288 | 289 | # Store the images into the array. 290 | images[begin:end, :] = images_batch 291 | 292 | # Store the class-numbers into the array. 293 | cls[begin:end] = cls_batch 294 | 295 | # The begin-index for the next batch is the current end-index. 296 | begin = end 297 | 298 | return images, cls 299 | 300 | 301 | def load_test_data(): 302 | """ 303 | Load all the test-data for the CIFAR-10 data-set. 304 | Returns the images, class-numbers and one-hot encoded class-labels. 305 | """ 306 | 307 | images, cls = _load_data(filename="test_batch") 308 | 309 | return images, cls 310 | 311 | ######################################################################## -------------------------------------------------------------------------------- /GAN/read_cifar10.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scelesticsiva/Neural-Networks-for-Image-Compression/f399e6fa9e4b64d89a5284ce4041211daaaa725a/GAN/read_cifar10.pyc -------------------------------------------------------------------------------- /GAN/read_data.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import os 3 | import math 4 | 5 | import numpy as np 6 | import scipy 7 | import scipy.io 8 | 9 | import read_cifar10 as cf10 10 | 11 | 12 | class GeneratorRestartHandler(object): 13 | def __init__(self, gen_func, argv, kwargv): 14 | self.gen_func = gen_func 15 | self.argv = copy.copy(argv) 16 | self.kwargv = copy.copy(kwargv) 17 | self.local_copy = self.gen_func(*self.argv, **self.kwargv) 18 | 19 | def __iter__(self): 20 | return GeneratorRestartHandler(self.gen_func, self.argv, self.kwargv) 21 | 22 | def __next__(self): 23 | return next(self.local_copy) 24 | 25 | def next(self): 26 | return self.__next__() 27 | 28 | 29 | def restartable(g_func): 30 | def tmp(*argv, **kwargv): 31 | return GeneratorRestartHandler(g_func, argv, kwargv) 32 | 33 | return tmp 34 | 35 | 36 | @restartable 37 | def svhn_dataset_generator(dataset_name, batch_size): 38 | assert dataset_name in ['train', 'test'] 39 | assert batch_size > 0 or batch_size == -1 # -1 for entire dataset 40 | 41 | path = './svhn_mat/' 42 | file_name = '%s_32x32.mat' % dataset_name 43 | file_dict = scipy.io.loadmat(os.path.join(path, file_name)) 44 | X_all = file_dict['X'].transpose((3, 0, 1, 2)) 45 | y_all = file_dict['y'] 46 | data_len = X_all.shape[0] 47 | batch_size = batch_size if batch_size > 0 else data_len 48 | 49 | X_all_padded = np.concatenate([X_all, X_all[:batch_size]], axis=0) 50 | y_all_padded = np.concatenate([y_all, y_all[:batch_size]], axis=0) 51 | y_all_padded[y_all_padded == 10] = 0 52 | 53 | for slice_i in range(int(math.ceil(data_len / batch_size))): 54 | idx = slice_i * batch_size 55 | # X_batch = X_all_padded[idx:idx + batch_size] 56 | X_batch = X_all_padded[idx:idx + batch_size]*255 # bugfix, thanks Zezhou Sun! 
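# the wrap-around padding built above (X_all_padded) guarantees this slice is a full
# batch even when data_len is not a multiple of batch_size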
57 | y_batch = np.ravel(y_all_padded[idx:idx + batch_size]) 58 | yield X_batch, y_batch 59 | 60 | 61 | @restartable 62 | def cifar10_dataset_generator(dataset_name, batch_size, restrict_size=1000): 63 | assert dataset_name in ['train', 'test'] 64 | assert batch_size > 0 or batch_size == -1 # -1 for entire dataset 65 | 66 | X_all_unrestricted, y_all = (cf10.load_training_data() if dataset_name == 'train' 67 | else cf10.load_test_data()) 68 | 69 | actual_restrict_size = restrict_size if dataset_name == 'train' else int(1e10) 70 | X_all = X_all_unrestricted[:actual_restrict_size] 71 | data_len = X_all.shape[0] 72 | batch_size = batch_size if batch_size > 0 else data_len 73 | 74 | X_all_padded = np.concatenate([X_all, X_all[:batch_size]], axis=0) 75 | y_all_padded = np.concatenate([y_all, y_all[:batch_size]], axis=0) 76 | 77 | for slice_i in range(int(math.ceil(data_len / batch_size))): 78 | idx = slice_i * batch_size 79 | X_batch = X_all_padded[idx:idx + batch_size] 80 | y_batch = np.ravel(y_all_padded[idx:idx + batch_size]) 81 | yield X_batch.astype(np.uint8), y_batch.astype(np.uint8) -------------------------------------------------------------------------------- /GAN/read_data.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scelesticsiva/Neural-Networks-for-Image-Compression/f399e6fa9e4b64d89a5284ce4041211daaaa725a/GAN/read_data.pyc -------------------------------------------------------------------------------- /GAN/utils.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | 4 | #bias initialization 5 | def bias_variable(shape, name=None): 6 | initial = tf.constant(0.0, shape=shape) 7 | if name is None: 8 | return tf.Variable(initial) 9 | else: 10 | return tf.get_variable(name, initializer=initial) 11 | 12 | #xavier init 13 | def _glorot_initializer(prev_units, num_units, stddev_factor=1.0): 14 | stddev = np.sqrt(stddev_factor / np.sqrt(prev_units*num_units)) 15 | return tf.truncated_normal([prev_units, num_units], mean=0.0, stddev=stddev) 16 | 17 | #weight initialization 18 | def weight_variable(shape, stddev=0.02, name=None): 19 | initial = tf.truncated_normal(shape, stddev=stddev) 20 | if name is None: 21 | return tf.Variable(initial) 22 | else: 23 | return tf.get_variable(name, initializer=initial) 24 | 25 | #leaky relu implementation 26 | def leaky_relu(x, alpha=0.2, name=""): 27 | return tf.maximum(alpha * x, x, name) 28 | 29 | #stores activations 30 | def add_activation_summary(var): 31 | tf.summary.histogram(var.op.name + "/activation", var) 32 | tf.summary.scalar(var.op.name + "/sparsity", tf.nn.zero_fraction(var)) 33 | 34 | #combines patches to get the whole image 35 | def block2img(img_blocks,img_size): 36 | row,col = img_size 37 | img=np.zeros((row,col,3)).astype(np.float32) 38 | n,k,l,c=img_blocks.shape 39 | for i in range(0,int(row/k)): 40 | for j in range(0,int(col/k)): 41 | img[i*k:(i+1)*k,j*l:(j+1)*l,:]=img_blocks[int(i*col/k+j),:,:,:] 42 | return img -------------------------------------------------------------------------------- /GAN/utils.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scelesticsiva/Neural-Networks-for-Image-Compression/f399e6fa9e4b64d89a5284ce4041211daaaa725a/GAN/utils.pyc -------------------------------------------------------------------------------- /MLP_lossless/.ipynb_checkpoints/Untitled-checkpoint.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Deep Learning Image Compression Project MLP part\n", 8 | "\n", 9 | "This code applies the predictive coding algorithm with a basic MLP structure. Details of the predictive coding algorithm can be found [here](https://web.stanford.edu/class/ee398a/handouts/lectures/06-Prediction.pdf)\n", 10 | "\n", 11 | "The code has four parts:\n", 12 | "\n", 13 | "1. Huffman encoder (Copied from [here](http://www.techrepublic.com/article/huffman-coding-in-python/))\n", 14 | "2. Creation of prediction blocks and labels for predictive coding\n", 15 | "3. Linear regression algorithm for seeing the baseline\n", 16 | "4. MLP algorithm (initial phase)" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "# Part-1: Huffman encoder\n" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 1, 29 | "metadata": { 30 | "collapsed": false 31 | }, 32 | "outputs": [], 33 | "source": [ 34 | "#Binary tree data structure\n", 35 | "#http://www.techrepublic.com/article/huffman-coding-in-python/\n", 36 | "class Node(object):\n", 37 | "\tleft = None\n", 38 | "\tright = None\n", 39 | "\titem = None\n", 40 | "\tweight = 0\n", 41 | "\n", 42 | "\tdef __init__(self, i, w):\n", 43 | "\t\tself.item = i\n", 44 | "\t\tself.weight = w\n", 45 | "\n", 46 | "\tdef setChildren(self, ln, rn):\n", 47 | "\t\tself.left = ln\n", 48 | "\t\tself.right = rn\n", 49 | "\n", 50 | "\tdef __repr__(self):\n", 51 | "\t\treturn \"%s - %s — %s _ %s\" % (self.item, self.weight, self.left, self.right)\n", 52 | "\n", 53 | "\tdef __cmp__(self, a):\n", 54 | "\t\treturn cmp(self.weight, a.weight)" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 2, 60 | "metadata": { 61 | "collapsed": false 62 | }, 63 | "outputs": [], 64 | "source": [ 65 | "#Huffman Encoder\n", 66 | "#http://www.techrepublic.com/article/huffman-coding-in-python/\n", 67 | "\n", 68 | "from itertools import groupby\n", 69 | "from heapq import *\n", 70 | "\n", 71 | "\n", 72 | "#Huffman encoder \n", 73 | "def huffman(input):\n", 74 | " itemqueue = [Node(a,len(list(b))) for a,b in groupby(sorted(input))]\n", 75 | " heapify(itemqueue)\n", 76 | " while len(itemqueue) > 1:\n", 77 | " l = heappop(itemqueue)\n", 78 | " r = heappop(itemqueue)\n", 79 | " n = Node(None, r.weight+l.weight)\n", 80 | " n.setChildren(l,r)\n", 81 | " heappush(itemqueue, n) \n", 82 | " \n", 83 | " codes = {}\n", 84 | " def codeIt(s, node):\n", 85 | " if node.item:\n", 86 | " if not s:\n", 87 | " codes[node.item] = \"0\"\n", 88 | " else:\n", 89 | " codes[node.item] = s\n", 90 | " else:\n", 91 | " codeIt(s+\"0\", node.left)\n", 92 | " codeIt(s+\"1\", node.right)\n", 93 | " codeIt(\"\",itemqueue[0])\n", 94 | " return codes, \"\".join([codes[a] for a in input])\n" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 3, 100 | "metadata": { 101 | "collapsed": false 102 | }, 103 | "outputs": [ 104 | { 105 | "name": "stdout", 106 | "output_type": "stream", 107 | "text": [ 108 | "Bitrate of the original image\n", 109 | "Bits per pixel is 7.46820831299 bpp\n" 110 | ] 111 | } 112 | ], 113 | "source": [ 114 | "#Test Huffman encoder with an image\n", 115 | "\n", 116 | "import matplotlib.pyplot as plt\n", 117 | "import matplotlib.image as mpimg\n", 118 | "import numpy as np\n", 119 | "img=mpimg.imread('lena512.bmp')\n", 120 | "#print(img.shape)\n", 121 |
"#imgplot=plt.imshow(img,cmap='gray')\n", 122 | "\n", 123 | "img_input=img.reshape([-1]).astype(str)\n", 124 | "#print(img_input)\n", 125 | "huffman_img = huffman(img_input)\n", 126 | "#print(huffman_img[1])\n", 127 | "\n", 128 | "#print('Huffman code for ' + str(img) + ' is ' + str(huffman_img))\n", 129 | "#print('Original length is '+str(len(input) * 8)+', length of huffman coding is '+ str(len(huffman(input)[1])))\n", 130 | "print('Bitrate of the original image')\n", 131 | "print('Bits per pixel is ' + str(float(len(huffman_img[1])/float(len(img_input)))) + ' bpp')" 132 | ] 133 | }, 134 | { 135 | "cell_type": "markdown", 136 | "metadata": {}, 137 | "source": [ 138 | "# Part-2: Creation of prediction blocks and label for predictive coding\n" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": 4, 144 | "metadata": { 145 | "collapsed": false 146 | }, 147 | "outputs": [], 148 | "source": [ 149 | "#Lossless image copmpression using predictive coding. For reference see below\n", 150 | "#(https://web.stanford.edu/class/ee398a/handouts/lectures/06-Prediction.pdf)\n", 151 | "\n", 152 | "from itertools import product\n", 153 | "\n", 154 | "\n", 155 | "#Returns prediction blocks and the corresponding pixels in the image\n", 156 | "#Very naive implementation, neglects boundaries, can be improved further\n", 157 | "def pred_vectors(img,pred_size):\n", 158 | " (n,m)=img.shape #image size\n", 159 | " k,l=pred_size #Size of the predictive window\n", 160 | " \n", 161 | " fvec=np.zeros([(n-k-1)*(m-2*l),2*k*l+k+l])\n", 162 | " #print(fvec.shape)\n", 163 | " label = np.zeros([(n-k-1)*(m-2*l),1])\n", 164 | " for (i,j) in product(range(k,n-1), range(l,m-l)):\n", 165 | " #print(i,j)\n", 166 | " idx = (i-k)*(m-2*l)+j-l\n", 167 | " fvec_current =img[i-k:i,j-l:j+l+1].reshape([-1])\n", 168 | " fvec_current = np.append(fvec_current,img[i,j-l:j].reshape([-1]))\n", 169 | " fvec[idx,:]=fvec_current\n", 170 | " label[idx]=img[i,j]\n", 171 | " \n", 172 | " return fvec, label\n", 173 | "\n", 174 | "\n", 175 | "\n", 176 | "fvec,label = pred_vectors(img,[3,7])" 177 | ] 178 | }, 179 | { 180 | "cell_type": "markdown", 181 | "metadata": {}, 182 | "source": [ 183 | "# Part-3: Linear regression algorithm for seeing the baseline\n" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": 5, 189 | "metadata": { 190 | "collapsed": false 191 | }, 192 | "outputs": [ 193 | { 194 | "name": "stdout", 195 | "output_type": "stream", 196 | "text": [ 197 | "Results with linear regression\n", 198 | "MSE is 33.4797180849\n", 199 | "Bits per pixel is 4.43469547481 bpp\n" 200 | ] 201 | } 202 | ], 203 | "source": [ 204 | "#First trial: Simple regression network. 
No relation to deep learning, just to gain some intuition\n", 205 | "\n", 206 | "\n", 207 | "from sklearn import datasets, linear_model\n", 208 | "\n", 209 | "\n", 210 | "#Create the regression model using sklearn\n", 211 | "regr = linear_model.LinearRegression()\n", 212 | "regr.fit(fvec, label)\n", 213 | "\n", 214 | "#Predict and quantize the labels\n", 215 | "label_pred = np.round(regr.predict(fvec))\n", 216 | "\n", 217 | "#Calculate the error\n", 218 | "err = label_pred - label\n", 219 | "\n", 220 | "print('Results with linear regression')\n", 221 | "#MSE\n", 222 | "print('MSE is ' + str(np.mean(err**2)))\n", 223 | "\n", 224 | "#Calculate Huffman coding of the error\n", 225 | "huffman_err = huffman(err.reshape([-1]).astype(str))\n", 226 | "print('Bits per pixel is ' + str(float(len(huffman_err[1])/float(len(err)))) + ' bpp')\n" 227 | ] 228 | }, 229 | { 230 | "cell_type": "markdown", 231 | "metadata": {}, 232 | "source": [ 233 | "# Part-4: MLP algorithm (initial phase)" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": null, 239 | "metadata": { 240 | "collapsed": true 241 | }, 242 | "outputs": [], 243 | "source": [ 244 | "#Second trial: MLP\n", 245 | "\n", 246 | "import tensorflow as tf\n", 247 | "\n", 248 | "def mlp(x, hidden_sizes, activation_fn=tf.nn.relu,dropout_rate=1.0,std_dev=1.0):\n", 249 | " if not isinstance(hidden_sizes, (list, tuple)):\n", 250 | " raise ValueError(\"hidden_sizes must be a list or a tuple\")\n", 251 | " scope_args = {'initializer': tf.random_normal_initializer(stddev=std_dev)}\n", 252 | " for k in range(len(hidden_sizes)-1):\n", 253 | " layer_name=\"weights\"+str(k)\n", 254 | " #FC layers\n", 255 | " with tf.variable_scope(layer_name, **scope_args):\n", 256 | " W = tf.get_variable('W', shape=[x.shape[-1], hidden_sizes[k]])\n", 257 | " #b = tf.get_variable('b', shape=[hidden_sizes[k]])\n", 258 | " x = activation_fn(tf.matmul(x, W))# + b)\n", 259 | " #Dropout before the last layer\n", 260 | " x = tf.nn.dropout(x, keep_prob=dropout_rate)\n", 261 | " #Output layer (linear)\n", 262 | " with tf.variable_scope('outlayer', **scope_args):\n", 263 | " W = tf.get_variable('W', shape=[x.shape[-1], hidden_sizes[-1]])\n", 264 | " #b = tf.get_variable('b', shape=[hidden_sizes[-1]])\n", 265 | " return tf.matmul(x, W)# + b\n", 266 | " \n" 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": null, 272 | "metadata": { 273 | "collapsed": false, 274 | "scrolled": true 275 | }, 276 | "outputs": [ 277 | { 278 | "name": "stdout", 279 | "output_type": "stream", 280 | "text": [ 281 | "WARNING:tensorflow:From :42: initialize_all_variables (from tensorflow.python.ops.variables) is deprecated and will be removed after 2017-03-02.\n", 282 | "Instructions for updating:\n", 283 | "Use `tf.global_variables_initializer` instead.\n", 284 | "iteration 0\t mse loss: 0.29553\t Huffman bitrate is 7.474\n", 285 | "iteration 2000\t mse loss: 0.00106\t Huffman bitrate is 4.802\n", 286 | "iteration 4000\t mse loss: 0.00101\t Huffman bitrate is 4.725\n", 287 | "iteration 6000\t mse loss: 0.00085\t Huffman bitrate is 4.723\n", 288 | "iteration 8000\t mse loss: 0.00109\t Huffman bitrate is 4.704\n", 289 | "iteration 10000\t mse loss: 0.00072\t Huffman bitrate is 4.619\n", 290 | "iteration 12000\t mse loss: 0.00069\t Huffman bitrate is 4.620\n" 291 | ] 292 | } 293 | ], 294 | "source": [ 295 | "#Normalize the vectors and labels\n", 296 | "#Sometimes does not work because of wrong initialization\n", 297 | "\n", 298 | "fvec_n=fvec/np.round(np.max(label))\n", 299 |
"label_n = label/np.round(np.max(label))\n", 300 | "def test_classification(model_function, learning_rate=0.1):\n", 301 | "\n", 302 | " with tf.Graph().as_default() as g:\n", 303 | " # where are you going to allocate memory and perform computations\n", 304 | " with tf.device(\"/gpu:0\"):\n", 305 | " \n", 306 | " # define model \"input placeholders\", i.e. variables that are\n", 307 | " # going to be substituted with input data on train/test time\n", 308 | " x_ = tf.placeholder(tf.float32, [None, fvec_n.shape[1]])\n", 309 | " y_ = tf.placeholder(tf.float32, [None, 1])\n", 310 | " y_logits = model_function(x_)\n", 311 | " \n", 312 | " # naive implementation of loss:\n", 313 | " # > losses = y_ * tf.log(tf.nn.softmax(y_logits))\n", 314 | " # > tf.reduce_mean(-tf.reduce_sum(losses, 1))\n", 315 | " # can be numerically unstable.\n", 316 | " #\n", 317 | " # so here we use tf.nn.softmax_cross_entropy_with_logits on the raw\n", 318 | " # outputs of 'y', and then average across the batch.\n", 319 | " \n", 320 | " #Basic MSE loss\n", 321 | " loss = tf.reduce_mean(tf.pow(tf.subtract(y_,y_logits), 2.0))\n", 322 | " #loss = tf.reduce_mean(tf.abs(tf.subtract(y_,y_logits)))\n", 323 | " #train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)\n", 324 | " train_step = tf.train.AdamOptimizer(learning_rate=5e-3,beta1=0.3,beta2=0.999, \n", 325 | " epsilon=1e-08,use_locking=False).minimize(loss)\n", 326 | " \n", 327 | " y_pred = y_logits\n", 328 | " correct_prediction = tf.equal(y_pred, y_)\n", 329 | " accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))\n", 330 | "\n", 331 | " with g.as_default(), tf.Session() as sess:\n", 332 | " # that is how we \"execute\" statements \n", 333 | " # (return None, e.g. init() or train_op())\n", 334 | " # or compute parts of graph defined above (loss, output, etc.)\n", 335 | " # given certain input (x_, y_)\n", 336 | " sess.run(tf.initialize_all_variables())\n", 337 | " #sess.run(tf.global_variables_initializer())\n", 338 | " \n", 339 | " # train\n", 340 | " #print(label.shape[0])\n", 341 | " ids=[i for i in range(100)]\n", 342 | " for iter_i in range(50001):\n", 343 | " #print(label.shape[0])\n", 344 | " #print(2*my_range)\n", 345 | " batch_xs = fvec_n[ids,:] \n", 346 | " batch_ys = label_n[ids]\n", 347 | " ids=[(ids[0]+100+i)%label.shape[0] for i in range(100)]\n", 348 | " sess.run(train_step, feed_dict={x_: batch_xs, y_: batch_ys})\n", 349 | " \n", 350 | " # test trained model\n", 351 | " if iter_i % 2000 == 0:\n", 352 | " tf_feed_dict = {x_: fvec_n, y_: label_n}\n", 353 | " acc_value = sess.run(loss, feed_dict=tf_feed_dict)\n", 354 | " y_pred_val = sess.run(y_pred, feed_dict=tf_feed_dict)\n", 355 | " err_value = np.round((sess.run(y_pred, feed_dict=tf_feed_dict)-label_n)*255)\n", 356 | " huffman_err = huffman(err_value.reshape([-1]).astype(str))\n", 357 | " huffman_bpp = float(len(huffman_err[1])/float(len(err_value)))\n", 358 | " print('iteration %d\\t mse loss: %.5f\\t Huffman bitrate is %.3f'%(iter_i, acc_value, huffman_bpp))\n", 359 | " err_value = np.round((sess.run(y_pred, feed_dict=tf_feed_dict)-label_n)*255)\n", 360 | " print(err_value)\n", 361 | " \n", 362 | "test_classification(lambda x: mlp(x, [32,16,8,4,2,1], activation_fn=tf.nn.relu,std_dev=1e-1), learning_rate=0.1)" 363 | ] 364 | } 365 | ], 366 | "metadata": { 367 | "kernelspec": { 368 | "display_name": "Python 2", 369 | "language": "python", 370 | "name": "python2" 371 | }, 372 | "language_info": { 373 | "codemirror_mode": { 374 | "name": "ipython", 375 | "version": 2 376 
| }, 377 | "file_extension": ".py", 378 | "mimetype": "text/x-python", 379 | "name": "python", 380 | "nbconvert_exporter": "python", 381 | "pygments_lexer": "ipython2", 382 | "version": "2.7.12" 383 | } 384 | }, 385 | "nbformat": 4, 386 | "nbformat_minor": 2 387 | } 388 | -------------------------------------------------------------------------------- /MLP_lossless/.ipynb_checkpoints/desktop.ini: -------------------------------------------------------------------------------- 1 | [.ShellClassInfo] 2 | InfoTip=This folder is shared online. 3 | IconFile=C:\Program Files (x86)\Google\Drive\googledrivesync.exe 4 | IconIndex=16 5 | -------------------------------------------------------------------------------- /MLP_lossless/MayboleCastleLargeImage.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scelesticsiva/Neural-Networks-for-Image-Compression/f399e6fa9e4b64d89a5284ce4041211daaaa725a/MLP_lossless/MayboleCastleLargeImage.bmp -------------------------------------------------------------------------------- /MLP_lossless/README.md: -------------------------------------------------------------------------------- 1 | ## Lossless image compression using predictive coding with MLP 2 | 3 | For running the code, follow the steps in [MLP.ipynb](https://github.com/scelesticsiva/Neural-Networks-for-Image-Compression/tree/master/MLP_lossless/MLP.ipynb) 4 | 5 | [Benchmark](https://github.com/scelesticsiva/Neural-Networks-for-Image-Compression/tree/master/MLP_lossless/benchmark) 6 | includes MATLAB codes for getting the benchmark results using JPEG and JPEG-2000. 7 | -------------------------------------------------------------------------------- /MLP_lossless/baboon.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scelesticsiva/Neural-Networks-for-Image-Compression/f399e6fa9e4b64d89a5284ce4041211daaaa725a/MLP_lossless/baboon.bmp -------------------------------------------------------------------------------- /MLP_lossless/benchmark/Baboon_cheek.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scelesticsiva/Neural-Networks-for-Image-Compression/f399e6fa9e4b64d89a5284ce4041211daaaa725a/MLP_lossless/benchmark/Baboon_cheek.jpg -------------------------------------------------------------------------------- /MLP_lossless/benchmark/Baboon_eye.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scelesticsiva/Neural-Networks-for-Image-Compression/f399e6fa9e4b64d89a5284ce4041211daaaa725a/MLP_lossless/benchmark/Baboon_eye.jpg -------------------------------------------------------------------------------- /MLP_lossless/benchmark/MayboleCastleLargeImage.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scelesticsiva/Neural-Networks-for-Image-Compression/f399e6fa9e4b64d89a5284ce4041211daaaa725a/MLP_lossless/benchmark/MayboleCastleLargeImage.bmp -------------------------------------------------------------------------------- /MLP_lossless/benchmark/baboon.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scelesticsiva/Neural-Networks-for-Image-Compression/f399e6fa9e4b64d89a5284ce4041211daaaa725a/MLP_lossless/benchmark/baboon.bmp 
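A note on how the pieces in MLP_lossless fit together: the checkpoint notebook above defines pred_vectors() (causal context windows) and huffman() (the entropy coder), and the benchmark folder reproduces the same bits-per-pixel measurement in MATLAB. Below is a minimal end-to-end sketch, assuming the notebook's pred_vectors and huffman helpers are in scope; the [3, 7] window size is taken from the notebook, everything else is illustrative.

import numpy as np
import matplotlib.image as mpimg
from sklearn import linear_model

# Predict each pixel from its causal neighborhood, then entropy-code the
# rounded prediction errors; only the residual stream needs to be stored.
img = mpimg.imread('lena512.bmp')
fvec, label = pred_vectors(img, [3, 7])

regr = linear_model.LinearRegression()
regr.fit(fvec, label)
residual = np.round(regr.predict(fvec)) - label

# A smaller residual spread means shorter Huffman codes and a lower bpp.
codes, bitstream = huffman(residual.reshape([-1]).astype(str))
print('Bits per pixel: %.3f' % (len(bitstream) / float(residual.size)))

The decoder side runs the same predictor on already-decoded pixels, so it can regenerate each prediction exactly and add back the transmitted residual; that is what makes the scheme lossless.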
-------------------------------------------------------------------------------- /MLP_lossless/benchmark/baboon_cheek.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scelesticsiva/Neural-Networks-for-Image-Compression/f399e6fa9e4b64d89a5284ce4041211daaaa725a/MLP_lossless/benchmark/baboon_cheek.bmp -------------------------------------------------------------------------------- /MLP_lossless/benchmark/baboon_eye.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scelesticsiva/Neural-Networks-for-Image-Compression/f399e6fa9e4b64d89a5284ce4041211daaaa725a/MLP_lossless/benchmark/baboon_eye.bmp -------------------------------------------------------------------------------- /MLP_lossless/benchmark/bppHuffman.m: -------------------------------------------------------------------------------- 1 | function [bpp, entropy]=bppHuffman(array,max_value) 2 | %Calculates the average bits per pixel in the array using Huffman coding 3 | %bpp gives bits per pixel, entropy gives the Shannon entropy (reported 4 | %for reference) 5 | 6 | hstcounts = histcounts(array,0:max_value+1); 7 | prob = hstcounts/sum(hstcounts); 8 | dict = huffmandict(0:max_value,prob); 9 | keys = zeros(1,max_value+1); 10 | for k=1:max_value+1 11 | key = dict{k,2}; 12 | keys(k)=length(key); 13 | end 14 | bpp=sum(keys.*prob); 15 | entropy=-1*sum(prob(prob>=1e-5).*log(prob(prob>=1e-5))); -------------------------------------------------------------------------------- /MLP_lossless/benchmark/buff.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scelesticsiva/Neural-Networks-for-Image-Compression/f399e6fa9e4b64d89a5284ce4041211daaaa725a/MLP_lossless/benchmark/buff.mat -------------------------------------------------------------------------------- /MLP_lossless/benchmark/desktop.ini: -------------------------------------------------------------------------------- 1 | [.ShellClassInfo] 2 | InfoTip=This folder is shared online.
3 | IconFile=C:\Program Files (x86)\Google\Drive\googledrivesync.exe 4 | IconIndex=16 5 | -------------------------------------------------------------------------------- /MLP_lossless/benchmark/lena512.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scelesticsiva/Neural-Networks-for-Image-Compression/f399e6fa9e4b64d89a5284ce4041211daaaa725a/MLP_lossless/benchmark/lena512.bmp -------------------------------------------------------------------------------- /MLP_lossless/benchmark/lena_eye.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scelesticsiva/Neural-Networks-for-Image-Compression/f399e6fa9e4b64d89a5284ce4041211daaaa725a/MLP_lossless/benchmark/lena_eye.bmp -------------------------------------------------------------------------------- /MLP_lossless/lena512.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scelesticsiva/Neural-Networks-for-Image-Compression/f399e6fa9e4b64d89a5284ce4041211daaaa725a/MLP_lossless/lena512.bmp -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Image Compression Using Deep Learning 2 | ## Final Project for EC500K1 - Deep Learning Course 3 | 4 | ### Contributors 5 | Ozan Tezcan 6 | 7 | Kubra Cilingir 8 | 9 | Sivaramakrishnan Sankarapandian 10 | 11 | 12 | ### Dependencies 13 | 14 | - Python 3.6.0 15 | - TensorFlow 1 16 | - NumPy 17 | - scikit-image 18 | 19 | ### Description 20 | In this project, we investigated different types of neural networks on the image compression problem. 21 | We divided the problem into two parts: 22 | 23 | #### 1. Lossless Compression 24 | 25 | We used an MLP-based predictive coding scheme for lossless compression. 26 | Details of the algorithm can be found in the [report](https://github.com/scelesticsiva/Neural-Networks-for-Image-Compression/blob/master/report.pdf) 27 | 28 | The codes and instructions on how to run can be found in [MLP_lossless](https://github.com/scelesticsiva/Neural-Networks-for-Image-Compression/tree/master/MLP_lossless) 29 | 30 | #### 2. Lossy Compression 31 | 32 | For the lossy compression, we tried three different architectures.
33 | Details, results, and some examples of all of them can be found in the [report](https://github.com/scelesticsiva/Neural-Networks-for-Image-Compression/blob/master/report.pdf) 34 | The codes and instructions on how to run can be found in the following folders: 35 | - [Autoencoders](https://github.com/scelesticsiva/Neural-Networks-for-Image-Compression/tree/master/Autoencoders) 36 | - [GAN](https://github.com/scelesticsiva/Neural-Networks-for-Image-Compression/tree/master/GAN) 37 | - [GAN-AE](https://github.com/scelesticsiva/Neural-Networks-for-Image-Compression/tree/master/GAN-AE) 38 | -------------------------------------------------------------------------------- /final_figures.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scelesticsiva/Neural-Networks-for-Image-Compression/f399e6fa9e4b64d89a5284ce4041211daaaa725a/final_figures.pptx -------------------------------------------------------------------------------- /functions/.idea/functions.iml: -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- /functions/.idea/misc.xml: -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- /functions/.idea/modules.xml: -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- /functions/.idea/workspace.xml: -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- /functions/__pycache__/dataset_generator.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scelesticsiva/Neural-Networks-for-Image-Compression/f399e6fa9e4b64d89a5284ce4041211daaaa725a/functions/__pycache__/dataset_generator.cpython-36.pyc
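Before the functions/ sources that follow, a sketch of how their entry points compose into the lossy pipeline. This is a hedged illustration, not a documented interface: the module names (network, image_func) and call signatures are the ones defined in functions/, while the learning rate and file paths are placeholder choices.

import skimage.io

import image_func as img
import network

# Build the CNN autoencoder graph together with its MSE training ops.
model_dict = network.apply_classification_loss_mse(
    kernels1=[5, 7], kernels2=[7, 5],
    filters1=[16, 128], filters2=[128, 3],
    pool_size=[1, 2, 2, 1], learning_rate=1e-3)  # learning rate is illustrative

# Cut a 512x512 RGB test image into the 32x32 blocks the network expects.
big_img = skimage.io.imread('../test_img/lena512color.tiff')
img_32 = img.img2block(big_img)

# x_tr and x_test would come from the CIFAR-10 generator in
# dataset_generator.py; train_model periodically saves reconstructions.
# network.train_model(model_dict, x_tr, x_test, img_32, max_iter=20001)

The rounded latent code inside cnn_autoencoder stands in for what an actual encoder would transmit; the unquantized branch exists only so gradients can flow during training.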
-------------------------------------------------------------------------------- /functions/__pycache__/image_func.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scelesticsiva/Neural-Networks-for-Image-Compression/f399e6fa9e4b64d89a5284ce4041211daaaa725a/functions/__pycache__/image_func.cpython-36.pyc -------------------------------------------------------------------------------- /functions/__pycache__/network.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scelesticsiva/Neural-Networks-for-Image-Compression/f399e6fa9e4b64d89a5284ce4041211daaaa725a/functions/__pycache__/network.cpython-36.pyc -------------------------------------------------------------------------------- /functions/__pycache__/read_cifar10.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scelesticsiva/Neural-Networks-for-Image-Compression/f399e6fa9e4b64d89a5284ce4041211daaaa725a/functions/__pycache__/read_cifar10.cpython-36.pyc -------------------------------------------------------------------------------- /functions/dataset_generator.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import numpy as np 4 | 5 | import read_cifar10 as cf10 6 | 7 | 8 | def cifar10_dataset_generator(dataset_name, batch_size, restrict_size=1000): 9 | assert dataset_name in ['train', 'test'] 10 | assert batch_size > 0 or batch_size == -1 # -1 for entire dataset 11 | 12 | X_all_unrestricted, y_all = (cf10.load_training_data() if dataset_name == 'train' 13 | else cf10.load_test_data()) 14 | 15 | actual_restrict_size = restrict_size if dataset_name == 'train' else int(1e10) 16 | X_all = X_all_unrestricted[:actual_restrict_size] 17 | data_len = X_all.shape[0] 18 | batch_size = batch_size if batch_size > 0 else data_len 19 | 20 | X_all_padded = np.concatenate([X_all, X_all[:batch_size]], axis=0) 21 | y_all_padded = np.concatenate([y_all, y_all[:batch_size]], axis=0) 22 | 23 | for slice_i in range(math.ceil(data_len / batch_size)): 24 | idx = slice_i * batch_size 25 | # X_batch = X_all_padded[idx:idx + batch_size] 26 | X_batch = X_all_padded[idx:idx + batch_size] * 255 # bugfix: thanks Zezhou Sun! 27 | y_batch = np.ravel(y_all_padded[idx:idx + batch_size]) 28 | yield X_batch.astype(np.uint8), y_batch.astype(np.uint8) 29 | 30 | -------------------------------------------------------------------------------- /functions/image_func.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | 4 | def img2block(im): 5 | ''' 6 | Image patching code. It patches a given RGB image into 32x32 blocks and returns a 4D array with size 7 | [number_of_patches,32,32,3] 8 | ''' 9 | im = im.astype(np.float32) 10 | row, col, color = im.shape 11 | im_bl = np.zeros((int(row * col / 1024), 32, 32, 3)).astype(np.float32) 12 | count = 0 13 | for i in range(0, row - row % 32, 32): 14 | for j in range(0, col - col % 32, 32): 15 | im_bl[count, :, :, :] = im[i:i + 32, j:j + 32, :] 16 | count = count + 1 17 | im_bl = im_bl / 255.
18 | return im_bl 19 | 20 | 21 | def block2img(img_blocks, img_size): 22 | ''' 23 | Function for reconstructing the image back from patches 24 | ''' 25 | row, col = img_size 26 | img = np.zeros((row, col, 3)).astype(np.float32) 27 | n, k, l, c = img_blocks.shape 28 | 29 | for i in range(0, int(row / k)): 30 | for j in range(0, int(col / k)): 31 | img[i * k:(i + 1) * k, j * l:(j + 1) * l, :] = img_blocks[int(i * col / k + j), :, :, :] 32 | return img 33 | 34 | 35 | def convert2uint8(img): 36 | img[img>255]=255 37 | img[img<0]=0 38 | return img.astype(np.uint8) -------------------------------------------------------------------------------- /functions/network.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import skimage.io 3 | import numpy as np 4 | import image_func as img 5 | import os 6 | 7 | 8 | def cnn_autoencoder(x_, kernels1=[5, 7], kernels2=[7, 5], filters1=[16, 128], filters2=[128, 3], pool_size=[1, 2, 2, 1], 9 | name='autoencoder'): 10 | ''' 11 | Autoencoder network 12 | 13 | Inputs: 14 | x_ (tf.placeholder) : Input tensor 15 | kernels1 (1D array) : Size of the encoder kernels (assumed square kernels) 16 | kernels2 (1D array) : Size of the decoder kernels (assumed square kernels) 17 | filters1 (1D array) : Number of filters in encoder layers 18 | filters2 (1D array) : Number of filters in decoder layers 19 | pool_size (1D array): Pooling size in each layer. Its length must be equal to len(kernels1)+len(kernels2) 20 | First len(kernels1) terms will be used as pooling layers of the encoder; 21 | remaining terms will be used as unpooling layers of the decoder 22 | 23 | Returns: 24 | out_ (tf.placeholder) : Output of the autoencoder without quantization in the middle 25 | out_quant (tf.placeholder): Output of the autoencoder with quantization in the middle 26 | ''' 27 | with tf.variable_scope(name): 28 | out_ = x_ 29 | for k in range(len(kernels1)): 30 | conv = tf.layers.conv2d(inputs=out_, 31 | filters=filters1[k], 32 | kernel_size=[kernels1[k], kernels1[k]], 33 | padding="same", 34 | activation=tf.nn.relu, 35 | name='conv' + str(k)) 36 | pool_now = pool_size[k] 37 | if (pool_now == 1): 38 | out_ = conv 39 | else: 40 | out_ = tf.layers.max_pooling2d(inputs=conv, 41 | pool_size=[pool_now, pool_now], 42 | strides=pool_now, 43 | name='pool' + str(k)) 44 | 45 | out_quant = tf.round(out_ * 255.) / 255.
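# (Note on the quantization step above: rounding out_*255 and dividing back
# simulates storing the latent code at 8 bits. The decoder below is applied
# twice with shared weights via var_scope.reuse_variables(): once to the
# float latents, which carry the training gradients, and once to the
# quantized latents, which measure what a real codec would reconstruct.)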
46 | 47 | for k in range(len(kernels2)): 48 | with tf.variable_scope("deconv") as var_scope: 49 | pool_now = pool_size[k + len(kernels1)] 50 | if (pool_now == 1): 51 | x_up = out_ 52 | out_ = tf.layers.conv2d(inputs=x_up, 53 | filters=filters2[k], 54 | kernel_size=[kernels2[k], kernels2[k]], 55 | padding="same", 56 | activation=tf.nn.relu, 57 | name='deconv' + str(k)) 58 | var_scope.reuse_variables() 59 | x_quant_up = out_quant 60 | out_quant = tf.layers.conv2d(inputs=x_quant_up, 61 | filters=filters2[k], 62 | kernel_size=[kernels2[k], kernels2[k]], 63 | padding="same", 64 | activation=tf.nn.relu, 65 | name='deconv' + str(k)) 66 | else: 67 | sh = out_.get_shape().as_list() 68 | x_up = tf.image.resize_images(out_, [sh[1] * pool_now, sh[2] * pool_now]) 69 | out_ = tf.layers.conv2d(inputs=x_up, 70 | filters=filters2[k], 71 | kernel_size=[kernels2[k], kernels2[k]], 72 | padding="same", 73 | activation=tf.nn.relu, 74 | name='deconv' + str(k)) 75 | var_scope.reuse_variables() 76 | x_quant_up = tf.image.resize_images(out_quant, [sh[1] * pool_now, sh[2] * pool_now]) 77 | out_quant = tf.layers.conv2d(inputs=x_quant_up, 78 | filters=filters2[k], 79 | kernel_size=[kernels2[k], kernels2[k]], 80 | padding="same", 81 | activation=tf.nn.relu, 82 | name='deconv' + str(k)) 83 | return out_, out_quant 84 | 85 | 86 | def apply_classification_loss_mse(kernels1=[5, 7], kernels2=[7, 5], 87 | filters1=[16, 128], filters2=[128, 3], 88 | pool_size=[1, 2, 2, 1], learning_rate=1., FT=False): 89 | ''' 90 | MSE based autoencoder optimizer. 91 | 92 | Inputs: 93 | kernels1 (1D array) : Size of the encoder kernels (assumed square kernels) 94 | kernels2 (1D array) : Size of the decoder kernels (assumed square kernels) 95 | filters1 (1D array) : Number of filters in encoder layers 96 | filters2 (1D array) : Number of filters in decoder layers 97 | pool_size (1D array): Pooling size in each layer. 
Its length must be equal to len(kernels1)+len(kernels2) 98 | First len(kernels1) terms will be used as pooling layers of the encoder; 99 | remaining terms will be used as unpooling layers of the decoder 100 | learning_rate(float): Learning rate of the optimizer 101 | FT (boolean) : Boolean value for fine-tuning operation on decoder weights 102 | 103 | 104 | Returns: 105 | model_dict : Dictionary of the required output files 106 | ''' 107 | 108 | with tf.Graph().as_default() as g: 109 | with tf.device("/gpu:0"): # use gpu:0 if on GPU 110 | x_ = tf.placeholder(tf.float32, [None, 32, 32, 3]) 111 | (x_out, x_out_quant) = cnn_autoencoder(x_, pool_size=pool_size, kernels1=kernels1, filters1=filters1, 112 | kernels2=kernels2, filters2=filters2) 113 | 114 | mse_loss1 = tf.reduce_mean(tf.subtract(x_, x_out) ** 2) 115 | mse_loss2 = tf.reduce_mean(tf.subtract(x_, x_out_quant) ** 2) 116 | 117 | trainer = tf.train.AdamOptimizer(learning_rate=learning_rate) 118 | if (FT): 119 | with tf.variable_scope('autoencoder/deconv', reuse=True) as vs: 120 | var_list = [v for v in tf.global_variables() if v.name.startswith(vs.name)] 121 | train_op = trainer.minimize(mse_loss1, var_list=var_list) 122 | else: 123 | train_op = trainer.minimize(mse_loss1) 124 | 125 | model_dict = {'graph': g, 'inputs': x_, 'outputs': x_out, 'train_op': train_op, 'loss1': mse_loss1, 126 | 'loss2': mse_loss2} 127 | 128 | return model_dict 129 | 130 | 131 | # Working 132 | def apply_classification_loss_mse_with_rnn(kernels1=[5, 7], kernels2=[7, 5], 133 | filters1=[16, 128], filters2=[128, 3], 134 | pool_size=[2, 2], learning_rate=1., FT=False, depth=3): 135 | ''' 136 | MSE based autoencoder optimizer. 137 | 138 | Inputs: 139 | kernels1 (1D array) : Size of the encoder kernels (assumed square kernels) 140 | kernels2 (1D array) : Size of the decoder kernels (assumed square kernels) 141 | filters1 (1D array) : Number of filters in encoder layers 142 | filters2 (1D array) : Number of filters in decoder layers 143 | pool_size (1D array): Pooling size in each layer.
Its length must be equal to len(kernels1)+len(kernels2) 144 | First len(kernels1) terms will be used as pooling layers of the encoder; 145 | remaining terms will be used as unpooling layers of the decoder 146 | learning_rate(float): Learning rate of the optimizer 147 | FT (boolean) : Boolean value for fine-tuning operation on decoder weights 148 | depth (integer) : Number of stacked autoencoder stages; each stage codes the residual of the previous one 149 | 150 | Returns: 151 | model_dict : Dictionary of the required output files 152 | ''' 153 | with tf.Graph().as_default() as g: 154 | with tf.device("/gpu:0"): # use gpu:0 if on GPU 155 | x_ = tf.placeholder(tf.float32, [None, 32, 32, 3]) 156 | 157 | (x_out2, x_out_quant) = cnn_autoencoder(x_, pool_size=pool_size, kernels1=kernels1, filters1=filters1, 158 | kernels2=kernels2, filters2=filters2, name='filter0') 159 | x_out1 = x_ 160 | mse_loss1 = tf.reduce_mean(tf.subtract(x_out1, x_out2) ** 2) 161 | for k in range(1, depth): 162 | (x_out3, x_out_quant) = cnn_autoencoder(x_out1 - x_out2, pool_size=pool_size, kernels1=kernels1, 163 | filters1=filters1, 164 | kernels2=kernels2, filters2=filters2, name='filter' + str(k)) 165 | mse_loss1 = tf.add(mse_loss1, tf.reduce_mean(tf.subtract(x_out1, tf.add(x_out2, x_out3)) ** 2)) 166 | x_out1 = x_out2 167 | x_out2 = x_out3 168 | x_out3 = x_out2 169 | # y_dict = dict(labels=y_, logits=y_logits) 170 | # losses = tf.nn.sparse_softmax_cross_entropy_with_logits(**y_dict) 171 | # cross_entropy_loss = tf.reduce_mean(losses) 172 | # mse_loss1=tf.reduce_mean(tf.subtract(x_,x_out)**2) 173 | # a=tf.pad(tf.subtract(x_,x_out),[[0,0],[16,16],[16,16],[0,0]],'CONSTANT') 174 | 175 | # mse_loss1=tf.reduce_mean(tf.nn.conv2d(a,h3,strides=[1,1,1,1],padding="VALID")**2) 176 | mse_loss2 = tf.reduce_mean(tf.subtract(x_, x_out3) ** 2) 177 | trainer = tf.train.AdamOptimizer(learning_rate=learning_rate) 178 | if (FT): 179 | with tf.variable_scope('deconv', reuse=True) as vs: 180 | var_list = [v for v in tf.global_variables() if v.name.startswith(vs.name)] 181 | train_op = trainer.minimize(mse_loss1, var_list=var_list) 182 | else: 183 | train_op = trainer.minimize(mse_loss1) 184 | 185 | # y_pred = tf.argmax(tf.nn.softmax(y_logits), dimension=1) 186 | # correct_prediction = tf.equal(tf.cast(y_pred, tf.int32), y_) 187 | # accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 188 | 189 | model_dict = {'graph': g, 'inputs': x_, 'outputs': x_out_quant, 'train_op': train_op, 'loss1': mse_loss1, 190 | 'loss2': mse_loss2} 191 | 192 | return model_dict 193 | 194 | 195 | 196 | def train_model(model_dict, x_tr, x_test, img_32, train_every=100, test_every=200, max_iter=20001, load=False, 197 | fname='cifar10_recon', outname='/tmp/cnn_autoencoder', ftname='/tmp/cnn_autoencoder'): 198 | ''' 199 | Inputs: 200 | model_dict: Output of apply_classification_loss_mse 201 | x_tr : Training images 202 | x_test : Test Images 203 | img_32 : 32x32 patches of a big image 204 | load : Boolean for loading the weights from pre-trained network 205 | fname : Directory to save outputs 206 | outname : Directory to save (load=False) or load (load=True) weights 207 | ftname : Directory to save new weights when load=True 208 | ''' 209 | if not os.path.exists('..//'+fname): 210 | os.makedirs('..//'+fname) 211 | with model_dict['graph'].as_default(), tf.Session() as sess: 212 | sess.run(tf.global_variables_initializer()) 213 | saver = tf.train.Saver() 214 | if (load): 215 | saver.restore(sess, outname) 216 | print("Model loaded") 217 | else: 218 | sess.run(tf.global_variables_initializer()) 219 | 220 | ids = [i for i in range(100)] 221 | for iter_i in
range(max_iter): 222 | batch_xs = x_tr[ids, :, :, :] 223 | ids = [(ids[0] + 100 + i) % x_tr.shape[0] for i in range(100)] 224 | sess.run(model_dict['train_op'], feed_dict={model_dict['inputs']: batch_xs}) 225 | 226 | # test trained model 227 | if iter_i % train_every == 0: 228 | tf_feed_dict = {model_dict['inputs']: batch_xs} 229 | loss_val = sess.run(model_dict['loss1'], feed_dict={model_dict['inputs']: batch_xs}) 230 | print('iteration %d\t train mse: %.3E\t' % (iter_i, loss_val)) 231 | if iter_i % test_every == 0: 232 | # tf_feed_dict = {x_: x_test} 233 | loss_val1 = sess.run(model_dict['loss1'], feed_dict={model_dict['inputs']: x_test}) 234 | loss_val2 = sess.run(model_dict['loss2'], feed_dict={model_dict['inputs']: x_test}) 235 | print( 236 | 'iteration %d\t TEST MSE: %.3E\t TEST MSE(Quantized): %.3E\t' % (iter_i, loss_val1, loss_val2)) 237 | 238 | img_block = sess.run(model_dict['outputs'], 239 | feed_dict={model_dict['inputs']: img_32}) 240 | x_from_test = sess.run(model_dict['outputs'], 241 | feed_dict={ 242 | model_dict['inputs']: x_test[:5, :, :, :].reshape([-1, 32, 32, 3])}) 243 | 244 | img_recon = img.block2img(img_block, (512, 512)) 245 | img_recon = img.convert2uint8(img_recon * 255.) 246 | skimage.io.imsave('../' + fname + '/img32_recon_' + str(int(iter_i / test_every)) + '.tiff', 247 | img_recon) 248 | 249 | for i in range(5): 250 | img_recon = img.convert2uint8((255 * x_from_test[i, :, :, :]).reshape([32, 32, 3])).astype(np.uint8) 251 | skimage.io.imsave( 252 | '../' + fname + '/test' + str(i) + '_' + str(int(iter_i / test_every)) + '.tiff', img_recon) 253 | 254 | saver = tf.train.Saver() 255 | if load: 256 | outname = ftname 257 | save_path = saver.save(sess, outname) 258 | print("Model saved in file: %s" % save_path) -------------------------------------------------------------------------------- /functions/read_cifar10.py: -------------------------------------------------------------------------------- 1 | #### 2 | # COPIED FROM https://github.com/Hvass-Labs/TensorFlow-Tutorials/ 3 | # and modified 4 | 5 | 6 | ######################################################################## 7 | # 8 | # Functions for downloading the CIFAR-10 data-set from the internet 9 | # and loading it into memory. 10 | # 11 | # Implemented in Python 3.5 12 | # 13 | # Usage: 14 | # 1) Set the variable data_path with the desired storage path. 15 | # 2) Call maybe_download_and_extract() to download the data-set 16 | # if it is not already located in the given data_path. 17 | # 3) Call load_class_names() to get an array of the class-names. 18 | # 4) Call load_training_data() and load_test_data() to get 19 | # the images, class-numbers and one-hot encoded class-labels 20 | # for the training-set and test-set. 21 | # 5) Use the returned data in your own program. 22 | # 23 | # Format: 24 | # The images for the training- and test-sets are returned as 4-dim numpy 25 | # arrays each with the shape: [image_number, height, width, channel] 26 | # where the individual pixels are floats between 0.0 and 1.0. 27 | # 28 | ######################################################################## 29 | # 30 | # This file is part of the TensorFlow Tutorials available at: 31 | # 32 | # https://github.com/Hvass-Labs/TensorFlow-Tutorials 33 | # 34 | # Published under the MIT License. See the file LICENSE for details. 
35 | # 36 | # Copyright 2016 by Magnus Erik Hvass Pedersen 37 | # 38 | ######################################################################## 39 | 40 | import numpy as np 41 | import pickle 42 | 43 | import sys 44 | import os 45 | import six.moves.urllib as urllib 46 | import tarfile 47 | import zipfile 48 | 49 | 50 | ######################################################################## 51 | 52 | 53 | def _print_download_progress(count, block_size, total_size): 54 | """ 55 | Function used for printing the download progress. 56 | Used as a call-back function in maybe_download_and_extract(). 57 | """ 58 | 59 | # Percentage completion. 60 | pct_complete = float(count * block_size) / total_size 61 | 62 | # Status-message. Note the \r which means the line should overwrite itself. 63 | msg = "\r- Download progress: {0:.1%}".format(pct_complete) 64 | 65 | # Print it. 66 | sys.stdout.write(msg) 67 | sys.stdout.flush() 68 | 69 | 70 | ######################################################################## 71 | 72 | 73 | def dataset_maybe_download_and_extract(url, download_dir): 74 | """ 75 | Download and extract the data if it doesn't already exist. 76 | Assumes the url is a tar-ball file. 77 | :param url: 78 | Internet URL for the tar-file to download. 79 | Example: "https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz" 80 | :param download_dir: 81 | Directory where the downloaded file is saved. 82 | Example: "data/CIFAR-10/" 83 | :return: 84 | Nothing. 85 | """ 86 | 87 | # Filename for saving the file downloaded from the internet. 88 | # Use the filename from the URL and add it to the download_dir. 89 | filename = url.split('/')[-1] 90 | file_path = os.path.join(download_dir, filename) 91 | 92 | # Check if the file already exists. 93 | # If it exists then we assume it has also been extracted, 94 | # otherwise we need to download and extract it now. 95 | if not os.path.exists(file_path): 96 | # Check if the download directory exists, otherwise create it. 97 | if not os.path.exists(download_dir): 98 | os.makedirs(download_dir) 99 | 100 | # Download the file from the internet. 101 | file_path, _ = urllib.request.urlretrieve(url=url, 102 | filename=file_path, 103 | reporthook=_print_download_progress) 104 | 105 | print() 106 | print("Download finished. Extracting files.") 107 | 108 | if file_path.endswith(".zip"): 109 | # Unpack the zip-file. 110 | zipfile.ZipFile(file=file_path, mode="r").extractall(download_dir) 111 | elif file_path.endswith((".tar.gz", ".tgz")): 112 | # Unpack the tar-ball. 113 | tarfile.open(name=file_path, mode="r:gz").extractall(download_dir) 114 | 115 | print("Done.") 116 | 117 | 118 | ######################################################################## 119 | 120 | 121 | ######################################################################## 122 | 123 | # Directory where you want to download and save the data-set. 124 | # Set this before you start calling any of the functions below. 125 | data_path = "./CIFAR-10/" 126 | 127 | # URL for the data-set on the internet. 128 | data_url = "https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz" 129 | 130 | ######################################################################## 131 | # Various constants for the size of the images. 132 | # Use these constants in your own program. 133 | 134 | # Width and height of each image. 135 | img_size = 32 136 | 137 | # Number of channels in each image, 3 channels: Red, Green, Blue. 138 | num_channels = 3 139 | 140 | # Length of an image when flattened to a 1-dim array. 
141 | img_size_flat = img_size * img_size * num_channels 142 | 143 | # Number of classes. 144 | num_classes = 10 145 | 146 | ######################################################################## 147 | # Various constants used to allocate arrays of the correct size. 148 | 149 | # Number of files for the training-set. 150 | _num_files_train = 5 151 | 152 | # Number of images for each batch-file in the training-set. 153 | _images_per_file = 10000 154 | 155 | # Total number of images in the training-set. 156 | # This is used to pre-allocate arrays for efficiency. 157 | _num_images_train = _num_files_train * _images_per_file 158 | 159 | 160 | ######################################################################## 161 | # Private functions for downloading, unpacking and loading data-files. 162 | 163 | 164 | def _get_file_path(filename=""): 165 | """ 166 | Return the full path of a data-file for the data-set. 167 | 168 | If filename=="" then return the directory of the files. 169 | """ 170 | 171 | return os.path.join(data_path, "cifar-10-batches-py/", filename) 172 | 173 | 174 | def _unpickle(filename): 175 | """ 176 | Unpickle the given file and return the data. 177 | 178 | Note that the appropriate dir-name is prepended the filename. 179 | """ 180 | 181 | # Create full path for the file. 182 | file_path = _get_file_path(filename) 183 | 184 | with open(file_path, mode='rb') as file: 185 | # In Python 3.X it is important to set the encoding, 186 | # otherwise an exception is raised here. 187 | data = pickle.load(file, encoding='bytes') 188 | 189 | return data 190 | 191 | 192 | def _convert_images(raw): 193 | """ 194 | Convert images from the CIFAR-10 format and 195 | return a 4-dim array with shape: [image_number, height, width, channel] 196 | where the pixels are floats between 0.0 and 1.0. 197 | """ 198 | 199 | # Convert the raw images from the data-files to floating-points. 200 | raw_float = np.array(raw, dtype=float) / 255.0 201 | 202 | # Reshape the array to 4-dimensions. 203 | images = raw_float.reshape([-1, num_channels, img_size, img_size]) 204 | 205 | # Reorder the indices of the array. 206 | images = images.transpose([0, 2, 3, 1]) 207 | 208 | return images 209 | 210 | 211 | def _load_data(filename): 212 | """ 213 | Load a pickled data-file from the CIFAR-10 data-set 214 | and return the converted images (see above) and the class-number 215 | for each image. 216 | """ 217 | 218 | # Load the pickled data-file. 219 | data = _unpickle(filename) 220 | 221 | # Get the raw images. 222 | raw_images = data[b'data'] 223 | 224 | # Get the class-numbers for each image. Convert to numpy-array. 225 | cls = np.array(data[b'labels']) 226 | 227 | # Convert the images. 228 | images = _convert_images(raw_images) 229 | 230 | return images, cls 231 | 232 | 233 | ######################################################################## 234 | # Public functions that you may call to download the data-set from 235 | # the internet and load the data into memory. 236 | 237 | 238 | def maybe_download_and_extract(): 239 | """ 240 | Download and extract the CIFAR-10 data-set if it doesn't already exist 241 | in data_path (set this variable first to the desired path). 242 | """ 243 | 244 | dataset_maybe_download_and_extract(url=data_url, download_dir=data_path) 245 | 246 | 247 | def load_class_names(): 248 | """ 249 | Load the names for the classes in the CIFAR-10 data-set. 250 | 251 | Returns a list with the names. Example: names[3] is the name 252 | associated with class-number 3. 
253 | """ 254 | 255 | # Load the class-names from the pickled file. 256 | raw = _unpickle(filename="batches.meta")[b'label_names'] 257 | 258 | # Convert from binary strings. 259 | names = [x.decode('utf-8') for x in raw] 260 | 261 | return names 262 | 263 | 264 | def load_training_data(): 265 | """ 266 | Load all the training-data for the CIFAR-10 data-set. 267 | 268 | The data-set is split into 5 data-files which are merged here. 269 | 270 | Returns the images, class-numbers and one-hot encoded class-labels. 271 | """ 272 | 273 | maybe_download_and_extract() 274 | # Pre-allocate the arrays for the images and class-numbers for efficiency. 275 | images = np.zeros(shape=[_num_images_train, img_size, img_size, num_channels], 276 | dtype=float) 277 | cls = np.zeros(shape=[_num_images_train], dtype=int) 278 | 279 | # Begin-index for the current batch. 280 | begin = 0 281 | 282 | # For each data-file. 283 | for i in range(_num_files_train): 284 | # Load the images and class-numbers from the data-file. 285 | images_batch, cls_batch = _load_data(filename="data_batch_" + str(i + 1)) 286 | 287 | # Number of images in this batch. 288 | num_images = len(images_batch) 289 | 290 | # End-index for the current batch. 291 | end = begin + num_images 292 | 293 | # Store the images into the array. 294 | images[begin:end, :] = images_batch 295 | 296 | # Store the class-numbers into the array. 297 | cls[begin:end] = cls_batch 298 | 299 | # The begin-index for the next batch is the current end-index. 300 | begin = end 301 | 302 | return images, cls 303 | 304 | 305 | def load_test_data(): 306 | """ 307 | Load all the test-data for the CIFAR-10 data-set. 308 | 309 | Returns the images, class-numbers and one-hot encoded class-labels. 310 | """ 311 | 312 | images, cls = _load_data(filename="test_batch") 313 | 314 | return images, cls 315 | 316 | ######################################################################## 317 | -------------------------------------------------------------------------------- /report.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scelesticsiva/Neural-Networks-for-Image-Compression/f399e6fa9e4b64d89a5284ce4041211daaaa725a/report.pdf -------------------------------------------------------------------------------- /test_img/bu2010_recon.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scelesticsiva/Neural-Networks-for-Image-Compression/f399e6fa9e4b64d89a5284ce4041211daaaa725a/test_img/bu2010_recon.tif -------------------------------------------------------------------------------- /test_img/jpg_test.m: -------------------------------------------------------------------------------- 1 | img = imread('lena512color.tiff'); 2 | imwrite(img,'lena.jp2','CompressionRatio',16) -------------------------------------------------------------------------------- /test_img/lena512color.tiff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scelesticsiva/Neural-Networks-for-Image-Compression/f399e6fa9e4b64d89a5284ce4041211daaaa725a/test_img/lena512color.tiff -------------------------------------------------------------------------------- /test_img/lion.tiff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scelesticsiva/Neural-Networks-for-Image-Compression/f399e6fa9e4b64d89a5284ce4041211daaaa725a/test_img/lion.tiff 
-------------------------------------------------------------------------------- /test_img/lion_recon2_convrealFTpx8.tiff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scelesticsiva/Neural-Networks-for-Image-Compression/f399e6fa9e4b64d89a5284ce4041211daaaa725a/test_img/lion_recon2_convrealFTpx8.tiff -------------------------------------------------------------------------------- /wiki image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scelesticsiva/Neural-Networks-for-Image-Compression/f399e6fa9e4b64d89a5284ce4041211daaaa725a/wiki image.png --------------------------------------------------------------------------------
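Finally, a hedged sketch of PSNR, the standard reconstruction metric for lossy codecs, computed between an original test image and one of the reconstructions stored above. The file names are the ones in test_img/; the assumption is that the original and the reconstruction have identical shapes and 8-bit pixel values.

import numpy as np
import skimage.io

orig = skimage.io.imread('test_img/lion.tiff').astype(np.float64)
recon = skimage.io.imread('test_img/lion_recon2_convrealFTpx8.tiff').astype(np.float64)

# Mean squared error over all pixels, then PSNR with a peak value of 255.
mse = np.mean((orig - recon) ** 2)
psnr = 10.0 * np.log10(255.0 ** 2 / mse)
print('PSNR: %.2f dB' % psnr)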